A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
+ A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3655137DAB8300332B0C /* addtargets2.cpp */; };
+ A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3656137DAB8300332B0C /* alignchime.cpp */; };
+ A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3657137DAB8300332B0C /* alignchimel.cpp */; };
+ A74D368A137DAB8400332B0C /* alnparams.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365A137DAB8300332B0C /* alnparams.cpp */; };
+ A74D368B137DAB8400332B0C /* alpha.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365C137DAB8300332B0C /* alpha.cpp */; };
+ A74D368C137DAB8400332B0C /* alpha2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D365E137DAB8300332B0C /* alpha2.cpp */; };
+ A74D368D137DAB8400332B0C /* fractid.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3664137DAB8300332B0C /* fractid.cpp */; };
+ A74D368E137DAB8400332B0C /* getparents.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3665137DAB8300332B0C /* getparents.cpp */; };
+ A74D368F137DAB8400332B0C /* globalalign2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3666137DAB8300332B0C /* globalalign2.cpp */; };
+ A74D3690137DAB8400332B0C /* make3way.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366A137DAB8300332B0C /* make3way.cpp */; };
+ A74D3691137DAB8400332B0C /* mx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366B137DAB8300332B0C /* mx.cpp */; };
+ A74D3692137DAB8400332B0C /* myutils.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D366E137DAB8300332B0C /* myutils.cpp */; };
+ A74D3693137DAB8400332B0C /* path.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3672137DAB8300332B0C /* path.cpp */; };
+ A74D3694137DAB8400332B0C /* searchchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3674137DAB8300332B0C /* searchchime.cpp */; };
+ A74D3695137DAB8400332B0C /* seqdb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3676137DAB8300332B0C /* seqdb.cpp */; };
+ A74D3696137DAB8400332B0C /* setnucmx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3678137DAB8300332B0C /* setnucmx.cpp */; };
+ A74D3697137DAB8400332B0C /* sfasta.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3679137DAB8300332B0C /* sfasta.cpp */; };
+ A74D3698137DAB8400332B0C /* tracebackbit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D367F137DAB8300332B0C /* tracebackbit.cpp */; };
+ A74D3699137DAB8400332B0C /* uchime_main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3681137DAB8300332B0C /* uchime_main.cpp */; };
+ A74D369A137DAB8400332B0C /* usort.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3683137DAB8300332B0C /* usort.cpp */; };
+ A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3684137DAB8300332B0C /* viterbifast.cpp */; };
+ A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3686137DAB8300332B0C /* writechhit.cpp */; };
+ A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; };
A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; };
A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
A799F5B91309A3E000AEEFA0 /* makefastqcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */; };
A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergegroupscommand.cpp; sourceTree = "<group>"; };
A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removerarecommand.cpp; sourceTree = "<group>"; };
+ A74D3655137DAB8300332B0C /* addtargets2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = addtargets2.cpp; sourceTree = "<group>"; };
+ A74D3656137DAB8300332B0C /* alignchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchime.cpp; sourceTree = "<group>"; };
+ A74D3657137DAB8300332B0C /* alignchimel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchimel.cpp; sourceTree = "<group>"; };
+ A74D3658137DAB8300332B0C /* allocs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = allocs.h; sourceTree = "<group>"; };
+ A74D3659137DAB8300332B0C /* alnheuristics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alnheuristics.h; sourceTree = "<group>"; };
+ A74D365A137DAB8300332B0C /* alnparams.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alnparams.cpp; sourceTree = "<group>"; };
+ A74D365B137DAB8300332B0C /* alnparams.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alnparams.h; sourceTree = "<group>"; };
+ A74D365C137DAB8300332B0C /* alpha.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha.cpp; sourceTree = "<group>"; };
+ A74D365D137DAB8300332B0C /* alpha.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alpha.h; sourceTree = "<group>"; };
+ A74D365E137DAB8300332B0C /* alpha2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alpha2.cpp; sourceTree = "<group>"; };
+ A74D365F137DAB8300332B0C /* chainer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chainer.h; sourceTree = "<group>"; };
+ A74D3660137DAB8300332B0C /* chime.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chime.h; sourceTree = "<group>"; };
+ A74D3661137DAB8300332B0C /* diagbox.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = diagbox.h; sourceTree = "<group>"; };
+ A74D3662137DAB8300332B0C /* dp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dp.h; sourceTree = "<group>"; };
+ A74D3663137DAB8300332B0C /* evalue.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = evalue.h; sourceTree = "<group>"; };
+ A74D3664137DAB8300332B0C /* fractid.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fractid.cpp; sourceTree = "<group>"; };
+ A74D3665137DAB8300332B0C /* getparents.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getparents.cpp; sourceTree = "<group>"; };
+ A74D3666137DAB8300332B0C /* globalalign2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = globalalign2.cpp; sourceTree = "<group>"; };
+ A74D3667137DAB8300332B0C /* help.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = help.h; sourceTree = "<group>"; };
+ A74D3668137DAB8300332B0C /* hsp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hsp.h; sourceTree = "<group>"; };
+ A74D3669137DAB8300332B0C /* hspfinder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hspfinder.h; sourceTree = "<group>"; };
+ A74D366A137DAB8300332B0C /* make3way.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = make3way.cpp; sourceTree = "<group>"; };
+ A74D366B137DAB8300332B0C /* mx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mx.cpp; sourceTree = "<group>"; };
+ A74D366C137DAB8300332B0C /* mx.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mx.h; sourceTree = "<group>"; };
+ A74D366D137DAB8300332B0C /* myopts.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myopts.h; sourceTree = "<group>"; };
+ A74D366E137DAB8300332B0C /* myutils.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = myutils.cpp; sourceTree = "<group>"; };
+ A74D366F137DAB8300332B0C /* myutils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = myutils.h; sourceTree = "<group>"; };
+ A74D3670137DAB8300332B0C /* orf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = orf.h; sourceTree = "<group>"; };
+ A74D3671137DAB8300332B0C /* out.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = out.h; sourceTree = "<group>"; };
+ A74D3672137DAB8300332B0C /* path.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = path.cpp; sourceTree = "<group>"; };
+ A74D3673137DAB8300332B0C /* path.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = path.h; sourceTree = "<group>"; };
+ A74D3674137DAB8300332B0C /* searchchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = searchchime.cpp; sourceTree = "<group>"; };
+ A74D3675137DAB8300332B0C /* seq.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seq.h; sourceTree = "<group>"; };
+ A74D3676137DAB8300332B0C /* seqdb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = seqdb.cpp; sourceTree = "<group>"; };
+ A74D3677137DAB8300332B0C /* seqdb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = seqdb.h; sourceTree = "<group>"; };
+ A74D3678137DAB8300332B0C /* setnucmx.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = setnucmx.cpp; sourceTree = "<group>"; };
+ A74D3679137DAB8300332B0C /* sfasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sfasta.cpp; sourceTree = "<group>"; };
+ A74D367A137DAB8300332B0C /* sfasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sfasta.h; sourceTree = "<group>"; };
+ A74D367B137DAB8300332B0C /* svnmods.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = svnmods.h; sourceTree = "<group>"; };
+ A74D367C137DAB8300332B0C /* svnversion.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = svnversion.h; sourceTree = "<group>"; };
+ A74D367D137DAB8300332B0C /* timers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timers.h; sourceTree = "<group>"; };
+ A74D367E137DAB8300332B0C /* timing.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = timing.h; sourceTree = "<group>"; };
+ A74D367F137DAB8300332B0C /* tracebackbit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tracebackbit.cpp; sourceTree = "<group>"; };
+ A74D3680137DAB8300332B0C /* uc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = uc.h; sourceTree = "<group>"; };
+ A74D3681137DAB8300332B0C /* uchime_main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = uchime_main.cpp; sourceTree = "<group>"; };
+ A74D3682137DAB8300332B0C /* ultra.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ultra.h; sourceTree = "<group>"; };
+ A74D3683137DAB8300332B0C /* usort.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = usort.cpp; sourceTree = "<group>"; };
+ A74D3684137DAB8300332B0C /* viterbifast.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = viterbifast.cpp; sourceTree = "<group>"; };
+ A74D3685137DAB8300332B0C /* windex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = windex.h; sourceTree = "<group>"; };
+ A74D3686137DAB8300332B0C /* writechhit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = writechhit.cpp; sourceTree = "<group>"; };
+ A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = "<group>"; };
+ A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = "<group>"; };
A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = "<group>"; };
A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = "<group>"; };
A778FE69134CA6CA00C0BA33 /* getcommandinfocommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcommandinfocommand.h; sourceTree = "<group>"; };
name = Products;
sourceTree = "<group>";
};
+ A74D3644137DA7CE00332B0C /* uchime */ = {
+ isa = PBXGroup;
+ children = (
+ A74D3655137DAB8300332B0C /* addtargets2.cpp */,
+ A74D3656137DAB8300332B0C /* alignchime.cpp */,
+ A74D3657137DAB8300332B0C /* alignchimel.cpp */,
+ A74D3658137DAB8300332B0C /* allocs.h */,
+ A74D3659137DAB8300332B0C /* alnheuristics.h */,
+ A74D365A137DAB8300332B0C /* alnparams.cpp */,
+ A74D365B137DAB8300332B0C /* alnparams.h */,
+ A74D365C137DAB8300332B0C /* alpha.cpp */,
+ A74D365D137DAB8300332B0C /* alpha.h */,
+ A74D365E137DAB8300332B0C /* alpha2.cpp */,
+ A74D365F137DAB8300332B0C /* chainer.h */,
+ A74D3660137DAB8300332B0C /* chime.h */,
+ A74D3661137DAB8300332B0C /* diagbox.h */,
+ A74D3662137DAB8300332B0C /* dp.h */,
+ A74D3663137DAB8300332B0C /* evalue.h */,
+ A74D3664137DAB8300332B0C /* fractid.cpp */,
+ A74D3665137DAB8300332B0C /* getparents.cpp */,
+ A74D3666137DAB8300332B0C /* globalalign2.cpp */,
+ A74D3667137DAB8300332B0C /* help.h */,
+ A74D3668137DAB8300332B0C /* hsp.h */,
+ A74D3669137DAB8300332B0C /* hspfinder.h */,
+ A74D366A137DAB8300332B0C /* make3way.cpp */,
+ A74D366B137DAB8300332B0C /* mx.cpp */,
+ A74D366C137DAB8300332B0C /* mx.h */,
+ A74D366D137DAB8300332B0C /* myopts.h */,
+ A74D366E137DAB8300332B0C /* myutils.cpp */,
+ A74D366F137DAB8300332B0C /* myutils.h */,
+ A74D3670137DAB8300332B0C /* orf.h */,
+ A74D3671137DAB8300332B0C /* out.h */,
+ A74D3672137DAB8300332B0C /* path.cpp */,
+ A74D3673137DAB8300332B0C /* path.h */,
+ A74D3674137DAB8300332B0C /* searchchime.cpp */,
+ A74D3675137DAB8300332B0C /* seq.h */,
+ A74D3676137DAB8300332B0C /* seqdb.cpp */,
+ A74D3677137DAB8300332B0C /* seqdb.h */,
+ A74D3678137DAB8300332B0C /* setnucmx.cpp */,
+ A74D3679137DAB8300332B0C /* sfasta.cpp */,
+ A74D367A137DAB8300332B0C /* sfasta.h */,
+ A74D367B137DAB8300332B0C /* svnmods.h */,
+ A74D367C137DAB8300332B0C /* svnversion.h */,
+ A74D367D137DAB8300332B0C /* timers.h */,
+ A74D367E137DAB8300332B0C /* timing.h */,
+ A74D367F137DAB8300332B0C /* tracebackbit.cpp */,
+ A74D3680137DAB8300332B0C /* uc.h */,
+ A74D3681137DAB8300332B0C /* uchime_main.cpp */,
+ A74D3682137DAB8300332B0C /* ultra.h */,
+ A74D3683137DAB8300332B0C /* usort.cpp */,
+ A74D3684137DAB8300332B0C /* viterbifast.cpp */,
+ A74D3685137DAB8300332B0C /* windex.h */,
+ A74D3686137DAB8300332B0C /* writechhit.cpp */,
+ );
+ name = uchime;
+ sourceTree = "<group>";
+ };
A7E9BA3812D3956100DA6239 /* commands */ = {
isa = PBXGroup;
children = (
A7E9B68212D37EC400DA6239 /* chimerapintailcommand.cpp */,
A7E9B68B12D37EC400DA6239 /* chimeraslayercommand.h */,
A7E9B68A12D37EC400DA6239 /* chimeraslayercommand.cpp */,
+ A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */,
+ A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */,
A7E9B68D12D37EC400DA6239 /* chopseqscommand.h */,
A7E9B68C12D37EC400DA6239 /* chopseqscommand.cpp */,
A7E9B69112D37EC400DA6239 /* classifyotucommand.h */,
A7E9BA4512D3965600DA6239 /* chimera */ = {
isa = PBXGroup;
children = (
+ A74D3644137DA7CE00332B0C /* uchime */,
A7E9B65C12D37EC300DA6239 /* bellerophon.cpp */,
A7E9B65D12D37EC300DA6239 /* bellerophon.h */,
A7E9B67412D37EC400DA6239 /* ccode.cpp */,
A7FE7C401330EA1000F7B327 /* getcurrentcommand.cpp in Sources */,
A7FE7E6D13311EA400F7B327 /* setcurrentcommand.cpp in Sources */,
A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */,
+ A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */,
+ A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */,
+ A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */,
+ A74D368A137DAB8400332B0C /* alnparams.cpp in Sources */,
+ A74D368B137DAB8400332B0C /* alpha.cpp in Sources */,
+ A74D368C137DAB8400332B0C /* alpha2.cpp in Sources */,
+ A74D368D137DAB8400332B0C /* fractid.cpp in Sources */,
+ A74D368E137DAB8400332B0C /* getparents.cpp in Sources */,
+ A74D368F137DAB8400332B0C /* globalalign2.cpp in Sources */,
+ A74D3690137DAB8400332B0C /* make3way.cpp in Sources */,
+ A74D3691137DAB8400332B0C /* mx.cpp in Sources */,
+ A74D3692137DAB8400332B0C /* myutils.cpp in Sources */,
+ A74D3693137DAB8400332B0C /* path.cpp in Sources */,
+ A74D3694137DAB8400332B0C /* searchchime.cpp in Sources */,
+ A74D3695137DAB8400332B0C /* seqdb.cpp in Sources */,
+ A74D3696137DAB8400332B0C /* setnucmx.cpp in Sources */,
+ A74D3697137DAB8400332B0C /* sfasta.cpp in Sources */,
+ A74D3698137DAB8400332B0C /* tracebackbit.cpp in Sources */,
+ A74D3699137DAB8400332B0C /* uchime_main.cpp in Sources */,
+ A74D369A137DAB8400332B0C /* usort.cpp in Sources */,
+ A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */,
+ A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */,
+ A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
DEPLOYMENT_LOCATION = NO;
GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_ENABLE_SSE3_EXTENSIONS = NO;
+ GCC_ENABLE_SSE41_EXTENSIONS = NO;
+ GCC_ENABLE_SSE42_EXTENSIONS = NO;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"../release\\\"\"",
--- /dev/null
+//#if UCHIMES\r
+\r
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "ultra.h"\r
+#include <set>\r
+\r
+const float MAX_WORD_COUNT_DROP = 1; /* AddTargets stops collecting targets at the first candidate whose word count is more than this below the top candidate's. */\r
+/* Prototypes for functions that are not defined in this file. AddTargets calls only USort; the other three are not referenced in this file. */\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path);\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path);\r
+void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts,\r
+ vector<unsigned> &Order);\r
+\r
+// Adds to TargetIndexes the indexes of the database sequences whose word\r
+// count, as reported by USort for Query, is within MAX_WORD_COUNT_DROP of\r
+// the first-ranked sequence's word count.\r
+//\r
+// DB            database to search; nothing is added when it is empty.\r
+// Query         sequence passed to USort.\r
+// TargetIndexes set that receives the selected sequence indexes; existing\r
+//               members are kept.\r
+//\r
+// USort is expected to return one Order entry per database sequence\r
+// (asserted); Order[0] is treated as the top candidate.\r
+void AddTargets(SeqDB &DB, const SeqData &Query, set<unsigned> &TargetIndexes)\r
+	{\r
+	const unsigned DBSize = DB.GetSeqCount();\r
+	if (DBSize == 0)\r
+		return;\r
+\r
+	vector<float> Counts;\r
+	vector<unsigned> Ranked;\r
+	USort(Query, DB, Counts, Ranked);\r
+	asserta(SIZE(Ranked) == DBSize);\r
+\r
+	const float BestCount = Counts[Ranked[0]];\r
+	unsigned Rank = 0;\r
+	while (Rank < DBSize)\r
+		{\r
+		const unsigned Candidate = Ranked[Rank++];\r
+		// Candidates are visited in ranked order, so stop at the first one\r
+		// that falls too far below the top word count.\r
+		if (BestCount - Counts[Candidate] > MAX_WORD_COUNT_DROP)\r
+			break;\r
+		TargetIndexes.insert(Candidate);\r
+		}\r
+	}\r
+\r
+//#endif\r
--- /dev/null
+#include "myutils.h"\r
+#include "seq.h"\r
+#include "chime.h"\r
+#include "dp.h"\r
+\r
+#define TRACE 0 /* Non-zero compiles in the Log() diagnostics guarded by #if TRACE in AlignChimeGlobal3. */\r
+#define TRACE_BS 0 /* NOTE(review): not tested in the part of this file reviewed here - confirm where it is used. */\r
+/* Prototypes for functions that are not defined in the part of this file reviewed here. */\r
+void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
+ const string &PathQA, const string &PathQB,\r
+ string &Q3, string &A3, string &B3);\r
+/* AlignChimeLocal3 is the alternative that AlignChime3 calls when opt_ucl is set. */\r
+void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
+ const string &QLabel, const string &ALabel, const string &BLabel,\r
+ ChimeHit2 &Hit);\r
+\r
+// Returns Y divided by the weighted penalty opt_xn*(N + opt_dn) + opt_xa*A.\r
+// Callers in this file pass vote counts: Y in favour, N opposed, A the\r
+// remaining count (abstain or against). The opt_* values are options\r
+// declared outside this file.\r
+double GetScore2(double Y, double N, double A)\r
+	{\r
+	const double Penalty = opt_xn*(N + opt_dn) + opt_xa*A;\r
+	return Y/Penalty;\r
+	}\r
+\r
+// Scores a three-way alignment of a query (Q3) against two candidate parents\r
+// (A3, B3) for a chimeric crossover and reports the result in Hit.\r
+//\r
+// Q3, A3 and B3 are the aligned rows and must have the same number of\r
+// columns (asserted). QLabel, ALabel and BLabel are copied into Hit.\r
+//\r
+// Hit is cleared on entry and QLabel is set. The function then returns\r
+// without filling anything else when no column between the first two and\r
+// the last two columns has isacgt() true for all three rows, or when no\r
+// crossover column achieves a score above zero.\r
+//\r
+// Otherwise Hit receives the crossover column range (ColXLo..ColXHi), the\r
+// matching query positions (QXLo, QXHi), the percent identities, Div, the\r
+// CS_* vote counts and Score. The parents are swapped in Hit (A3/B3, labels,\r
+// PctIdQA/PctIdQB) when the better model has B on the left. If\r
+// g_fUChimeAlns is open and Hit.Div > 0 the hit is also written with\r
+// WriteChimeHitX.\r
+void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+	{\r
+	Hit.Clear();\r
+	Hit.QLabel = QLabel;\r
+\r
+	const byte *Q3Seq = (const byte *) Q3.c_str();\r
+	const byte *A3Seq = (const byte *) A3.c_str();\r
+	const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+	const unsigned ColCount = SIZE(Q3);\r
+	asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+#if TRACE\r
+	Log("Q %5u %*.*s\n", ColCount, ColCount, ColCount, Q3Seq);\r
+	Log("A %5u %*.*s\n", ColCount, ColCount, ColCount, A3Seq);\r
+	Log("B %5u %*.*s\n", ColCount, ColCount, ColCount, B3Seq);\r
+#endif\r
+\r
+// Discard terminal gaps: ColLo..ColHi is the span between the first and the\r
+// last column in which all three rows pass isacgt(). The first two and the\r
+// last two columns are never considered.\r
+	unsigned ColLo = UINT_MAX;\r
+	unsigned ColHi = UINT_MAX;\r
+	for (unsigned Col = 2; Col + 2 < ColCount; ++Col)\r
+		{\r
+		char q = Q3Seq[Col];\r
+		char a = A3Seq[Col];\r
+		char b = B3Seq[Col];\r
+\r
+		if (isacgt(q) && isacgt(a) && isacgt(b))\r
+			{\r
+			if (ColLo == UINT_MAX)\r
+				ColLo = Col;\r
+			ColHi = Col;\r
+			}\r
+		}\r
+\r
+	if (ColLo == UINT_MAX)\r
+		return;\r
+\r
+// Running totals over ColLo..ColHi. Each Accum* vector is padded with ColLo\r
+// placeholder entries so that Accum*[Col] is the total over columns\r
+// ColLo..Col inclusive.\r
+	vector<unsigned> AccumSameA(ColLo, UINT_MAX);\r
+	vector<unsigned> AccumSameB(ColLo, UINT_MAX);\r
+	vector<unsigned> AccumForA(ColLo, UINT_MAX);\r
+	vector<unsigned> AccumForB(ColLo, UINT_MAX);\r
+	vector<unsigned> AccumAbstain(ColLo, UINT_MAX);\r
+\r
+	unsigned SumSameA = 0;\r
+	unsigned SumSameB = 0;\r
+	unsigned SumSameAB = 0;\r
+	unsigned Sum = 0;\r
+	unsigned SumForA = 0;\r
+	unsigned SumForB = 0;\r
+	unsigned SumAbstain = 0;\r
+	for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+		{\r
+		char q = Q3Seq[Col];\r
+		char a = A3Seq[Col];\r
+		char b = B3Seq[Col];\r
+\r
+		// Only columns where all three rows pass isacgt() are counted.\r
+		if (isacgt(q) && isacgt(a) && isacgt(b))\r
+			{\r
+			if (q == a)\r
+				++SumSameA;\r
+			if (q == b)\r
+				++SumSameB;\r
+			if (a == b)\r
+				++SumSameAB;\r
+			if (q == a && q != b)\r
+				++SumForA;\r
+			if (q == b && q != a)\r
+				++SumForB;\r
+			if (q != a && q != b)\r
+				++SumAbstain;\r
+			++Sum;\r
+			}\r
+\r
+		AccumSameA.push_back(SumSameA);\r
+		AccumSameB.push_back(SumSameB);\r
+		AccumForA.push_back(SumForA);\r
+		AccumForB.push_back(SumForB);\r
+		AccumAbstain.push_back(SumAbstain);\r
+		}\r
+\r
+	asserta(SIZE(AccumSameA) == ColHi+1);\r
+	asserta(SIZE(AccumSameB) == ColHi+1);\r
+	asserta(SIZE(AccumAbstain) == ColHi+1);\r
+\r
+	// Sum is at least 1 here because column ColLo was counted.\r
+	double IdQA = double(SumSameA)/Sum;\r
+	double IdQB = double(SumSameB)/Sum;\r
+	double IdAB = double(SumSameAB)/Sum;\r
+	double MaxId = max(IdQA, IdQB);\r
+\r
+#if TRACE\r
+	Log("IdQA=%.1f%% IdQB=%.1f%% IdAB=%.1f\n", IdQA*100.0, IdQB*100.0, IdAB*100.0);\r
+	Log("\n");\r
+	Log(" x AQB IdAL IdBL IdAR IdBR DivAB DivBA YAL YBL YAR YBR AbL AbR ScoreAB ScoreAB XLo Xhi\n");\r
+	Log("----- --- ----- ----- ----- ----- ------ ------ ----- ----- ----- ----- ----- ----- ------- ------- ----- -----\n");\r
+#endif\r
+	unsigned BestXLo = UINT_MAX;\r
+	unsigned BestXHi = UINT_MAX;\r
+	double BestDiv = 0.0;\r
+	double BestIdQM = 0.0;\r
+	double BestScore = 0.0;\r
+\r
+// Find range of cols BestXLo..BestXHi that maximizes score\r
+	bool FirstA = false;\r
+\r
+// NOTE: Must be < ColHi not <= because use Col+1 below\r
+	for (unsigned Col = ColLo; Col < ColHi; ++Col)\r
+		{\r
+		unsigned SameAL = AccumSameA[Col];\r
+		unsigned SameBL = AccumSameB[Col];\r
+		unsigned SameAR = SumSameA - AccumSameA[Col];\r
+		unsigned SameBR = SumSameB - AccumSameB[Col];\r
+\r
+		// Identity of the query to the two-segment models "A then B" and\r
+		// "B then A" with the break after column Col.\r
+		double IdModelAB = double(SameAL + SameBR)/Sum;\r
+		double IdModelBA = double(SameBL + SameAR)/Sum;\r
+\r
+		// The right-hand vote counts are taken from Accum*[Col+1], so they\r
+		// leave out column Col+1 as well as the left-hand columns.\r
+		unsigned ForAL = AccumForA[Col];\r
+		unsigned ForBL = AccumForB[Col];\r
+		unsigned ForAR = SumForA - AccumForA[Col+1];\r
+		unsigned ForBR = SumForB - AccumForB[Col+1];\r
+		unsigned AbL = AccumAbstain[Col];\r
+		unsigned AbR = SumAbstain - AccumAbstain[Col+1];\r
+\r
+		double ScoreAB = GetScore2(ForAL, ForBL, AbL)*GetScore2(ForBR, ForAR, AbR);\r
+		double ScoreBA = GetScore2(ForBL, ForAL, AbL)*GetScore2(ForAR, ForBR, AbR);\r
+\r
+		double DivAB = IdModelAB/MaxId;\r
+		double DivBA = IdModelBA/MaxId;\r
+		double MaxDiv = max(DivAB, DivBA);\r
+\r
+		double MaxScore = max(ScoreAB, ScoreBA);\r
+		if (MaxScore > BestScore)\r
+			{\r
+			BestScore = MaxScore;\r
+			BestXLo = Col;\r
+			BestXHi = Col;\r
+			FirstA = (ScoreAB > ScoreBA);\r
+			if (FirstA)\r
+				BestIdQM = IdModelAB;\r
+			else\r
+				BestIdQM = IdModelBA;\r
+			if (MaxDiv > BestDiv)\r
+				BestDiv = MaxDiv;\r
+			}\r
+		else if (MaxScore == BestScore)\r
+			{\r
+			// A tie extends the candidate crossover range to this column.\r
+			BestXHi = Col;\r
+			if (MaxDiv > BestDiv)\r
+				BestDiv = MaxDiv;\r
+			}\r
+\r
+#if TRACE\r
+		{\r
+		Log("%5u", Col);\r
+		char q = Q3Seq[Col];\r
+		char a = A3Seq[Col];\r
+		char b = B3Seq[Col];\r
+		Log(" %c%c%c", a, q, b);\r
+		Log(" %5u", SameAL);\r
+		Log(" %5u", SameBL);\r
+		Log(" %5u", SameAR);\r
+		Log(" %5u", SameBR);\r
+		Log(" %5.4f", DivAB);\r
+		Log(" %5.4f", DivBA);\r
+		Log(" %5u", ForAL);\r
+		Log(" %5u", ForBL);\r
+		Log(" %5u", ForAR);\r
+		Log(" %5u", ForBR);\r
+		Log(" %5u", AbL);\r
+		Log(" %5u", AbR);\r
+		Log(" %7.4f", ScoreAB);\r
+		Log(" %7.4f", ScoreBA);\r
+		if (BestXLo != UINT_MAX)\r
+			Log(" %5u", BestXLo);\r
+		if (BestXHi != UINT_MAX)\r
+			Log(" %5u", BestXHi);\r
+		Log("\n");\r
+		}\r
+#endif\r
+		}\r
+\r
+	if (BestXLo == UINT_MAX)\r
+		{\r
+#if TRACE\r
+		Log("\n");\r
+		Log("No crossover found.\n");\r
+#endif\r
+		return;\r
+		}\r
+#if TRACE\r
+	Log("BestX col %u - %u\n", BestXLo, BestXHi);\r
+#endif\r
+\r
+// Find maximum region of identity within BestXLo..BestXHi. The default is\r
+// the single midpoint column; it is replaced by the longest run of columns\r
+// where q, a and b are all equal, if that run is longer than one column.\r
+	unsigned ColXLo = (BestXLo + BestXHi)/2;\r
+	unsigned ColXHi = ColXLo;\r
+	unsigned SegLo = UINT_MAX;\r
+	unsigned SegHi = UINT_MAX;\r
+	for (unsigned Col = BestXLo; Col <= BestXHi; ++Col)\r
+		{\r
+		char q = Q3Seq[Col];\r
+		char a = A3Seq[Col];\r
+		char b = B3Seq[Col];\r
+\r
+		if (q == a && q == b)\r
+			{\r
+			if (SegLo == UINT_MAX)\r
+				SegLo = Col;\r
+			SegHi = Col;\r
+			}\r
+		else\r
+			{\r
+			// With no open run SegLo == SegHi == UINT_MAX, so the unsigned\r
+			// arithmetic gives a length of 1, which never beats the best.\r
+			unsigned SegLength = SegHi - SegLo + 1;\r
+			unsigned BestSegLength = ColXHi - ColXLo + 1;\r
+			if (SegLength > BestSegLength)\r
+				{\r
+				ColXLo = SegLo;\r
+				ColXHi = SegHi;\r
+				}\r
+			SegLo = UINT_MAX;\r
+			SegHi = UINT_MAX;\r
+			}\r
+		}\r
+	unsigned SegLength = SegHi - SegLo + 1;\r
+	unsigned BestSegLength = ColXHi - ColXLo + 1;\r
+	if (SegLength > BestSegLength)\r
+		{\r
+		ColXLo = SegLo;\r
+		ColXHi = SegHi;\r
+		}\r
+\r
+// Convert the crossover columns to query positions (non-'-' letters of Q3\r
+// before the column). The two tests are independent so that QXHi is also\r
+// set when the crossover is a single column (ColXLo == ColXHi).\r
+	unsigned QPos = 0;\r
+	for (unsigned x = 0; x < ColCount; ++x)\r
+		{\r
+		if (x == ColXLo)\r
+			Hit.QXLo = QPos;\r
+		if (x == ColXHi)\r
+			{\r
+			Hit.QXHi = QPos;\r
+			break;\r
+			}\r
+		char q = Q3Seq[x];\r
+		if (q != '-')\r
+			++QPos;\r
+		}\r
+\r
+	Hit.ColXLo = ColXLo;\r
+	Hit.ColXHi = ColXHi;\r
+\r
+	Hit.PctIdAB = IdAB*100.0;\r
+	Hit.PctIdQM = BestIdQM*100.0;\r
+\r
+	Hit.Div = (BestDiv - 1.0)*100.0;\r
+\r
+	Hit.Q3 = Q3;\r
+	Hit.QLabel = QLabel;\r
+	if (FirstA)\r
+		{\r
+		Hit.A3 = A3;\r
+		Hit.B3 = B3;\r
+		Hit.ALabel = ALabel;\r
+		Hit.BLabel = BLabel;\r
+		Hit.PctIdQA = IdQA*100.0;\r
+		Hit.PctIdQB = IdQB*100.0;\r
+		}\r
+	else\r
+		{\r
+		// Report the parents so that Hit's "A" is the left-hand parent.\r
+		Hit.A3 = B3;\r
+		Hit.B3 = A3;\r
+		Hit.ALabel = BLabel;\r
+		Hit.BLabel = ALabel;\r
+		Hit.PctIdQA = IdQB*100.0;\r
+		Hit.PctIdQB = IdQA*100.0;\r
+		}\r
+\r
+// CS SNPs: count votes left of ColXLo and right of ColXHi.\r
+	Hit.CS_LY = 0;\r
+	Hit.CS_LN = 0;\r
+	Hit.CS_RY = 0;\r
+	Hit.CS_RN = 0;\r
+	Hit.CS_LA = 0;\r
+	Hit.CS_RA = 0;\r
+\r
+	for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+		{\r
+		char q = Q3Seq[Col];\r
+		char a = A3Seq[Col];\r
+		char b = B3Seq[Col];\r
+		// Columns where all three rows agree carry no vote.\r
+		if (q == a && q == b)\r
+			continue;\r
+\r
+		unsigned ngaps = 0;\r
+		if (isgap(q))\r
+			++ngaps;\r
+		if (isgap(a))\r
+			++ngaps;\r
+		if (isgap(b))\r
+			++ngaps;\r
+\r
+		if (opt_skipgaps)\r
+			{\r
+			if (ngaps == 3)\r
+				continue;\r
+			}\r
+		else\r
+			{\r
+			if (ngaps == 2)\r
+				continue;\r
+			}\r
+\r
+		// From here on "a" is the left-hand parent of the chosen model.\r
+		if (!FirstA)\r
+			swap(a, b);\r
+\r
+		// With opt_skipgaps2, skip columns adjacent to a gap in any row.\r
+		if (opt_skipgaps2)\r
+			{\r
+			if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+				continue;\r
+			if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+				continue;\r
+			}\r
+\r
+		if (Col < ColXLo)\r
+			{\r
+			if (q == a && q != b)\r
+				++Hit.CS_LY;\r
+			else if (q == b && q != a)\r
+				++Hit.CS_LN;\r
+			else\r
+				++Hit.CS_LA;\r
+			}\r
+		else if (Col > ColXHi)\r
+			{\r
+			if (q == b && q != a)\r
+				++Hit.CS_RY;\r
+			else if (q == a && q != b)\r
+				++Hit.CS_RN;\r
+			else\r
+				++Hit.CS_RA;\r
+			}\r
+		}\r
+\r
+	double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
+	double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
+	Hit.Score = ScoreL*ScoreR;\r
+\r
+	extern FILE *g_fUChimeAlns;\r
+	if (g_fUChimeAlns != 0 && Hit.Div > 0.0)\r
+		{\r
+		void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit);\r
+		WriteChimeHitX(g_fUChimeAlns, Hit);\r
+		}\r
+	}\r
+\r
+// Scores a three-way alignment (query Q3 against parents A3 and B3) and\r
+// stores the result in Hit. The opt_ucl option selects the local scorer;\r
+// otherwise the global scorer is used.\r
+void AlignChime3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+    {\r
+    if (!opt_ucl)\r
+        {\r
+        AlignChimeGlobal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);\r
+        return;\r
+        }\r
+\r
+    AlignChimeLocal3(Q3, A3, B3, QLabel, ALabel, BLabel, Hit);\r
+    }\r
+\r
+// Replaces the contents of s with the non-gap characters of Seq[0..L),\r
+// in their original order.\r
+static void StripGaps(const byte *Seq, unsigned L, string &s)\r
+    {\r
+    s.clear();\r
+\r
+    unsigned Pos = 0;\r
+    while (Pos < L)\r
+        {\r
+        const char Letter = Seq[Pos++];\r
+        if (isgap(Letter))\r
+            continue;\r
+        s.push_back(Letter);\r
+        }\r
+    }\r
+\r
+// Makes SDOut a copy of SDIn whose sequence has every gap character removed\r
+// and every remaining letter upper-cased.\r
+//\r
+// SDOut.Seq is set to a fresh buffer obtained from myalloc (sized for the\r
+// gapped input length SDIn.L) and SDOut.L to the number of letters kept.\r
+// The buffer is not released here; the caller in this file passes it to\r
+// myfree when done.\r
+static void StripGapsAlloc(const SeqData &SDIn, SeqData &SDOut)\r
+    {\r
+    SDOut = SDIn;\r
+    byte *s = myalloc(byte, SDIn.L);\r
+    unsigned k = 0;\r
+    for (unsigned i = 0; i < SDIn.L; ++i)\r
+        {\r
+        // Keep the letter as an unsigned byte: handing toupper() a negative\r
+        // plain char (any byte >= 0x80 where char is signed) is undefined.\r
+        const byte c = SDIn.Seq[i];\r
+        if (!isgap(c))\r
+            s[k++] = (byte) toupper(c);\r
+        }\r
+    SDOut.Seq = s;\r
+    SDOut.L = k;\r
+    }\r
+\r
+// Combines the two pairwise alignment paths (query vs. A, query vs. B) into\r
+// a three-way alignment with Make3Way, then scores it with AlignChime3 using\r
+// the labels of the three input sequences. The choice between local and\r
+// global scoring is made inside AlignChime3.\r
+void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+  const string &PathQA, const string &PathQB, ChimeHit2 &Hit)\r
+    {\r
+    string Q3, A3, B3;\r
+    Make3Way(QSD, ASD, BSD, PathQA, PathQB, Q3, A3, B3);\r
+\r
+    AlignChime3(Q3, A3, B3,\r
+      QSD.Label, ASD.Label, BSD.Label,\r
+      Hit);\r
+    }\r
+\r
+// Re-aligns an already aligned triple from scratch and scores the result.\r
+//\r
+// The gapped inputs QSD3/ASD3/BSD3 are stripped of gaps (and upper-cased) by\r
+// StripGapsAlloc, the query is globally aligned to each parent, and the two\r
+// paths are handed to AlignChime. If either global alignment is not found,\r
+// Hit is cleared and only its QLabel is set.\r
+//\r
+// The three temporary sequence buffers are released on every path; the\r
+// previous version returned early on alignment failure and leaked them.\r
+void AlignChime3SDRealign(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,\r
+  ChimeHit2 &Hit)\r
+    {\r
+    SeqData QSD;\r
+    SeqData ASD;\r
+    SeqData BSD;\r
+    StripGapsAlloc(QSD3, QSD);\r
+    StripGapsAlloc(ASD3, ASD);\r
+    StripGapsAlloc(BSD3, BSD);\r
+\r
+    string PathQA;\r
+    string PathQB;\r
+    bool FoundQA = GlobalAlign(QSD, ASD, PathQA);\r
+    bool FoundQB = GlobalAlign(QSD, BSD, PathQB);\r
+    if (FoundQA && FoundQB)\r
+        AlignChime(QSD, ASD, BSD, PathQA, PathQB, Hit);\r
+    else\r
+        {\r
+        Hit.Clear();\r
+        Hit.QLabel = QSD3.Label;\r
+        }\r
+\r
+    // Single cleanup point shared by the success and failure paths.\r
+    myfree((void *) QSD.Seq);\r
+    myfree((void *) ASD.Seq);\r
+    myfree((void *) BSD.Seq);\r
+    }\r
+\r
+// Scores a pre-aligned triple given as SeqData records.\r
+//\r
+// With opt_realign set, the work is delegated to AlignChime3SDRealign.\r
+// Otherwise the three rows (which must all have the same length) are\r
+// upper-cased, columns where all three rows are gaps are dropped, and the\r
+// resulting strings are passed to AlignChime3.\r
+void AlignChime3SD(const SeqData &QSD3, const SeqData &ASD3, const SeqData &BSD3,\r
+  ChimeHit2 &Hit)\r
+    {\r
+    if (opt_realign)\r
+        {\r
+        AlignChime3SDRealign(QSD3, ASD3, BSD3, Hit);\r
+        return;\r
+        }\r
+\r
+    const unsigned ColCount = QSD3.L;\r
+    asserta(ASD3.L == ColCount && BSD3.L == ColCount);\r
+\r
+    string Q3, A3, B3;\r
+    Q3.reserve(ColCount);\r
+    A3.reserve(ColCount);\r
+    B3.reserve(ColCount);\r
+\r
+    for (unsigned Col = 0; Col != ColCount; ++Col)\r
+        {\r
+        const byte q = toupper(QSD3.Seq[Col]);\r
+        const byte a = toupper(ASD3.Seq[Col]);\r
+        const byte b = toupper(BSD3.Seq[Col]);\r
+\r
+        // Keep the column unless every row has a gap there.\r
+        if (!(isgap(q) && isgap(a) && isgap(b)))\r
+            {\r
+            Q3.push_back(q);\r
+            A3.push_back(a);\r
+            B3.push_back(b);\r
+            }\r
+        }\r
+\r
+    AlignChime3(Q3, A3, B3, QSD3.Label, ASD3.Label, BSD3.Label, Hit);\r
+    }\r
--- /dev/null
+#include "myutils.h"\r
+#include "seq.h"\r
+#include "chime.h"\r
+\r
+#define TRACE 0\r
+\r
+/***\r
+Let:\r
+ S[i] = Score of col i: 0=no SNP, +1 = Y, -3 = N or A.\r
+\r
+ V[k] = Best segment score from j, j+1 .. k for all possible j\r
+ max(j) Sum i=j..k S[i]\r
+\r
+Recursion relation:\r
+ V[k] = S[k] + max (V[k-1], 0)\r
+***/\r
+\r
+void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,\r
+ const string &QLabel, const string &ALabel, const string &BLabel,\r
+ ChimeHit2 &Hit);\r
+\r
+void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,\r
+ const string &PathQA, const string &PathQB,\r
+ string &Q3, string &A3, string &B3);\r
+\r
+double GetScore2(double Y, double N, double A);\r
+\r
+// Local chimera scoring of a three-way alignment.\r
+//\r
+// Q3, A3 and B3 are the aligned query and two candidate parents; all three\r
+// must have the same number of columns (checked with asserta). The function\r
+//   1. gives each informative column a score for "query looks like A" and\r
+//      for "query looks like B" (+1 for a vote, -opt_xn against),\r
+//   2. computes best-segment scores ending at / starting at every column\r
+//      with the recursion V[k] = S[k] + max(V[k-1], 0),\r
+//   3. picks the crossover column ColX maximising left-segment plus\r
+//      right-segment score for either parent order,\r
+//   4. recovers the segment limits ColLo..ColHi, and\r
+//   5. passes just that column range to AlignChimeGlobal3, which fills Hit.\r
+//\r
+// Hit is cleared first and is left cleared when the alignment is empty, when\r
+// no crossover with positive score exists, or when the selected range covers\r
+// less than opt_queryfract of the query letters. In that last case the\r
+// PctId* fields have already been assigned.\r
+void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,\r
+  const string &QLabel, const string &ALabel, const string &BLabel,\r
+  ChimeHit2 &Hit)\r
+    {\r
+    Hit.Clear();\r
+\r
+    const byte *Q3Seq = (const byte *) Q3.c_str();\r
+    const byte *A3Seq = (const byte *) A3.c_str();\r
+    const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+    const unsigned ColCount = SIZE(Q3);\r
+    asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+    // An empty alignment has no crossover. Without this guard the code below\r
+    // would index element 0 / ColCount-1 of empty vectors and the unsigned\r
+    // expression ColCount-1 would wrap around in the loop bounds.\r
+    if (ColCount == 0)\r
+        return;\r
+\r
+    vector<float> ColScoresA(ColCount, 0.0f);\r
+    vector<float> ColScoresB(ColCount, 0.0f);\r
+\r
+    float ScoreN = -(float) opt_xn;\r
+    unsigned QL = 0;    // number of non-gap query letters\r
+    for (unsigned Col = 0; Col < ColCount; ++Col)\r
+        {\r
+        char q = Q3Seq[Col];\r
+        char a = A3Seq[Col];\r
+        char b = B3Seq[Col];\r
+\r
+        if (!isgap(q))\r
+            ++QL;\r
+\r
+        // Columns that are identical, contain a gap, or sit next to a column\r
+        // containing a gap keep a score of zero.\r
+        if (q == a && q == b && a == b)\r
+            continue;\r
+\r
+        if (isgap(q) || isgap(a) || isgap(b))\r
+            continue;\r
+\r
+        if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))\r
+            continue;\r
+\r
+        if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))\r
+            continue;\r
+\r
+        if (q == a && q != b)\r
+            ColScoresA[Col] = 1;\r
+        else\r
+            ColScoresA[Col] = ScoreN;\r
+\r
+        if (q == b && q != a)\r
+            ColScoresB[Col] = 1;\r
+        else\r
+            ColScoresB[Col] = ScoreN;\r
+        }\r
+\r
+    // LVx[Col]: best score of a segment ending exactly at Col.\r
+    vector<float> LVA(ColCount, 0.0f);\r
+    vector<float> LVB(ColCount, 0.0f);\r
+\r
+    LVA[0] = ColScoresA[0];\r
+    LVB[0] = ColScoresB[0];\r
+    for (unsigned Col = 1; Col < ColCount; ++Col)\r
+        {\r
+        LVA[Col] = max(LVA[Col-1], 0.0f) + ColScoresA[Col];\r
+        LVB[Col] = max(LVB[Col-1], 0.0f) + ColScoresB[Col];\r
+        }\r
+\r
+    // RVx[Col]: best score of a segment starting exactly at Col.\r
+    vector<float> RVA(ColCount, 0.0f);\r
+    vector<float> RVB(ColCount, 0.0f);\r
+\r
+    RVA[ColCount-1] = ColScoresA[ColCount-1];\r
+    RVB[ColCount-1] = ColScoresB[ColCount-1];\r
+    for (int Col = ColCount-2; Col >= 0; --Col)\r
+        {\r
+        RVA[Col] = max(RVA[Col+1], 0.0f) + ColScoresA[Col];\r
+        RVB[Col] = max(RVB[Col+1], 0.0f) + ColScoresB[Col];\r
+        }\r
+\r
+    // Crossover search: A on the left / B on the right first, then the\r
+    // reverse order. Only a strictly positive sum can set ColX.\r
+    bool FirstA = true;\r
+    float MaxSum = 0.0;\r
+    unsigned ColX = UINT_MAX;\r
+    for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+        {\r
+        float Sum = LVA[Col] + RVB[Col+1];\r
+        if (Sum > MaxSum)\r
+            {\r
+            FirstA = true;\r
+            MaxSum = Sum;\r
+            ColX = Col;\r
+            }\r
+        }\r
+\r
+    for (unsigned Col = 1; Col < ColCount-1; ++Col)\r
+        {\r
+        float Sum = LVB[Col] + RVA[Col+1];\r
+        if (Sum > MaxSum)\r
+            {\r
+            FirstA = false;\r
+            MaxSum = Sum;\r
+            ColX = Col;\r
+            }\r
+        }\r
+    if (ColX == UINT_MAX)\r
+        return;\r
+\r
+    // Recover the segment limits by accumulating column scores outwards from\r
+    // the crossover until the recorded best score is reached.\r
+    // NOTE(review): the right-hand scans start at ColX+1 but compare against\r
+    // RVB[ColX] / RVA[ColX], whereas the crossover sum used the value at\r
+    // ColX+1 -- confirm this is intended.\r
+    unsigned ColLo = UINT_MAX;\r
+    unsigned ColHi = UINT_MAX;\r
+    if (FirstA)\r
+        {\r
+        float Sum = 0.0f;\r
+        for (int Col = ColX; Col >= 0; --Col)\r
+            {\r
+            Sum += ColScoresA[Col];\r
+            if (Sum >= LVA[ColX])\r
+                {\r
+                ColLo = Col;\r
+                break;\r
+                }\r
+            }\r
+        asserta(Sum >= LVA[ColX]);\r
+        Sum = 0.0f;\r
+        for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+            {\r
+            Sum += ColScoresB[Col];\r
+            if (Sum >= RVB[ColX])\r
+                {\r
+                ColHi = Col;\r
+                break;\r
+                }\r
+            }\r
+        asserta(Sum >= RVB[ColX]);\r
+        }\r
+    else\r
+        {\r
+        float Sum = 0.0f;\r
+        for (int Col = ColX; Col >= 0; --Col)\r
+            {\r
+            Sum += ColScoresB[Col];\r
+            if (Sum >= LVB[ColX])\r
+                {\r
+                ColLo = Col;\r
+                break;\r
+                }\r
+            }\r
+        asserta(Sum >= LVB[ColX]);\r
+        Sum = 0.0f;\r
+        for (unsigned Col = ColX+1; Col < ColCount; ++Col)\r
+            {\r
+            Sum += ColScoresA[Col];\r
+            if (Sum >= RVA[ColX])\r
+                {\r
+                ColHi = Col;\r
+                break;\r
+                }\r
+            }\r
+        asserta(Sum >= RVA[ColX]);\r
+        }\r
+\r
+    // Widen the crossover over the run of gap-free, fully identical columns\r
+    // on each side of ColX.\r
+    unsigned ColXHi = ColX;\r
+    for (unsigned Col = ColX + 1; Col < ColCount; ++Col)\r
+        {\r
+        char q = Q3Seq[Col];\r
+        char a = A3Seq[Col];\r
+        char b = B3Seq[Col];\r
+\r
+        if (q == a && q == b && !isgap(q))\r
+            ColXHi = Col;\r
+        else\r
+            break;\r
+        }\r
+\r
+    unsigned ColXLo = ColX;\r
+    for (int Col = (int) ColX - 1; Col >= 0; --Col)\r
+        {\r
+        char q = Q3Seq[Col];\r
+        char a = A3Seq[Col];\r
+        char b = B3Seq[Col];\r
+\r
+        if (q == a && q == b && !isgap(q))\r
+            ColXLo = Col;\r
+        else\r
+            break;\r
+        }\r
+\r
+    // Pairwise identities over all columns where both rows have a letter.\r
+    unsigned IdQA = 0;\r
+    unsigned IdQB = 0;\r
+    unsigned IdAB = 0;\r
+    unsigned NQA = 0;\r
+    unsigned NQB = 0;\r
+    unsigned NAB = 0;\r
+    for (unsigned Col = 0; Col < ColCount; ++Col)\r
+        {\r
+        char q = Q3Seq[Col];\r
+        char a = A3Seq[Col];\r
+        char b = B3Seq[Col];\r
+\r
+        if (!isgap(q) && !isgap(a))\r
+            {\r
+            ++NQA;\r
+            if (q == a)\r
+                ++IdQA;\r
+            }\r
+\r
+        if (!isgap(q) && !isgap(b))\r
+            {\r
+            ++NQB;\r
+            if (q == b)\r
+                ++IdQB;\r
+            }\r
+\r
+        if (!isgap(a) && !isgap(b))\r
+            {\r
+            ++NAB;\r
+            if (a == b)\r
+                ++IdAB;\r
+            }\r
+        }\r
+\r
+    Hit.PctIdQA = Pct(IdQA, NQA);\r
+    Hit.PctIdQB = Pct(IdQB, NQB);\r
+    Hit.PctIdAB = Pct(IdAB, NAB);\r
+\r
+    // Identity counts left and right of the crossover; only the TRACE\r
+    // output below reads them.\r
+    unsigned LIdQA = 0;\r
+    unsigned LIdQB = 0;\r
+    for (unsigned Col = ColLo; Col < ColXLo; ++Col)\r
+        {\r
+        char q = Q3Seq[Col];\r
+        char a = A3Seq[Col];\r
+        char b = B3Seq[Col];\r
+\r
+        if (!isgap(q) && !isgap(a))\r
+            {\r
+            if (q == a)\r
+                ++LIdQA;\r
+            }\r
+\r
+        if (!isgap(q) && !isgap(b))\r
+            {\r
+            if (q == b)\r
+                ++LIdQB;\r
+            }\r
+        }\r
+\r
+    unsigned RIdQA = 0;\r
+    unsigned RIdQB = 0;\r
+    for (unsigned Col = ColXHi+1; Col <= ColHi; ++Col)\r
+        {\r
+        char q = Q3Seq[Col];\r
+        char a = A3Seq[Col];\r
+        char b = B3Seq[Col];\r
+\r
+        if (!isgap(q) && !isgap(a))\r
+            {\r
+            if (q == a)\r
+                ++RIdQA;\r
+            }\r
+\r
+        if (!isgap(q) && !isgap(b))\r
+            {\r
+            if (q == b)\r
+                ++RIdQB;\r
+            }\r
+        }\r
+\r
+    // Reject hits whose column range is too small a fraction of the query.\r
+    unsigned ColRange = ColHi - ColLo + 1;\r
+    if (opt_queryfract > 0.0f && float(ColRange)/float(QL) < opt_queryfract)\r
+        return;\r
+\r
+#if TRACE\r
+    {\r
+    Log(" Col A Q B ScoreA ScoreB LVA LVB RVA RVB\n");\r
+    Log("----- - - - ------- ------- ------- ------- ------- -------\n");\r
+    for (unsigned Col = 0; Col < ColCount; ++Col)\r
+        {\r
+        if (ColScoresA[Col] == 0.0 && ColScoresB[Col] == 0.0)\r
+            continue;\r
+\r
+        char q = Q3Seq[Col];\r
+        char a = A3Seq[Col];\r
+        char b = B3Seq[Col];\r
+        Log("%5u %c %c %c", Col, a, q, b);\r
+\r
+        if (ColScoresA[Col] == 0.0)\r
+            Log(" %7.7s", "");\r
+        else\r
+            Log(" %7.1f", ColScoresA[Col]);\r
+\r
+        if (ColScoresB[Col] == 0.0)\r
+            Log(" %7.7s", "");\r
+        else\r
+            Log(" %7.1f", ColScoresB[Col]);\r
+\r
+        Log(" %7.1f %7.1f %7.1f %7.1f", LVA[Col], LVB[Col], RVA[Col], RVB[Col]);\r
+\r
+        Log("\n");\r
+        }\r
+    Log("\n");\r
+    Log("MaxSum %.1f, ColLo %u, ColXLo %u, ColX %u, ColXHi %u, ColHi %u, AF %c\n",\r
+      MaxSum, ColLo, ColXLo, ColX, ColXHi, ColHi, tof(FirstA));\r
+    Log(" LIdQA %u, LIdQB %u, RIdQA %u, RIdQB %u\n", LIdQA, LIdQB, RIdQA, RIdQB);\r
+    }\r
+#endif\r
+\r
+    // Cut out the selected column range and let the global scorer fill Hit.\r
+    string Q3L;\r
+    string A3L;\r
+    string B3L;\r
+    for (unsigned Col = ColLo; Col <= ColHi; ++Col)\r
+        {\r
+        char q = Q3[Col];\r
+        char a = A3[Col];\r
+        char b = B3[Col];\r
+\r
+        Q3L += q;\r
+        A3L += a;\r
+        B3L += b;\r
+        }\r
+\r
+    AlignChimeGlobal3(Q3L, A3L, B3L, QLabel, ALabel, BLabel, Hit);\r
+    }\r
--- /dev/null
+A(Alpha)\r
+A(Mx)\r
+A(ChainBrute)\r
+A(Chainer)\r
+A(Test)\r
+A(CompressPath)\r
+A(HSPFinder)\r
+A(Main)\r
+A(Clumps)\r
+A(Path)\r
+A(SeqDB)\r
+A(SFasta)\r
+A(SWUngapped)\r
+A(AllocBit)\r
+A(Ultra)\r
+A(UPGMA)\r
+A(Windex)\r
+A(XDropBwd)\r
+A(Xlat)\r
+A(MPath)\r
+A(ScoreCache)\r
+A(TargetHits)\r
+A(Out)\r
+A(Hashdex)\r
--- /dev/null
+#ifndef alnheuristics_h\r
+#define alnheuristics_h\r
+\r
+struct AlnParams;\r
+\r
+// Bundle of tunable heuristic settings for the aligner.\r
+// NOTE(review): the meaning of each field is inferred from its name and the\r
+// inline comments only; the code that consumes them is not in this header.\r
+struct AlnHeuristics\r
+ {\r
+ // A value of 0, together with MinGlobalHSPLength == 0, is what\r
+ // IsGlobalFull() tests for.\r
+ unsigned BandRadius;\r
+ unsigned HSPFinderWordLength;\r
+ float SeedT;\r
+\r
+ float XDropG; // GappedBlast default\r
+ float XDropU; // UngappedBlast default\r
+ float XDropUG; // UngappedBlast called by GappedBlast\r
+\r
+ unsigned MinGlobalHSPLength;\r
+\r
+ // Constructor and initializers are defined elsewhere.\r
+ AlnHeuristics();\r
+ void InitFromCmdLine(const AlnParams &AP);\r
+ void InitGlobalFull();\r
+\r
+ // True when both MinGlobalHSPLength and BandRadius are zero.\r
+ bool IsGlobalFull() const\r
+ {\r
+ return MinGlobalHSPLength == 0 && BandRadius == 0;\r
+ }\r
+\r
+ };\r
+\r
+#endif // alnheuristics_h\r
--- /dev/null
+#include "myutils.h"\r
+#include <float.h> // for FLT_MAX\r
+#include "mx.h"\r
+#include "alnparams.h"\r
+#include "hsp.h"\r
+\r
+#define TEST 0\r
+\r
+void SetBLOSUM62();
+void SetNucSubstMx(double Match, double Mismatch);\r
+void ReadSubstMx(const string &FileName, Mx<float> &Mxf);\r
+
+extern Mx<float> g_SubstMxf;
+extern float **g_SubstMx;
+\r
+// Resets the parameter set to an "unset" state: no matrix name, alphabet\r
+// flags false, and every gap score set to OBVIOUSLY_WRONG_PENALTY.\r
+// NOTE(review): SubstMx itself is not touched here -- confirm that callers\r
+// always assign it before use.\r
+void AlnParams::Clear()\r
+    {\r
+    SubstMxName = 0;\r
+    Nucleo = false;\r
+    NucleoSet = false;\r
+\r
+    // Local gaps.\r
+    LocalOpen = LocalExt = OBVIOUSLY_WRONG_PENALTY;\r
+\r
+    // Global internal gaps.\r
+    OpenA = OpenB = OBVIOUSLY_WRONG_PENALTY;\r
+    ExtA = ExtB = OBVIOUSLY_WRONG_PENALTY;\r
+\r
+    // Global terminal gaps.\r
+    LOpenA = LOpenB = ROpenA = ROpenB = OBVIOUSLY_WRONG_PENALTY;\r
+    LExtA = LExtB = RExtA = RExtB = OBVIOUSLY_WRONG_PENALTY;\r
+    }\r
+\r
+// True when the six gap-open scores are all equal and the six gap-extend\r
+// scores are all equal, i.e. the set is described by two numbers.\r
+bool AlnParams::Is2() const\r
+    {\r
+    const float Open = OpenA;\r
+    const float Ext = ExtA;\r
+\r
+    const bool OneOpen = OpenB == Open && LOpenA == Open && LOpenB == Open\r
+      && ROpenA == Open && ROpenB == Open;\r
+    const bool OneExt = ExtB == Ext && LExtA == Ext && LExtB == Ext\r
+      && RExtA == Ext && RExtB == Ext;\r
+\r
+    return OneOpen && OneExt;\r
+    }\r
+\r
+// True when the set is described by four numbers: one internal open, one\r
+// internal extend, one terminal open and one terminal extend score, each\r
+// shared by both sequences (and, for terminal gaps, by both ends).\r
+bool AlnParams::Is4() const\r
+    {\r
+    const float Open = OpenA;\r
+    const float Ext = ExtA;\r
+    const float TermOpen = LOpenA;\r
+    const float TermExt = LExtA;\r
+\r
+    // LOpenA and LExtA are the reference values themselves, so only the\r
+    // other three terminal scores of each kind need comparing.\r
+    if (!(OpenB == Open && LOpenB == TermOpen && ROpenA == TermOpen && ROpenB == TermOpen))\r
+        return false;\r
+    return ExtB == Ext && LExtB == TermExt && RExtA == TermExt && RExtB == TermExt;\r
+    }\r
+\r
+// Returns "2", "4" or "12": the number of distinct gap scores needed to\r
+// describe this parameter set, as decided by Is2() and Is4().\r
+const char *AlnParams::GetType() const\r
+    {\r
+    return Is2() ? "2" : (Is4() ? "4" : "12");\r
+    }\r
+\r
+// Two-number setup: stores Mx as the substitution matrix and uses Open and\r
+// Ext for every internal and terminal gap of both sequences.\r
+void AlnParams::Init2(const float * const *Mx, float Open, float Ext)\r
+    {\r
+    SubstMx = Mx;\r
+\r
+    OpenA = Open;\r
+    OpenB = Open;\r
+    LOpenA = Open;\r
+    LOpenB = Open;\r
+    ROpenA = Open;\r
+    ROpenB = Open;\r
+\r
+    ExtA = Ext;\r
+    ExtB = Ext;\r
+    LExtA = Ext;\r
+    LExtB = Ext;\r
+    RExtA = Ext;\r
+    RExtB = Ext;\r
+    }\r
+\r
+// Stores the gap open / extend scores used for local alignment.\r
+void AlnParams::SetLocal(float Open, float Ext)\r
+    {\r
+    this->LocalOpen = Open;\r
+    this->LocalExt = Ext;\r
+    }\r
+\r
+// Four-number setup: Open/Ext apply to internal gaps, TermOpen/TermExt to\r
+// the left and right terminal gaps, identically for both sequences.\r
+void AlnParams::Init4(const float * const *Mx, float Open, float Ext,\r
+  float TermOpen, float TermExt)\r
+    {\r
+    SubstMx = Mx;\r
+\r
+    // Internal gaps.\r
+    OpenA = Open;\r
+    OpenB = Open;\r
+    ExtA = Ext;\r
+    ExtB = Ext;\r
+\r
+    // Terminal gaps.\r
+    LOpenA = TermOpen;\r
+    LOpenB = TermOpen;\r
+    ROpenA = TermOpen;\r
+    ROpenB = TermOpen;\r
+    LExtA = TermExt;\r
+    LExtB = TermExt;\r
+    RExtA = TermExt;\r
+    RExtB = TermExt;\r
+    }\r
+\r
+// Derives the parameters for aligning around one HSP from the full set AP.\r
+//\r
+// The matrix and internal gap scores are copied as they are. For each of\r
+// the four ends (left/right of A and B) AP's terminal score is used when\r
+// the matching HSP predicate (LeftA, LeftB, RightA(LA), RightB(LB)) is\r
+// true; otherwise AP's internal score is used for that end.\r
+void AlnParams::Init(const AlnParams &AP, const HSPData &HSP,\r
+  unsigned LA, unsigned LB)\r
+    {\r
+    SubstMx = AP.SubstMx;\r
+    OpenA = AP.OpenA;\r
+    OpenB = AP.OpenB;\r
+    ExtA = AP.ExtA;\r
+    ExtB = AP.ExtB;\r
+\r
+    const bool TermLeftA = HSP.LeftA();\r
+    LOpenA = TermLeftA ? AP.LOpenA : AP.OpenA;\r
+    LExtA = TermLeftA ? AP.LExtA : AP.ExtA;\r
+\r
+    const bool TermLeftB = HSP.LeftB();\r
+    LOpenB = TermLeftB ? AP.LOpenB : AP.OpenB;\r
+    LExtB = TermLeftB ? AP.LExtB : AP.ExtB;\r
+\r
+    const bool TermRightA = HSP.RightA(LA);\r
+    ROpenA = TermRightA ? AP.ROpenA : AP.OpenA;\r
+    RExtA = TermRightA ? AP.RExtA : AP.ExtA;\r
+\r
+    const bool TermRightB = HSP.RightB(LB);\r
+    ROpenB = TermRightB ? AP.ROpenB : AP.OpenB;\r
+    RExtB = TermRightB ? AP.RExtB : AP.ExtB;\r
+    }\r
+\r
+// Writes a one-line summary of the gap parameters to the log.\r
+//\r
+// The 2- and 4-number forms print penalties (scores negated); the general\r
+// 12-number form prints the stored scores unchanged.\r
+void AlnParams::LogMe() const\r
+    {\r
+    Log("AlnParams(%s)", GetType());\r
+    if (Is2())\r
+        Log(" g=%.1f e=%.1f", -OpenA, -ExtA);\r
+    else if (Is4())\r
+        // Argument order follows the labels g, tg, e, te. The previous\r
+        // order passed ExtA for tg and LOpenA for e.\r
+        Log(" g=%.1f tg=%.1f e=%.1f te=%.1f", -OpenA, -LOpenA, -ExtA, -LExtA);\r
+    else\r
+        Log(\r
+" gA=%.1f gB=%.1f gAL=%.1f gBL=%.1f gAR=%.1f gBR=%.1f eA=%.1f eB=%.1f eAL=%.1f eBL=%.1f eAR=%.1f eBR=%.1f",\r
+          OpenA, OpenB, LOpenA, LOpenB, ROpenA, ROpenB, ExtA, ExtB, LExtA, LExtB, RExtA, RExtB);\r
+    Log("\n");\r
+    }\r
+\r
+/***\r
+Open/Ext format string is one or more:\r
+ [<flag><flag>...]<value>\r
+\r
+Value is (positive) penalty or * (disabled).\r
+Flag is:\r
+ Q Query.\r
+ T Target sequence.\r
+ I Internal gaps (default internal and terminal).\r
+ E End gaps (default internal and terminal).\r
+ L Left end.\r
+ R Right end.\r
+***/\r
+\r
+// Parses a gap penalty string and stores the resulting scores.\r
+//\r
+// s is a '/'-separated list of fields, each a set of flag letters\r
+// (Q, T, I, E, L, R) plus a value that is either a non-negative decimal\r
+// number or '*'. For every field the value is negated and written to the\r
+// outputs selected by the flags:\r
+//   QI/QL/QR  query internal / left-end / right-end\r
+//   TI/TL/TR  target internal / left-end / right-end\r
+// Outputs not selected by any field keep their previous value, and an empty\r
+// string changes nothing. A malformed string ends the program through Die().\r
+static void ParseGapStr(const string &s,\r
+  float &QI, float &QL, float &QR,\r
+  float &TI, float &TL, float &TR)\r
+    {\r
+    if (s.empty())\r
+        return;\r
+\r
+    bool Q = false;\r
+    bool T = false;\r
+    bool I = false;\r
+    bool E = false;\r
+    bool L = false;\r
+    bool R = false;\r
+\r
+    const unsigned K = SIZE(s);\r
+    unsigned Dec = 0;           // 0 = no '.' seen yet, else current decimal divisor\r
+    float Value = FLT_MAX;      // FLT_MAX = no value seen yet in this field\r
+    // i runs up to and including K so that the terminating NUL closes the\r
+    // last field exactly like a '/'.\r
+    for (unsigned i = 0; i <= K; ++i)\r
+        {\r
+        char c = s.c_str()[i];\r
+        if (c == 0 || c == '/')\r
+            {\r
+            if (Value == FLT_MAX)\r
+                Die("Invalid gap penalty string, missing penalty '%s'", s.c_str());\r
+\r
+            // No flags at all: the value applies everywhere.\r
+            if (!Q && !T && !I && !E && !L && !R)\r
+                {\r
+                Q = true;\r
+                T = true;\r
+                L = true;\r
+                R = true;\r
+                I = true;\r
+                }\r
+\r
+            // Only Q/T given: internal and both ends.\r
+            if (!E && !I && !L && !R)\r
+                {\r
+                I = true;\r
+                L = true;\r
+                R = true;\r
+                }\r
+\r
+            // E is shorthand for both ends and excludes explicit L or R.\r
+            if (E)\r
+                {\r
+                if (L || R)\r
+                    Die("Invalid gap penalty string (E and L or R) '%s'", s.c_str());\r
+                L = true;\r
+                R = true;\r
+                }\r
+\r
+            // Neither Q nor T given: both sequences.\r
+            if (!Q && !T)\r
+                {\r
+                Q = true;\r
+                T = true;\r
+                }\r
+\r
+            if (Q && L)\r
+                QL = -Value;\r
+            if (Q && R)\r
+                QR = -Value;\r
+            if (Q && I)\r
+                QI = -Value;\r
+            if (T && L)\r
+                TL = -Value;\r
+            if (T && R)\r
+                TR = -Value;\r
+            if (T && I)\r
+                TI = -Value;\r
+\r
+            // Reset the per-field state for the next field.\r
+            Value = FLT_MAX;\r
+            Dec = 0;\r
+            Q = false;\r
+            T = false;\r
+            I = false;\r
+            E = false;\r
+            L = false;\r
+            R = false;\r
+            }\r
+        else if (c == '*')\r
+            {\r
+            if (Value != FLT_MAX)\r
+                Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
+            Value = -MINUS_INFINITY;\r
+            }\r
+        // Cast first: isdigit() on a negative plain char is undefined.\r
+        else if (isdigit((unsigned char) c))\r
+            {\r
+            if (Value == -MINUS_INFINITY)\r
+                Die("Invalid gap penalty (* in floating point number) '%s'", s.c_str());\r
+            if (Value == FLT_MAX)\r
+                Value = 0.0;\r
+            if (Dec > 0)\r
+                {\r
+                Dec *= 10;\r
+                Value += float(c - '0')/Dec;\r
+                }\r
+            else\r
+                Value = Value*10 + (c - '0');\r
+            }\r
+        else if (c == '.')\r
+            {\r
+            if (Dec > 0)\r
+                Die("Invalid gap penalty (two decimal points) '%s'", s.c_str());\r
+            Dec = 1;\r
+            }\r
+        else\r
+            {\r
+            switch (c)\r
+                {\r
+            case 'Q':\r
+                Q = true;\r
+                break;\r
+            case 'T':\r
+                T = true;\r
+                break;\r
+            case 'I':\r
+                I = true;\r
+                break;\r
+            case 'L':\r
+                L = true;\r
+                break;\r
+            case 'R':\r
+                R = true;\r
+                break;\r
+            case 'E':\r
+                E = true;\r
+                break;\r
+            default:\r
+                Die("Invalid char '%c' in gap penalty string '%s'", c, s.c_str());\r
+                }\r
+            }\r
+        }\r
+    }\r
+\r
+// Applies user-supplied gap strings to the global gap scores.\r
+// OpenStr updates the open scores and ExtStr the extend scores; in both\r
+// calls the A members are passed in ParseGapStr's query (Q*) slots and the\r
+// B members in its target (T*) slots.\r
+void AlnParams::SetPenalties(const string &OpenStr, const string &ExtStr)\r
+ {\r
+ ParseGapStr(OpenStr, OpenA, LOpenA, ROpenA, OpenB, LOpenB, ROpenB);\r
+ ParseGapStr(ExtStr, ExtA, LExtA, RExtA, ExtB, LExtB, RExtB);\r
+ }\r
+\r
+// Selects the substitution matrix from the command-line options and stores\r
+// the global g_SubstMx pointer in SubstMx (asserted non-null).\r
+//   nucleotide: matrix built from opt_match / opt_mismatch\r
+//   protein, no opt_matrix: BLOSUM62\r
+//   protein, opt_matrix set: matrix read from that file (and logged)\r
+// SubstMxName is assigned only in the two protein cases.\r
+void AlnParams::SetMxFromCmdLine(bool IsNucleo)\r
+    {\r
+    if (IsNucleo)\r
+        {\r
+        SetNucSubstMx(opt_match, opt_mismatch);\r
+        }\r
+    else if (opt_matrix == "")\r
+        {\r
+        SubstMxName = "BLOSUM62";\r
+        SetBLOSUM62();\r
+        }\r
+    else\r
+        {\r
+        ReadSubstMx(opt_matrix, g_SubstMxf);\r
+        g_SubstMx = g_SubstMxf.GetData();\r
+        g_SubstMxf.LogMe();\r
+        SubstMxName = opt_matrix.c_str();\r
+        }\r
+\r
+    SubstMx = g_SubstMx;\r
+    asserta(SubstMx != 0);\r
+    }\r
+\r
+// Full initialisation from the command line.\r
+//\r
+// Clears the object, records the alphabet, selects the substitution matrix,\r
+// then sets the gap scores:\r
+//   local:  --lopen/--lext when given (both required, both >= 0),\r
+//           otherwise open -10, extend -1;\r
+//   global: internal open -10 (nucleotide) or -17 (protein), internal\r
+//           extend -1, terminal open and extend -0.5, followed by any\r
+//           overrides from opt_gapopen / opt_gapext.\r
+void AlnParams::InitFromCmdLine(bool IsNucleo)\r
+    {\r
+    Clear();\r
+    Nucleo = IsNucleo;\r
+    NucleoSet = true;\r
+\r
+    SetMxFromCmdLine(IsNucleo);\r
+\r
+    // Local gap scores.\r
+    if (!(optset_lopen || optset_lext))\r
+        {\r
+        // The defaults are currently the same for nucleotide and protein.\r
+        SetLocal(-10.0f, -1.0f);\r
+        }\r
+    else\r
+        {\r
+        if (!optset_lopen || !optset_lext)\r
+            Die("Must set both --lopen and --lext");\r
+        if (opt_lopen < 0.0 || opt_lext < 0.0)\r
+            Die("Invalid --lopen/--lext, gap penalties must be >= 0");\r
+        SetLocal(float(-opt_lopen), float(-opt_lext));\r
+        }\r
+\r
+    // Global gap scores: only the internal open default depends on alphabet.\r
+    const float DefaultOpen = IsNucleo ? -10.0f : -17.0f;\r
+    Init4(g_SubstMx, DefaultOpen, -1.0f, -0.5f, -0.5f);\r
+    SetPenalties(opt_gapopen, opt_gapext);\r
+    }\r
+\r
+// Returns the stored local-alignment gap open score.\r
+float AlnParams::GetLocalOpen() const\r
+ {\r
+ return LocalOpen;\r
+ }\r
+\r
+// Returns the stored local-alignment gap extend score.\r
+float AlnParams::GetLocalExt() const\r
+ {\r
+ return LocalExt;\r
+ }\r
+\r
+// Returns the Nucleo flag. Asserts that the flag has been set (NucleoSet),\r
+// so calling this on a merely Clear()ed object fails the assertion.\r
+bool AlnParams::GetIsNucleo() const\r
+ {\r
+ asserta(NucleoSet);\r
+ return Nucleo;\r
+ }\r
+\r
+// Word length for the word index: the user's opt_w when that option was\r
+// set, otherwise 8 for nucleotide and 5 for protein input.\r
+unsigned GetWindexWordLength(bool Nucleo)\r
+    {\r
+    if (optset_w)\r
+        return opt_w;\r
+\r
+    return Nucleo ? 8 : 5;\r
+    }\r
+\r
+#if TEST\r
+// Test helper (compiled only when TEST is non-zero): logs the two gap\r
+// strings, applies them to a default-constructed AlnParams and logs the\r
+// result.\r
+// NOTE(review): AP is neither Clear()ed nor Init*()ed first, so any score\r
+// the strings do not set is logged with whatever value it happens to hold.\r
+static void Test1(const string &os, const string &es)\r
+ {\r
+ AlnParams AP;\r
+ Log("\n");\r
+ Log("OpenStr %s\n", os.c_str());\r
+ Log(" ExtStr %s\n", es.c_str());\r
+ AP.SetPenalties(os, es);\r
+ AP.LogMe();\r
+ }\r
+\r
+// Runs Test1 on three sample gap-string pairs: internal/end flags,\r
+// left/right-end and query/target flags, and all six combined flags with\r
+// fractional values.\r
+void TestGapStr()\r
+ {\r
+ Test1("17I/0.5E", "1I/0.5E");\r
+ Test1("17I/0.5L/0.4R", "1Q/2T");\r
+ Test1("1QL/2QR/3QI/4TL/5TR/6TI", ".1QL/.2QR/.3QI/.4TL/.5TR/.6TI");\r
+ }\r
+#endif // TEST\r
--- /dev/null
+#ifndef alnparams_h\r
+#define alnparams_h\r
+\r
+struct HSPData;\r
+\r
+// Gap penalty scores are negative\r
+// (i.e., are scores, not penalties).\r
+// Alignment scoring parameters: a substitution matrix plus gap scores for\r
+// local alignment and for global alignment (internal and terminal gaps,\r
+// separately for sequences A and B and for the left and right ends).\r
+struct AlnParams\r
+ {\r
+ // Matrix name for display; not owned by this struct.\r
+ const char *SubstMxName;\r
+ // Substitution score matrix; not owned by this struct.\r
+ const float * const *SubstMx;\r
+\r
+ // Alphabet flag; GetIsNucleo() asserts NucleoSet before returning it.\r
+ bool Nucleo;\r
+ bool NucleoSet;\r
+\r
+// Local gaps\r
+ float LocalOpen;\r
+ float LocalExt;\r
+\r
+// Global internal gaps\r
+ float OpenA;\r
+ float OpenB;\r
+\r
+ float ExtA;\r
+ float ExtB;\r
+\r
+// Global terminal gaps\r
+ float LOpenA;\r
+ float LOpenB;\r
+ float ROpenA;\r
+ float ROpenB;\r
+\r
+ float LExtA;\r
+ float LExtB;\r
+ float RExtA;\r
+ float RExtB;\r
+\r
+ // Initialisers (defined in the .cpp file).\r
+ void Clear();\r
+ void SetLocal(float Open, float Ext);\r
+ void Init2(const float * const *Mx, float Open, float Ext);\r
+ void Init4(const float * const *Mx, float Open, float Ext, float TermOpen, float TermExt);\r
+ void Init(const AlnParams &AP, const HSPData &HSP, unsigned LA, unsigned LB);\r
+ void InitFromCmdLine(bool Nucleo);\r
+ void SetMxFromCmdLine(bool Nucleo);\r
+ void SetPenalties(const string &OpenStr, const string &ExtStr);\r
+ // Accessors.\r
+ float GetLocalOpen() const;\r
+ float GetLocalExt() const;\r
+ bool GetIsNucleo() const;\r
+\r
+ // Classification by how many distinct global gap scores are in use;\r
+ // GetType() returns "2", "4" or "12".\r
+ bool Is2() const;\r
+ bool Is4() const;\r
+ const char *GetType() const;\r
+\r
+ void LogMe() const;\r
+ };\r
+\r
+// Placeholder assigned to every gap score by AlnParams::Clear(). Valid gap\r
+// scores are negative, so this positive value marks a score as not yet set.\r
+const float OBVIOUSLY_WRONG_PENALTY = 1000.0;\r
+\r
+#endif // alnparams_h\r
--- /dev/null
+// Generated by /p/py/alphac.py
+#include "alpha.h"
+
+// Lookup table from a byte value to an amino-acid letter index.
+// The 20 letters A C D E F G H I K L M N P Q R S T V W Y map to 0..19 in
+// both upper and lower case, '*' (stop) maps to 20, and every other byte
+// maps to INVALID_LETTER.
+unsigned g_CharToLetterAminoStop[256] =
+ {
+ INVALID_LETTER, // [ 0] 0x00
+ INVALID_LETTER, // [ 1] 0x01
+ INVALID_LETTER, // [ 2] 0x02
+ INVALID_LETTER, // [ 3] 0x03
+ INVALID_LETTER, // [ 4] 0x04
+ INVALID_LETTER, // [ 5] 0x05
+ INVALID_LETTER, // [ 6] 0x06
+ INVALID_LETTER, // [ 7] 0x07
+ INVALID_LETTER, // [ 8] 0x08
+ INVALID_LETTER, // [ 9] 0x09
+ INVALID_LETTER, // [ 10] 0x0a
+ INVALID_LETTER, // [ 11] 0x0b
+ INVALID_LETTER, // [ 12] 0x0c
+ INVALID_LETTER, // [ 13] 0x0d
+ INVALID_LETTER, // [ 14] 0x0e
+ INVALID_LETTER, // [ 15] 0x0f
+ INVALID_LETTER, // [ 16] 0x10
+ INVALID_LETTER, // [ 17] 0x11
+ INVALID_LETTER, // [ 18] 0x12
+ INVALID_LETTER, // [ 19] 0x13
+ INVALID_LETTER, // [ 20] 0x14
+ INVALID_LETTER, // [ 21] 0x15
+ INVALID_LETTER, // [ 22] 0x16
+ INVALID_LETTER, // [ 23] 0x17
+ INVALID_LETTER, // [ 24] 0x18
+ INVALID_LETTER, // [ 25] 0x19
+ INVALID_LETTER, // [ 26] 0x1a
+ INVALID_LETTER, // [ 27] 0x1b
+ INVALID_LETTER, // [ 28] 0x1c
+ INVALID_LETTER, // [ 29] 0x1d
+ INVALID_LETTER, // [ 30] 0x1e
+ INVALID_LETTER, // [ 31] 0x1f
+ INVALID_LETTER, // [ 32] ' '
+ INVALID_LETTER, // [ 33] '!'
+ INVALID_LETTER, // [ 34] '"'
+ INVALID_LETTER, // [ 35] '#'
+ INVALID_LETTER, // [ 36] '$'
+ INVALID_LETTER, // [ 37] '%'
+ INVALID_LETTER, // [ 38] '&'
+ INVALID_LETTER, // [ 39] '''
+ INVALID_LETTER, // [ 40] '('
+ INVALID_LETTER, // [ 41] ')'
+ 20 , // [ 42] '*' = STP
+ INVALID_LETTER, // [ 43] '+'
+ INVALID_LETTER, // [ 44] ','
+ INVALID_LETTER, // [ 45] '-'
+ INVALID_LETTER, // [ 46] '.'
+ INVALID_LETTER, // [ 47] '/'
+ INVALID_LETTER, // [ 48] '0'
+ INVALID_LETTER, // [ 49] '1'
+ INVALID_LETTER, // [ 50] '2'
+ INVALID_LETTER, // [ 51] '3'
+ INVALID_LETTER, // [ 52] '4'
+ INVALID_LETTER, // [ 53] '5'
+ INVALID_LETTER, // [ 54] '6'
+ INVALID_LETTER, // [ 55] '7'
+ INVALID_LETTER, // [ 56] '8'
+ INVALID_LETTER, // [ 57] '9'
+ INVALID_LETTER, // [ 58] ':'
+ INVALID_LETTER, // [ 59] ';'
+ INVALID_LETTER, // [ 60] '<'
+ INVALID_LETTER, // [ 61] '='
+ INVALID_LETTER, // [ 62] '>'
+ INVALID_LETTER, // [ 63] '?'
+ INVALID_LETTER, // [ 64] '@'
+ 0 , // [ 65] 'A' = Ala
+ INVALID_LETTER, // [ 66] 'B'
+ 1 , // [ 67] 'C' = Cys
+ 2 , // [ 68] 'D' = Asp
+ 3 , // [ 69] 'E' = Glu
+ 4 , // [ 70] 'F' = Phe
+ 5 , // [ 71] 'G' = Gly
+ 6 , // [ 72] 'H' = His
+ 7 , // [ 73] 'I' = Ile
+ INVALID_LETTER, // [ 74] 'J'
+ 8 , // [ 75] 'K' = Lys
+ 9 , // [ 76] 'L' = Leu
+ 10 , // [ 77] 'M' = Met
+ 11 , // [ 78] 'N' = Asn
+ INVALID_LETTER, // [ 79] 'O'
+ 12 , // [ 80] 'P' = Pro
+ 13 , // [ 81] 'Q' = Gln
+ 14 , // [ 82] 'R' = Arg
+ 15 , // [ 83] 'S' = Ser
+ 16 , // [ 84] 'T' = Thr
+ INVALID_LETTER, // [ 85] 'U'
+ 17 , // [ 86] 'V' = Val
+ 18 , // [ 87] 'W' = Trp
+ INVALID_LETTER, // [ 88] 'X'
+ 19 , // [ 89] 'Y' = Tyr
+ INVALID_LETTER, // [ 90] 'Z'
+ INVALID_LETTER, // [ 91] '['
+ INVALID_LETTER, // [ 92] '\'
+ INVALID_LETTER, // [ 93] ']'
+ INVALID_LETTER, // [ 94] '^'
+ INVALID_LETTER, // [ 95] '_'
+ INVALID_LETTER, // [ 96] '`'
+ 0 , // [ 97] 'a' = Ala
+ INVALID_LETTER, // [ 98] 'b'
+ 1 , // [ 99] 'c' = Cys
+ 2 , // [100] 'd' = Asp
+ 3 , // [101] 'e' = Glu
+ 4 , // [102] 'f' = Phe
+ 5 , // [103] 'g' = Gly
+ 6 , // [104] 'h' = His
+ 7 , // [105] 'i' = Ile
+ INVALID_LETTER, // [106] 'j'
+ 8 , // [107] 'k' = Lys
+ 9 , // [108] 'l' = Leu
+ 10 , // [109] 'm' = Met
+ 11 , // [110] 'n' = Asn
+ INVALID_LETTER, // [111] 'o'
+ 12 , // [112] 'p' = Pro
+ 13 , // [113] 'q' = Gln
+ 14 , // [114] 'r' = Arg
+ 15 , // [115] 's' = Ser
+ 16 , // [116] 't' = Thr
+ INVALID_LETTER, // [117] 'u'
+ 17 , // [118] 'v' = Val
+ 18 , // [119] 'w' = Trp
+ INVALID_LETTER, // [120] 'x'
+ 19 , // [121] 'y' = Tyr
+ INVALID_LETTER, // [122] 'z'
+ INVALID_LETTER, // [123] '{'
+ INVALID_LETTER, // [124] '|'
+ INVALID_LETTER, // [125] '}'
+ INVALID_LETTER, // [126] '~'
+ INVALID_LETTER, // [127] 0x7f
+ INVALID_LETTER, // [128] 0x80
+ INVALID_LETTER, // [129] 0x81
+ INVALID_LETTER, // [130] 0x82
+ INVALID_LETTER, // [131] 0x83
+ INVALID_LETTER, // [132] 0x84
+ INVALID_LETTER, // [133] 0x85
+ INVALID_LETTER, // [134] 0x86
+ INVALID_LETTER, // [135] 0x87
+ INVALID_LETTER, // [136] 0x88
+ INVALID_LETTER, // [137] 0x89
+ INVALID_LETTER, // [138] 0x8a
+ INVALID_LETTER, // [139] 0x8b
+ INVALID_LETTER, // [140] 0x8c
+ INVALID_LETTER, // [141] 0x8d
+ INVALID_LETTER, // [142] 0x8e
+ INVALID_LETTER, // [143] 0x8f
+ INVALID_LETTER, // [144] 0x90
+ INVALID_LETTER, // [145] 0x91
+ INVALID_LETTER, // [146] 0x92
+ INVALID_LETTER, // [147] 0x93
+ INVALID_LETTER, // [148] 0x94
+ INVALID_LETTER, // [149] 0x95
+ INVALID_LETTER, // [150] 0x96
+ INVALID_LETTER, // [151] 0x97
+ INVALID_LETTER, // [152] 0x98
+ INVALID_LETTER, // [153] 0x99
+ INVALID_LETTER, // [154] 0x9a
+ INVALID_LETTER, // [155] 0x9b
+ INVALID_LETTER, // [156] 0x9c
+ INVALID_LETTER, // [157] 0x9d
+ INVALID_LETTER, // [158] 0x9e
+ INVALID_LETTER, // [159] 0x9f
+ INVALID_LETTER, // [160] 0xa0
+ INVALID_LETTER, // [161] 0xa1
+ INVALID_LETTER, // [162] 0xa2
+ INVALID_LETTER, // [163] 0xa3
+ INVALID_LETTER, // [164] 0xa4
+ INVALID_LETTER, // [165] 0xa5
+ INVALID_LETTER, // [166] 0xa6
+ INVALID_LETTER, // [167] 0xa7
+ INVALID_LETTER, // [168] 0xa8
+ INVALID_LETTER, // [169] 0xa9
+ INVALID_LETTER, // [170] 0xaa
+ INVALID_LETTER, // [171] 0xab
+ INVALID_LETTER, // [172] 0xac
+ INVALID_LETTER, // [173] 0xad
+ INVALID_LETTER, // [174] 0xae
+ INVALID_LETTER, // [175] 0xaf
+ INVALID_LETTER, // [176] 0xb0
+ INVALID_LETTER, // [177] 0xb1
+ INVALID_LETTER, // [178] 0xb2
+ INVALID_LETTER, // [179] 0xb3
+ INVALID_LETTER, // [180] 0xb4
+ INVALID_LETTER, // [181] 0xb5
+ INVALID_LETTER, // [182] 0xb6
+ INVALID_LETTER, // [183] 0xb7
+ INVALID_LETTER, // [184] 0xb8
+ INVALID_LETTER, // [185] 0xb9
+ INVALID_LETTER, // [186] 0xba
+ INVALID_LETTER, // [187] 0xbb
+ INVALID_LETTER, // [188] 0xbc
+ INVALID_LETTER, // [189] 0xbd
+ INVALID_LETTER, // [190] 0xbe
+ INVALID_LETTER, // [191] 0xbf
+ INVALID_LETTER, // [192] 0xc0
+ INVALID_LETTER, // [193] 0xc1
+ INVALID_LETTER, // [194] 0xc2
+ INVALID_LETTER, // [195] 0xc3
+ INVALID_LETTER, // [196] 0xc4
+ INVALID_LETTER, // [197] 0xc5
+ INVALID_LETTER, // [198] 0xc6
+ INVALID_LETTER, // [199] 0xc7
+ INVALID_LETTER, // [200] 0xc8
+ INVALID_LETTER, // [201] 0xc9
+ INVALID_LETTER, // [202] 0xca
+ INVALID_LETTER, // [203] 0xcb
+ INVALID_LETTER, // [204] 0xcc
+ INVALID_LETTER, // [205] 0xcd
+ INVALID_LETTER, // [206] 0xce
+ INVALID_LETTER, // [207] 0xcf
+ INVALID_LETTER, // [208] 0xd0
+ INVALID_LETTER, // [209] 0xd1
+ INVALID_LETTER, // [210] 0xd2
+ INVALID_LETTER, // [211] 0xd3
+ INVALID_LETTER, // [212] 0xd4
+ INVALID_LETTER, // [213] 0xd5
+ INVALID_LETTER, // [214] 0xd6
+ INVALID_LETTER, // [215] 0xd7
+ INVALID_LETTER, // [216] 0xd8
+ INVALID_LETTER, // [217] 0xd9
+ INVALID_LETTER, // [218] 0xda
+ INVALID_LETTER, // [219] 0xdb
+ INVALID_LETTER, // [220] 0xdc
+ INVALID_LETTER, // [221] 0xdd
+ INVALID_LETTER, // [222] 0xde
+ INVALID_LETTER, // [223] 0xdf
+ INVALID_LETTER, // [224] 0xe0
+ INVALID_LETTER, // [225] 0xe1
+ INVALID_LETTER, // [226] 0xe2
+ INVALID_LETTER, // [227] 0xe3
+ INVALID_LETTER, // [228] 0xe4
+ INVALID_LETTER, // [229] 0xe5
+ INVALID_LETTER, // [230] 0xe6
+ INVALID_LETTER, // [231] 0xe7
+ INVALID_LETTER, // [232] 0xe8
+ INVALID_LETTER, // [233] 0xe9
+ INVALID_LETTER, // [234] 0xea
+ INVALID_LETTER, // [235] 0xeb
+ INVALID_LETTER, // [236] 0xec
+ INVALID_LETTER, // [237] 0xed
+ INVALID_LETTER, // [238] 0xee
+ INVALID_LETTER, // [239] 0xef
+ INVALID_LETTER, // [240] 0xf0
+ INVALID_LETTER, // [241] 0xf1
+ INVALID_LETTER, // [242] 0xf2
+ INVALID_LETTER, // [243] 0xf3
+ INVALID_LETTER, // [244] 0xf4
+ INVALID_LETTER, // [245] 0xf5
+ INVALID_LETTER, // [246] 0xf6
+ INVALID_LETTER, // [247] 0xf7
+ INVALID_LETTER, // [248] 0xf8
+ INVALID_LETTER, // [249] 0xf9
+ INVALID_LETTER, // [250] 0xfa
+ INVALID_LETTER, // [251] 0xfb
+ INVALID_LETTER, // [252] 0xfc
+ INVALID_LETTER, // [253] 0xfd
+ INVALID_LETTER, // [254] 0xfe
+ INVALID_LETTER, // [255] 0xff
+ };
+unsigned g_CharToLetterAmino[256] = // Lookup: character code (0-255) -> amino-acid letter index 0..19, same for upper and lower case.
+ { // Every other code, including 'B', 'J', 'O', 'U', 'X', 'Z' and '*', holds INVALID_LETTER.
+ INVALID_LETTER, // [ 0] 0x00
+ INVALID_LETTER, // [ 1] 0x01
+ INVALID_LETTER, // [ 2] 0x02
+ INVALID_LETTER, // [ 3] 0x03
+ INVALID_LETTER, // [ 4] 0x04
+ INVALID_LETTER, // [ 5] 0x05
+ INVALID_LETTER, // [ 6] 0x06
+ INVALID_LETTER, // [ 7] 0x07
+ INVALID_LETTER, // [ 8] 0x08
+ INVALID_LETTER, // [ 9] 0x09
+ INVALID_LETTER, // [ 10] 0x0a
+ INVALID_LETTER, // [ 11] 0x0b
+ INVALID_LETTER, // [ 12] 0x0c
+ INVALID_LETTER, // [ 13] 0x0d
+ INVALID_LETTER, // [ 14] 0x0e
+ INVALID_LETTER, // [ 15] 0x0f
+ INVALID_LETTER, // [ 16] 0x10
+ INVALID_LETTER, // [ 17] 0x11
+ INVALID_LETTER, // [ 18] 0x12
+ INVALID_LETTER, // [ 19] 0x13
+ INVALID_LETTER, // [ 20] 0x14
+ INVALID_LETTER, // [ 21] 0x15
+ INVALID_LETTER, // [ 22] 0x16
+ INVALID_LETTER, // [ 23] 0x17
+ INVALID_LETTER, // [ 24] 0x18
+ INVALID_LETTER, // [ 25] 0x19
+ INVALID_LETTER, // [ 26] 0x1a
+ INVALID_LETTER, // [ 27] 0x1b
+ INVALID_LETTER, // [ 28] 0x1c
+ INVALID_LETTER, // [ 29] 0x1d
+ INVALID_LETTER, // [ 30] 0x1e
+ INVALID_LETTER, // [ 31] 0x1f
+ INVALID_LETTER, // [ 32] ' '
+ INVALID_LETTER, // [ 33] '!'
+ INVALID_LETTER, // [ 34] '"'
+ INVALID_LETTER, // [ 35] '#'
+ INVALID_LETTER, // [ 36] '$'
+ INVALID_LETTER, // [ 37] '%'
+ INVALID_LETTER, // [ 38] '&'
+ INVALID_LETTER, // [ 39] '''
+ INVALID_LETTER, // [ 40] '('
+ INVALID_LETTER, // [ 41] ')'
+ INVALID_LETTER, // [ 42] '*'
+ INVALID_LETTER, // [ 43] '+'
+ INVALID_LETTER, // [ 44] ','
+ INVALID_LETTER, // [ 45] '-'
+ INVALID_LETTER, // [ 46] '.'
+ INVALID_LETTER, // [ 47] '/'
+ INVALID_LETTER, // [ 48] '0'
+ INVALID_LETTER, // [ 49] '1'
+ INVALID_LETTER, // [ 50] '2'
+ INVALID_LETTER, // [ 51] '3'
+ INVALID_LETTER, // [ 52] '4'
+ INVALID_LETTER, // [ 53] '5'
+ INVALID_LETTER, // [ 54] '6'
+ INVALID_LETTER, // [ 55] '7'
+ INVALID_LETTER, // [ 56] '8'
+ INVALID_LETTER, // [ 57] '9'
+ INVALID_LETTER, // [ 58] ':'
+ INVALID_LETTER, // [ 59] ';'
+ INVALID_LETTER, // [ 60] '<'
+ INVALID_LETTER, // [ 61] '='
+ INVALID_LETTER, // [ 62] '>'
+ INVALID_LETTER, // [ 63] '?'
+ INVALID_LETTER, // [ 64] '@'
+ 0 , // [ 65] 'A' = Ala
+ INVALID_LETTER, // [ 66] 'B'
+ 1 , // [ 67] 'C' = Cys
+ 2 , // [ 68] 'D' = Asp
+ 3 , // [ 69] 'E' = Glu
+ 4 , // [ 70] 'F' = Phe
+ 5 , // [ 71] 'G' = Gly
+ 6 , // [ 72] 'H' = His
+ 7 , // [ 73] 'I' = Ile
+ INVALID_LETTER, // [ 74] 'J'
+ 8 , // [ 75] 'K' = Lys
+ 9 , // [ 76] 'L' = Leu
+ 10 , // [ 77] 'M' = Met
+ 11 , // [ 78] 'N' = Asn
+ INVALID_LETTER, // [ 79] 'O'
+ 12 , // [ 80] 'P' = Pro
+ 13 , // [ 81] 'Q' = Gln
+ 14 , // [ 82] 'R' = Arg
+ 15 , // [ 83] 'S' = Ser
+ 16 , // [ 84] 'T' = Thr
+ INVALID_LETTER, // [ 85] 'U'
+ 17 , // [ 86] 'V' = Val
+ 18 , // [ 87] 'W' = Trp
+ INVALID_LETTER, // [ 88] 'X'
+ 19 , // [ 89] 'Y' = Tyr
+ INVALID_LETTER, // [ 90] 'Z'
+ INVALID_LETTER, // [ 91] '['
+ INVALID_LETTER, // [ 92] '\'
+ INVALID_LETTER, // [ 93] ']'
+ INVALID_LETTER, // [ 94] '^'
+ INVALID_LETTER, // [ 95] '_'
+ INVALID_LETTER, // [ 96] '`'
+ 0 , // [ 97] 'a' = Ala
+ INVALID_LETTER, // [ 98] 'b'
+ 1 , // [ 99] 'c' = Cys
+ 2 , // [100] 'd' = Asp
+ 3 , // [101] 'e' = Glu
+ 4 , // [102] 'f' = Phe
+ 5 , // [103] 'g' = Gly
+ 6 , // [104] 'h' = His
+ 7 , // [105] 'i' = Ile
+ INVALID_LETTER, // [106] 'j'
+ 8 , // [107] 'k' = Lys
+ 9 , // [108] 'l' = Leu
+ 10 , // [109] 'm' = Met
+ 11 , // [110] 'n' = Asn
+ INVALID_LETTER, // [111] 'o'
+ 12 , // [112] 'p' = Pro
+ 13 , // [113] 'q' = Gln
+ 14 , // [114] 'r' = Arg
+ 15 , // [115] 's' = Ser
+ 16 , // [116] 't' = Thr
+ INVALID_LETTER, // [117] 'u'
+ 17 , // [118] 'v' = Val
+ 18 , // [119] 'w' = Trp
+ INVALID_LETTER, // [120] 'x'
+ 19 , // [121] 'y' = Tyr
+ INVALID_LETTER, // [122] 'z'
+ INVALID_LETTER, // [123] '{'
+ INVALID_LETTER, // [124] '|'
+ INVALID_LETTER, // [125] '}'
+ INVALID_LETTER, // [126] '~'
+ INVALID_LETTER, // [127] 0x7f
+ INVALID_LETTER, // [128] 0x80
+ INVALID_LETTER, // [129] 0x81
+ INVALID_LETTER, // [130] 0x82
+ INVALID_LETTER, // [131] 0x83
+ INVALID_LETTER, // [132] 0x84
+ INVALID_LETTER, // [133] 0x85
+ INVALID_LETTER, // [134] 0x86
+ INVALID_LETTER, // [135] 0x87
+ INVALID_LETTER, // [136] 0x88
+ INVALID_LETTER, // [137] 0x89
+ INVALID_LETTER, // [138] 0x8a
+ INVALID_LETTER, // [139] 0x8b
+ INVALID_LETTER, // [140] 0x8c
+ INVALID_LETTER, // [141] 0x8d
+ INVALID_LETTER, // [142] 0x8e
+ INVALID_LETTER, // [143] 0x8f
+ INVALID_LETTER, // [144] 0x90
+ INVALID_LETTER, // [145] 0x91
+ INVALID_LETTER, // [146] 0x92
+ INVALID_LETTER, // [147] 0x93
+ INVALID_LETTER, // [148] 0x94
+ INVALID_LETTER, // [149] 0x95
+ INVALID_LETTER, // [150] 0x96
+ INVALID_LETTER, // [151] 0x97
+ INVALID_LETTER, // [152] 0x98
+ INVALID_LETTER, // [153] 0x99
+ INVALID_LETTER, // [154] 0x9a
+ INVALID_LETTER, // [155] 0x9b
+ INVALID_LETTER, // [156] 0x9c
+ INVALID_LETTER, // [157] 0x9d
+ INVALID_LETTER, // [158] 0x9e
+ INVALID_LETTER, // [159] 0x9f
+ INVALID_LETTER, // [160] 0xa0
+ INVALID_LETTER, // [161] 0xa1
+ INVALID_LETTER, // [162] 0xa2
+ INVALID_LETTER, // [163] 0xa3
+ INVALID_LETTER, // [164] 0xa4
+ INVALID_LETTER, // [165] 0xa5
+ INVALID_LETTER, // [166] 0xa6
+ INVALID_LETTER, // [167] 0xa7
+ INVALID_LETTER, // [168] 0xa8
+ INVALID_LETTER, // [169] 0xa9
+ INVALID_LETTER, // [170] 0xaa
+ INVALID_LETTER, // [171] 0xab
+ INVALID_LETTER, // [172] 0xac
+ INVALID_LETTER, // [173] 0xad
+ INVALID_LETTER, // [174] 0xae
+ INVALID_LETTER, // [175] 0xaf
+ INVALID_LETTER, // [176] 0xb0
+ INVALID_LETTER, // [177] 0xb1
+ INVALID_LETTER, // [178] 0xb2
+ INVALID_LETTER, // [179] 0xb3
+ INVALID_LETTER, // [180] 0xb4
+ INVALID_LETTER, // [181] 0xb5
+ INVALID_LETTER, // [182] 0xb6
+ INVALID_LETTER, // [183] 0xb7
+ INVALID_LETTER, // [184] 0xb8
+ INVALID_LETTER, // [185] 0xb9
+ INVALID_LETTER, // [186] 0xba
+ INVALID_LETTER, // [187] 0xbb
+ INVALID_LETTER, // [188] 0xbc
+ INVALID_LETTER, // [189] 0xbd
+ INVALID_LETTER, // [190] 0xbe
+ INVALID_LETTER, // [191] 0xbf
+ INVALID_LETTER, // [192] 0xc0
+ INVALID_LETTER, // [193] 0xc1
+ INVALID_LETTER, // [194] 0xc2
+ INVALID_LETTER, // [195] 0xc3
+ INVALID_LETTER, // [196] 0xc4
+ INVALID_LETTER, // [197] 0xc5
+ INVALID_LETTER, // [198] 0xc6
+ INVALID_LETTER, // [199] 0xc7
+ INVALID_LETTER, // [200] 0xc8
+ INVALID_LETTER, // [201] 0xc9
+ INVALID_LETTER, // [202] 0xca
+ INVALID_LETTER, // [203] 0xcb
+ INVALID_LETTER, // [204] 0xcc
+ INVALID_LETTER, // [205] 0xcd
+ INVALID_LETTER, // [206] 0xce
+ INVALID_LETTER, // [207] 0xcf
+ INVALID_LETTER, // [208] 0xd0
+ INVALID_LETTER, // [209] 0xd1
+ INVALID_LETTER, // [210] 0xd2
+ INVALID_LETTER, // [211] 0xd3
+ INVALID_LETTER, // [212] 0xd4
+ INVALID_LETTER, // [213] 0xd5
+ INVALID_LETTER, // [214] 0xd6
+ INVALID_LETTER, // [215] 0xd7
+ INVALID_LETTER, // [216] 0xd8
+ INVALID_LETTER, // [217] 0xd9
+ INVALID_LETTER, // [218] 0xda
+ INVALID_LETTER, // [219] 0xdb
+ INVALID_LETTER, // [220] 0xdc
+ INVALID_LETTER, // [221] 0xdd
+ INVALID_LETTER, // [222] 0xde
+ INVALID_LETTER, // [223] 0xdf
+ INVALID_LETTER, // [224] 0xe0
+ INVALID_LETTER, // [225] 0xe1
+ INVALID_LETTER, // [226] 0xe2
+ INVALID_LETTER, // [227] 0xe3
+ INVALID_LETTER, // [228] 0xe4
+ INVALID_LETTER, // [229] 0xe5
+ INVALID_LETTER, // [230] 0xe6
+ INVALID_LETTER, // [231] 0xe7
+ INVALID_LETTER, // [232] 0xe8
+ INVALID_LETTER, // [233] 0xe9
+ INVALID_LETTER, // [234] 0xea
+ INVALID_LETTER, // [235] 0xeb
+ INVALID_LETTER, // [236] 0xec
+ INVALID_LETTER, // [237] 0xed
+ INVALID_LETTER, // [238] 0xee
+ INVALID_LETTER, // [239] 0xef
+ INVALID_LETTER, // [240] 0xf0
+ INVALID_LETTER, // [241] 0xf1
+ INVALID_LETTER, // [242] 0xf2
+ INVALID_LETTER, // [243] 0xf3
+ INVALID_LETTER, // [244] 0xf4
+ INVALID_LETTER, // [245] 0xf5
+ INVALID_LETTER, // [246] 0xf6
+ INVALID_LETTER, // [247] 0xf7
+ INVALID_LETTER, // [248] 0xf8
+ INVALID_LETTER, // [249] 0xf9
+ INVALID_LETTER, // [250] 0xfa
+ INVALID_LETTER, // [251] 0xfb
+ INVALID_LETTER, // [252] 0xfc
+ INVALID_LETTER, // [253] 0xfd
+ INVALID_LETTER, // [254] 0xfe
+ INVALID_LETTER, // [255] 0xff
+ };
+
+unsigned char g_LetterToCharAmino[256] = // Lookup: amino-acid letter index -> upper-case one-letter code; inverse of g_CharToLetterAmino for 0..19.
+ { // Index 20 yields '*' (the value g_CodonWordToAminoLetter stores for stop codons); indices 21..255 hold INVALID_CHAR.
+ 'A', // [0]
+ 'C', // [1]
+ 'D', // [2]
+ 'E', // [3]
+ 'F', // [4]
+ 'G', // [5]
+ 'H', // [6]
+ 'I', // [7]
+ 'K', // [8]
+ 'L', // [9]
+ 'M', // [10]
+ 'N', // [11]
+ 'P', // [12]
+ 'Q', // [13]
+ 'R', // [14]
+ 'S', // [15]
+ 'T', // [16]
+ 'V', // [17]
+ 'W', // [18]
+ 'Y', // [19]
+ '*', // [20]
+ INVALID_CHAR, // [21]
+ INVALID_CHAR, // [22]
+ INVALID_CHAR, // [23]
+ INVALID_CHAR, // [24]
+ INVALID_CHAR, // [25]
+ INVALID_CHAR, // [26]
+ INVALID_CHAR, // [27]
+ INVALID_CHAR, // [28]
+ INVALID_CHAR, // [29]
+ INVALID_CHAR, // [30]
+ INVALID_CHAR, // [31]
+ INVALID_CHAR, // [32]
+ INVALID_CHAR, // [33]
+ INVALID_CHAR, // [34]
+ INVALID_CHAR, // [35]
+ INVALID_CHAR, // [36]
+ INVALID_CHAR, // [37]
+ INVALID_CHAR, // [38]
+ INVALID_CHAR, // [39]
+ INVALID_CHAR, // [40]
+ INVALID_CHAR, // [41]
+ INVALID_CHAR, // [42]
+ INVALID_CHAR, // [43]
+ INVALID_CHAR, // [44]
+ INVALID_CHAR, // [45]
+ INVALID_CHAR, // [46]
+ INVALID_CHAR, // [47]
+ INVALID_CHAR, // [48]
+ INVALID_CHAR, // [49]
+ INVALID_CHAR, // [50]
+ INVALID_CHAR, // [51]
+ INVALID_CHAR, // [52]
+ INVALID_CHAR, // [53]
+ INVALID_CHAR, // [54]
+ INVALID_CHAR, // [55]
+ INVALID_CHAR, // [56]
+ INVALID_CHAR, // [57]
+ INVALID_CHAR, // [58]
+ INVALID_CHAR, // [59]
+ INVALID_CHAR, // [60]
+ INVALID_CHAR, // [61]
+ INVALID_CHAR, // [62]
+ INVALID_CHAR, // [63]
+ INVALID_CHAR, // [64]
+ INVALID_CHAR, // [65]
+ INVALID_CHAR, // [66]
+ INVALID_CHAR, // [67]
+ INVALID_CHAR, // [68]
+ INVALID_CHAR, // [69]
+ INVALID_CHAR, // [70]
+ INVALID_CHAR, // [71]
+ INVALID_CHAR, // [72]
+ INVALID_CHAR, // [73]
+ INVALID_CHAR, // [74]
+ INVALID_CHAR, // [75]
+ INVALID_CHAR, // [76]
+ INVALID_CHAR, // [77]
+ INVALID_CHAR, // [78]
+ INVALID_CHAR, // [79]
+ INVALID_CHAR, // [80]
+ INVALID_CHAR, // [81]
+ INVALID_CHAR, // [82]
+ INVALID_CHAR, // [83]
+ INVALID_CHAR, // [84]
+ INVALID_CHAR, // [85]
+ INVALID_CHAR, // [86]
+ INVALID_CHAR, // [87]
+ INVALID_CHAR, // [88]
+ INVALID_CHAR, // [89]
+ INVALID_CHAR, // [90]
+ INVALID_CHAR, // [91]
+ INVALID_CHAR, // [92]
+ INVALID_CHAR, // [93]
+ INVALID_CHAR, // [94]
+ INVALID_CHAR, // [95]
+ INVALID_CHAR, // [96]
+ INVALID_CHAR, // [97]
+ INVALID_CHAR, // [98]
+ INVALID_CHAR, // [99]
+ INVALID_CHAR, // [100]
+ INVALID_CHAR, // [101]
+ INVALID_CHAR, // [102]
+ INVALID_CHAR, // [103]
+ INVALID_CHAR, // [104]
+ INVALID_CHAR, // [105]
+ INVALID_CHAR, // [106]
+ INVALID_CHAR, // [107]
+ INVALID_CHAR, // [108]
+ INVALID_CHAR, // [109]
+ INVALID_CHAR, // [110]
+ INVALID_CHAR, // [111]
+ INVALID_CHAR, // [112]
+ INVALID_CHAR, // [113]
+ INVALID_CHAR, // [114]
+ INVALID_CHAR, // [115]
+ INVALID_CHAR, // [116]
+ INVALID_CHAR, // [117]
+ INVALID_CHAR, // [118]
+ INVALID_CHAR, // [119]
+ INVALID_CHAR, // [120]
+ INVALID_CHAR, // [121]
+ INVALID_CHAR, // [122]
+ INVALID_CHAR, // [123]
+ INVALID_CHAR, // [124]
+ INVALID_CHAR, // [125]
+ INVALID_CHAR, // [126]
+ INVALID_CHAR, // [127]
+ INVALID_CHAR, // [128]
+ INVALID_CHAR, // [129]
+ INVALID_CHAR, // [130]
+ INVALID_CHAR, // [131]
+ INVALID_CHAR, // [132]
+ INVALID_CHAR, // [133]
+ INVALID_CHAR, // [134]
+ INVALID_CHAR, // [135]
+ INVALID_CHAR, // [136]
+ INVALID_CHAR, // [137]
+ INVALID_CHAR, // [138]
+ INVALID_CHAR, // [139]
+ INVALID_CHAR, // [140]
+ INVALID_CHAR, // [141]
+ INVALID_CHAR, // [142]
+ INVALID_CHAR, // [143]
+ INVALID_CHAR, // [144]
+ INVALID_CHAR, // [145]
+ INVALID_CHAR, // [146]
+ INVALID_CHAR, // [147]
+ INVALID_CHAR, // [148]
+ INVALID_CHAR, // [149]
+ INVALID_CHAR, // [150]
+ INVALID_CHAR, // [151]
+ INVALID_CHAR, // [152]
+ INVALID_CHAR, // [153]
+ INVALID_CHAR, // [154]
+ INVALID_CHAR, // [155]
+ INVALID_CHAR, // [156]
+ INVALID_CHAR, // [157]
+ INVALID_CHAR, // [158]
+ INVALID_CHAR, // [159]
+ INVALID_CHAR, // [160]
+ INVALID_CHAR, // [161]
+ INVALID_CHAR, // [162]
+ INVALID_CHAR, // [163]
+ INVALID_CHAR, // [164]
+ INVALID_CHAR, // [165]
+ INVALID_CHAR, // [166]
+ INVALID_CHAR, // [167]
+ INVALID_CHAR, // [168]
+ INVALID_CHAR, // [169]
+ INVALID_CHAR, // [170]
+ INVALID_CHAR, // [171]
+ INVALID_CHAR, // [172]
+ INVALID_CHAR, // [173]
+ INVALID_CHAR, // [174]
+ INVALID_CHAR, // [175]
+ INVALID_CHAR, // [176]
+ INVALID_CHAR, // [177]
+ INVALID_CHAR, // [178]
+ INVALID_CHAR, // [179]
+ INVALID_CHAR, // [180]
+ INVALID_CHAR, // [181]
+ INVALID_CHAR, // [182]
+ INVALID_CHAR, // [183]
+ INVALID_CHAR, // [184]
+ INVALID_CHAR, // [185]
+ INVALID_CHAR, // [186]
+ INVALID_CHAR, // [187]
+ INVALID_CHAR, // [188]
+ INVALID_CHAR, // [189]
+ INVALID_CHAR, // [190]
+ INVALID_CHAR, // [191]
+ INVALID_CHAR, // [192]
+ INVALID_CHAR, // [193]
+ INVALID_CHAR, // [194]
+ INVALID_CHAR, // [195]
+ INVALID_CHAR, // [196]
+ INVALID_CHAR, // [197]
+ INVALID_CHAR, // [198]
+ INVALID_CHAR, // [199]
+ INVALID_CHAR, // [200]
+ INVALID_CHAR, // [201]
+ INVALID_CHAR, // [202]
+ INVALID_CHAR, // [203]
+ INVALID_CHAR, // [204]
+ INVALID_CHAR, // [205]
+ INVALID_CHAR, // [206]
+ INVALID_CHAR, // [207]
+ INVALID_CHAR, // [208]
+ INVALID_CHAR, // [209]
+ INVALID_CHAR, // [210]
+ INVALID_CHAR, // [211]
+ INVALID_CHAR, // [212]
+ INVALID_CHAR, // [213]
+ INVALID_CHAR, // [214]
+ INVALID_CHAR, // [215]
+ INVALID_CHAR, // [216]
+ INVALID_CHAR, // [217]
+ INVALID_CHAR, // [218]
+ INVALID_CHAR, // [219]
+ INVALID_CHAR, // [220]
+ INVALID_CHAR, // [221]
+ INVALID_CHAR, // [222]
+ INVALID_CHAR, // [223]
+ INVALID_CHAR, // [224]
+ INVALID_CHAR, // [225]
+ INVALID_CHAR, // [226]
+ INVALID_CHAR, // [227]
+ INVALID_CHAR, // [228]
+ INVALID_CHAR, // [229]
+ INVALID_CHAR, // [230]
+ INVALID_CHAR, // [231]
+ INVALID_CHAR, // [232]
+ INVALID_CHAR, // [233]
+ INVALID_CHAR, // [234]
+ INVALID_CHAR, // [235]
+ INVALID_CHAR, // [236]
+ INVALID_CHAR, // [237]
+ INVALID_CHAR, // [238]
+ INVALID_CHAR, // [239]
+ INVALID_CHAR, // [240]
+ INVALID_CHAR, // [241]
+ INVALID_CHAR, // [242]
+ INVALID_CHAR, // [243]
+ INVALID_CHAR, // [244]
+ INVALID_CHAR, // [245]
+ INVALID_CHAR, // [246]
+ INVALID_CHAR, // [247]
+ INVALID_CHAR, // [248]
+ INVALID_CHAR, // [249]
+ INVALID_CHAR, // [250]
+ INVALID_CHAR, // [251]
+ INVALID_CHAR, // [252]
+ INVALID_CHAR, // [253]
+ INVALID_CHAR, // [254]
+ INVALID_CHAR, // [255]
+ };
+
+unsigned g_CharToLetterNucleo[256] = // Lookup: character code (0-255) -> nucleotide letter index: A=0, C=1, G=2, T=3, either case.
+ { // 'U'/'u' share index 3 with 'T'/'t'; every other code holds INVALID_LETTER.
+ INVALID_LETTER, // [ 0] = 0x00
+ INVALID_LETTER, // [ 1] = 0x01
+ INVALID_LETTER, // [ 2] = 0x02
+ INVALID_LETTER, // [ 3] = 0x03
+ INVALID_LETTER, // [ 4] = 0x04
+ INVALID_LETTER, // [ 5] = 0x05
+ INVALID_LETTER, // [ 6] = 0x06
+ INVALID_LETTER, // [ 7] = 0x07
+ INVALID_LETTER, // [ 8] = 0x08
+ INVALID_LETTER, // [ 9] = 0x09
+ INVALID_LETTER, // [ 10] = 0x0a
+ INVALID_LETTER, // [ 11] = 0x0b
+ INVALID_LETTER, // [ 12] = 0x0c
+ INVALID_LETTER, // [ 13] = 0x0d
+ INVALID_LETTER, // [ 14] = 0x0e
+ INVALID_LETTER, // [ 15] = 0x0f
+ INVALID_LETTER, // [ 16] = 0x10
+ INVALID_LETTER, // [ 17] = 0x11
+ INVALID_LETTER, // [ 18] = 0x12
+ INVALID_LETTER, // [ 19] = 0x13
+ INVALID_LETTER, // [ 20] = 0x14
+ INVALID_LETTER, // [ 21] = 0x15
+ INVALID_LETTER, // [ 22] = 0x16
+ INVALID_LETTER, // [ 23] = 0x17
+ INVALID_LETTER, // [ 24] = 0x18
+ INVALID_LETTER, // [ 25] = 0x19
+ INVALID_LETTER, // [ 26] = 0x1a
+ INVALID_LETTER, // [ 27] = 0x1b
+ INVALID_LETTER, // [ 28] = 0x1c
+ INVALID_LETTER, // [ 29] = 0x1d
+ INVALID_LETTER, // [ 30] = 0x1e
+ INVALID_LETTER, // [ 31] = 0x1f
+ INVALID_LETTER, // [ 32] = 32
+ INVALID_LETTER, // [ 33] = 33
+ INVALID_LETTER, // [ 34] = 34
+ INVALID_LETTER, // [ 35] = 35
+ INVALID_LETTER, // [ 36] = 36
+ INVALID_LETTER, // [ 37] = 37
+ INVALID_LETTER, // [ 38] = 38
+ INVALID_LETTER, // [ 39] = 39
+ INVALID_LETTER, // [ 40] = 40
+ INVALID_LETTER, // [ 41] = 41
+ INVALID_LETTER, // [ 42] = 42
+ INVALID_LETTER, // [ 43] = 43
+ INVALID_LETTER, // [ 44] = 44
+ INVALID_LETTER, // [ 45] = 45
+ INVALID_LETTER, // [ 46] = 46
+ INVALID_LETTER, // [ 47] = 47
+ INVALID_LETTER, // [ 48] = 48
+ INVALID_LETTER, // [ 49] = 49
+ INVALID_LETTER, // [ 50] = 50
+ INVALID_LETTER, // [ 51] = 51
+ INVALID_LETTER, // [ 52] = 52
+ INVALID_LETTER, // [ 53] = 53
+ INVALID_LETTER, // [ 54] = 54
+ INVALID_LETTER, // [ 55] = 55
+ INVALID_LETTER, // [ 56] = 56
+ INVALID_LETTER, // [ 57] = 57
+ INVALID_LETTER, // [ 58] = 58
+ INVALID_LETTER, // [ 59] = 59
+ INVALID_LETTER, // [ 60] = 60
+ INVALID_LETTER, // [ 61] = 61
+ INVALID_LETTER, // [ 62] = 62
+ INVALID_LETTER, // [ 63] = 63
+ INVALID_LETTER, // [ 64] = 64
+ 0 , // [ 65] = A (Nucleotide)
+ INVALID_LETTER, // [ 66] = 66
+ 1 , // [ 67] = C (Nucleotide)
+ INVALID_LETTER, // [ 68] = 68
+ INVALID_LETTER, // [ 69] = 69
+ INVALID_LETTER, // [ 70] = 70
+ 2 , // [ 71] = G (Nucleotide)
+ INVALID_LETTER, // [ 72] = 72
+ INVALID_LETTER, // [ 73] = 73
+ INVALID_LETTER, // [ 74] = 74
+ INVALID_LETTER, // [ 75] = 75
+ INVALID_LETTER, // [ 76] = 76
+ INVALID_LETTER, // [ 77] = 77
+ INVALID_LETTER, // [ 78] = 78
+ INVALID_LETTER, // [ 79] = 79
+ INVALID_LETTER, // [ 80] = 80
+ INVALID_LETTER, // [ 81] = 81
+ INVALID_LETTER, // [ 82] = 82
+ INVALID_LETTER, // [ 83] = 83
+ 3 , // [ 84] = T (Nucleotide)
+ 3 , // [ 85] = U (Nucleotide)
+ INVALID_LETTER, // [ 86] = 86
+ INVALID_LETTER, // [ 87] = 87
+ INVALID_LETTER, // [ 88] = 88
+ INVALID_LETTER, // [ 89] = 89
+ INVALID_LETTER, // [ 90] = 90
+ INVALID_LETTER, // [ 91] = 91
+ INVALID_LETTER, // [ 92] = 92
+ INVALID_LETTER, // [ 93] = 93
+ INVALID_LETTER, // [ 94] = 94
+ INVALID_LETTER, // [ 95] = 95
+ INVALID_LETTER, // [ 96] = 96
+ 0 , // [ 97] = a (Nucleotide)
+ INVALID_LETTER, // [ 98] = 98
+ 1 , // [ 99] = c (Nucleotide)
+ INVALID_LETTER, // [100] = 100
+ INVALID_LETTER, // [101] = 101
+ INVALID_LETTER, // [102] = 102
+ 2 , // [103] = g (Nucleotide)
+ INVALID_LETTER, // [104] = 104
+ INVALID_LETTER, // [105] = 105
+ INVALID_LETTER, // [106] = 106
+ INVALID_LETTER, // [107] = 107
+ INVALID_LETTER, // [108] = 108
+ INVALID_LETTER, // [109] = 109
+ INVALID_LETTER, // [110] = 110
+ INVALID_LETTER, // [111] = 111
+ INVALID_LETTER, // [112] = 112
+ INVALID_LETTER, // [113] = 113
+ INVALID_LETTER, // [114] = 114
+ INVALID_LETTER, // [115] = 115
+ 3 , // [116] = t (Nucleotide)
+ 3 , // [117] = u (Nucleotide)
+ INVALID_LETTER, // [118] = 118
+ INVALID_LETTER, // [119] = 119
+ INVALID_LETTER, // [120] = 120
+ INVALID_LETTER, // [121] = 121
+ INVALID_LETTER, // [122] = 122
+ INVALID_LETTER, // [123] = 123
+ INVALID_LETTER, // [124] = 124
+ INVALID_LETTER, // [125] = 125
+ INVALID_LETTER, // [126] = 126
+ INVALID_LETTER, // [127] = 0x7f
+ INVALID_LETTER, // [128] = 0x80
+ INVALID_LETTER, // [129] = 0x81
+ INVALID_LETTER, // [130] = 0x82
+ INVALID_LETTER, // [131] = 0x83
+ INVALID_LETTER, // [132] = 0x84
+ INVALID_LETTER, // [133] = 0x85
+ INVALID_LETTER, // [134] = 0x86
+ INVALID_LETTER, // [135] = 0x87
+ INVALID_LETTER, // [136] = 0x88
+ INVALID_LETTER, // [137] = 0x89
+ INVALID_LETTER, // [138] = 0x8a
+ INVALID_LETTER, // [139] = 0x8b
+ INVALID_LETTER, // [140] = 0x8c
+ INVALID_LETTER, // [141] = 0x8d
+ INVALID_LETTER, // [142] = 0x8e
+ INVALID_LETTER, // [143] = 0x8f
+ INVALID_LETTER, // [144] = 0x90
+ INVALID_LETTER, // [145] = 0x91
+ INVALID_LETTER, // [146] = 0x92
+ INVALID_LETTER, // [147] = 0x93
+ INVALID_LETTER, // [148] = 0x94
+ INVALID_LETTER, // [149] = 0x95
+ INVALID_LETTER, // [150] = 0x96
+ INVALID_LETTER, // [151] = 0x97
+ INVALID_LETTER, // [152] = 0x98
+ INVALID_LETTER, // [153] = 0x99
+ INVALID_LETTER, // [154] = 0x9a
+ INVALID_LETTER, // [155] = 0x9b
+ INVALID_LETTER, // [156] = 0x9c
+ INVALID_LETTER, // [157] = 0x9d
+ INVALID_LETTER, // [158] = 0x9e
+ INVALID_LETTER, // [159] = 0x9f
+ INVALID_LETTER, // [160] = 0xa0
+ INVALID_LETTER, // [161] = 0xa1
+ INVALID_LETTER, // [162] = 0xa2
+ INVALID_LETTER, // [163] = 0xa3
+ INVALID_LETTER, // [164] = 0xa4
+ INVALID_LETTER, // [165] = 0xa5
+ INVALID_LETTER, // [166] = 0xa6
+ INVALID_LETTER, // [167] = 0xa7
+ INVALID_LETTER, // [168] = 0xa8
+ INVALID_LETTER, // [169] = 0xa9
+ INVALID_LETTER, // [170] = 0xaa
+ INVALID_LETTER, // [171] = 0xab
+ INVALID_LETTER, // [172] = 0xac
+ INVALID_LETTER, // [173] = 0xad
+ INVALID_LETTER, // [174] = 0xae
+ INVALID_LETTER, // [175] = 0xaf
+ INVALID_LETTER, // [176] = 0xb0
+ INVALID_LETTER, // [177] = 0xb1
+ INVALID_LETTER, // [178] = 0xb2
+ INVALID_LETTER, // [179] = 0xb3
+ INVALID_LETTER, // [180] = 0xb4
+ INVALID_LETTER, // [181] = 0xb5
+ INVALID_LETTER, // [182] = 0xb6
+ INVALID_LETTER, // [183] = 0xb7
+ INVALID_LETTER, // [184] = 0xb8
+ INVALID_LETTER, // [185] = 0xb9
+ INVALID_LETTER, // [186] = 0xba
+ INVALID_LETTER, // [187] = 0xbb
+ INVALID_LETTER, // [188] = 0xbc
+ INVALID_LETTER, // [189] = 0xbd
+ INVALID_LETTER, // [190] = 0xbe
+ INVALID_LETTER, // [191] = 0xbf
+ INVALID_LETTER, // [192] = 0xc0
+ INVALID_LETTER, // [193] = 0xc1
+ INVALID_LETTER, // [194] = 0xc2
+ INVALID_LETTER, // [195] = 0xc3
+ INVALID_LETTER, // [196] = 0xc4
+ INVALID_LETTER, // [197] = 0xc5
+ INVALID_LETTER, // [198] = 0xc6
+ INVALID_LETTER, // [199] = 0xc7
+ INVALID_LETTER, // [200] = 0xc8
+ INVALID_LETTER, // [201] = 0xc9
+ INVALID_LETTER, // [202] = 0xca
+ INVALID_LETTER, // [203] = 0xcb
+ INVALID_LETTER, // [204] = 0xcc
+ INVALID_LETTER, // [205] = 0xcd
+ INVALID_LETTER, // [206] = 0xce
+ INVALID_LETTER, // [207] = 0xcf
+ INVALID_LETTER, // [208] = 0xd0
+ INVALID_LETTER, // [209] = 0xd1
+ INVALID_LETTER, // [210] = 0xd2
+ INVALID_LETTER, // [211] = 0xd3
+ INVALID_LETTER, // [212] = 0xd4
+ INVALID_LETTER, // [213] = 0xd5
+ INVALID_LETTER, // [214] = 0xd6
+ INVALID_LETTER, // [215] = 0xd7
+ INVALID_LETTER, // [216] = 0xd8
+ INVALID_LETTER, // [217] = 0xd9
+ INVALID_LETTER, // [218] = 0xda
+ INVALID_LETTER, // [219] = 0xdb
+ INVALID_LETTER, // [220] = 0xdc
+ INVALID_LETTER, // [221] = 0xdd
+ INVALID_LETTER, // [222] = 0xde
+ INVALID_LETTER, // [223] = 0xdf
+ INVALID_LETTER, // [224] = 0xe0
+ INVALID_LETTER, // [225] = 0xe1
+ INVALID_LETTER, // [226] = 0xe2
+ INVALID_LETTER, // [227] = 0xe3
+ INVALID_LETTER, // [228] = 0xe4
+ INVALID_LETTER, // [229] = 0xe5
+ INVALID_LETTER, // [230] = 0xe6
+ INVALID_LETTER, // [231] = 0xe7
+ INVALID_LETTER, // [232] = 0xe8
+ INVALID_LETTER, // [233] = 0xe9
+ INVALID_LETTER, // [234] = 0xea
+ INVALID_LETTER, // [235] = 0xeb
+ INVALID_LETTER, // [236] = 0xec
+ INVALID_LETTER, // [237] = 0xed
+ INVALID_LETTER, // [238] = 0xee
+ INVALID_LETTER, // [239] = 0xef
+ INVALID_LETTER, // [240] = 0xf0
+ INVALID_LETTER, // [241] = 0xf1
+ INVALID_LETTER, // [242] = 0xf2
+ INVALID_LETTER, // [243] = 0xf3
+ INVALID_LETTER, // [244] = 0xf4
+ INVALID_LETTER, // [245] = 0xf5
+ INVALID_LETTER, // [246] = 0xf6
+ INVALID_LETTER, // [247] = 0xf7
+ INVALID_LETTER, // [248] = 0xf8
+ INVALID_LETTER, // [249] = 0xf9
+ INVALID_LETTER, // [250] = 0xfa
+ INVALID_LETTER, // [251] = 0xfb
+ INVALID_LETTER, // [252] = 0xfc
+ INVALID_LETTER, // [253] = 0xfd
+ INVALID_LETTER, // [254] = 0xfe
+ INVALID_LETTER, // [255] = 0xff
+ };
+
+unsigned char g_LetterToCharNucleo[256] = // Lookup: nucleotide letter index -> character: 0='A', 1='C', 2='G', 3='T'.
+ { // Indices 4..255 hold INVALID_CHAR; index 3 always yields 'T', never 'U'.
+ 'A', // [0]
+ 'C', // [1]
+ 'G', // [2]
+ 'T', // [3]
+ INVALID_CHAR, // [4]
+ INVALID_CHAR, // [5]
+ INVALID_CHAR, // [6]
+ INVALID_CHAR, // [7]
+ INVALID_CHAR, // [8]
+ INVALID_CHAR, // [9]
+ INVALID_CHAR, // [10]
+ INVALID_CHAR, // [11]
+ INVALID_CHAR, // [12]
+ INVALID_CHAR, // [13]
+ INVALID_CHAR, // [14]
+ INVALID_CHAR, // [15]
+ INVALID_CHAR, // [16]
+ INVALID_CHAR, // [17]
+ INVALID_CHAR, // [18]
+ INVALID_CHAR, // [19]
+ INVALID_CHAR, // [20]
+ INVALID_CHAR, // [21]
+ INVALID_CHAR, // [22]
+ INVALID_CHAR, // [23]
+ INVALID_CHAR, // [24]
+ INVALID_CHAR, // [25]
+ INVALID_CHAR, // [26]
+ INVALID_CHAR, // [27]
+ INVALID_CHAR, // [28]
+ INVALID_CHAR, // [29]
+ INVALID_CHAR, // [30]
+ INVALID_CHAR, // [31]
+ INVALID_CHAR, // [32]
+ INVALID_CHAR, // [33]
+ INVALID_CHAR, // [34]
+ INVALID_CHAR, // [35]
+ INVALID_CHAR, // [36]
+ INVALID_CHAR, // [37]
+ INVALID_CHAR, // [38]
+ INVALID_CHAR, // [39]
+ INVALID_CHAR, // [40]
+ INVALID_CHAR, // [41]
+ INVALID_CHAR, // [42]
+ INVALID_CHAR, // [43]
+ INVALID_CHAR, // [44]
+ INVALID_CHAR, // [45]
+ INVALID_CHAR, // [46]
+ INVALID_CHAR, // [47]
+ INVALID_CHAR, // [48]
+ INVALID_CHAR, // [49]
+ INVALID_CHAR, // [50]
+ INVALID_CHAR, // [51]
+ INVALID_CHAR, // [52]
+ INVALID_CHAR, // [53]
+ INVALID_CHAR, // [54]
+ INVALID_CHAR, // [55]
+ INVALID_CHAR, // [56]
+ INVALID_CHAR, // [57]
+ INVALID_CHAR, // [58]
+ INVALID_CHAR, // [59]
+ INVALID_CHAR, // [60]
+ INVALID_CHAR, // [61]
+ INVALID_CHAR, // [62]
+ INVALID_CHAR, // [63]
+ INVALID_CHAR, // [64]
+ INVALID_CHAR, // [65]
+ INVALID_CHAR, // [66]
+ INVALID_CHAR, // [67]
+ INVALID_CHAR, // [68]
+ INVALID_CHAR, // [69]
+ INVALID_CHAR, // [70]
+ INVALID_CHAR, // [71]
+ INVALID_CHAR, // [72]
+ INVALID_CHAR, // [73]
+ INVALID_CHAR, // [74]
+ INVALID_CHAR, // [75]
+ INVALID_CHAR, // [76]
+ INVALID_CHAR, // [77]
+ INVALID_CHAR, // [78]
+ INVALID_CHAR, // [79]
+ INVALID_CHAR, // [80]
+ INVALID_CHAR, // [81]
+ INVALID_CHAR, // [82]
+ INVALID_CHAR, // [83]
+ INVALID_CHAR, // [84]
+ INVALID_CHAR, // [85]
+ INVALID_CHAR, // [86]
+ INVALID_CHAR, // [87]
+ INVALID_CHAR, // [88]
+ INVALID_CHAR, // [89]
+ INVALID_CHAR, // [90]
+ INVALID_CHAR, // [91]
+ INVALID_CHAR, // [92]
+ INVALID_CHAR, // [93]
+ INVALID_CHAR, // [94]
+ INVALID_CHAR, // [95]
+ INVALID_CHAR, // [96]
+ INVALID_CHAR, // [97]
+ INVALID_CHAR, // [98]
+ INVALID_CHAR, // [99]
+ INVALID_CHAR, // [100]
+ INVALID_CHAR, // [101]
+ INVALID_CHAR, // [102]
+ INVALID_CHAR, // [103]
+ INVALID_CHAR, // [104]
+ INVALID_CHAR, // [105]
+ INVALID_CHAR, // [106]
+ INVALID_CHAR, // [107]
+ INVALID_CHAR, // [108]
+ INVALID_CHAR, // [109]
+ INVALID_CHAR, // [110]
+ INVALID_CHAR, // [111]
+ INVALID_CHAR, // [112]
+ INVALID_CHAR, // [113]
+ INVALID_CHAR, // [114]
+ INVALID_CHAR, // [115]
+ INVALID_CHAR, // [116]
+ INVALID_CHAR, // [117]
+ INVALID_CHAR, // [118]
+ INVALID_CHAR, // [119]
+ INVALID_CHAR, // [120]
+ INVALID_CHAR, // [121]
+ INVALID_CHAR, // [122]
+ INVALID_CHAR, // [123]
+ INVALID_CHAR, // [124]
+ INVALID_CHAR, // [125]
+ INVALID_CHAR, // [126]
+ INVALID_CHAR, // [127]
+ INVALID_CHAR, // [128]
+ INVALID_CHAR, // [129]
+ INVALID_CHAR, // [130]
+ INVALID_CHAR, // [131]
+ INVALID_CHAR, // [132]
+ INVALID_CHAR, // [133]
+ INVALID_CHAR, // [134]
+ INVALID_CHAR, // [135]
+ INVALID_CHAR, // [136]
+ INVALID_CHAR, // [137]
+ INVALID_CHAR, // [138]
+ INVALID_CHAR, // [139]
+ INVALID_CHAR, // [140]
+ INVALID_CHAR, // [141]
+ INVALID_CHAR, // [142]
+ INVALID_CHAR, // [143]
+ INVALID_CHAR, // [144]
+ INVALID_CHAR, // [145]
+ INVALID_CHAR, // [146]
+ INVALID_CHAR, // [147]
+ INVALID_CHAR, // [148]
+ INVALID_CHAR, // [149]
+ INVALID_CHAR, // [150]
+ INVALID_CHAR, // [151]
+ INVALID_CHAR, // [152]
+ INVALID_CHAR, // [153]
+ INVALID_CHAR, // [154]
+ INVALID_CHAR, // [155]
+ INVALID_CHAR, // [156]
+ INVALID_CHAR, // [157]
+ INVALID_CHAR, // [158]
+ INVALID_CHAR, // [159]
+ INVALID_CHAR, // [160]
+ INVALID_CHAR, // [161]
+ INVALID_CHAR, // [162]
+ INVALID_CHAR, // [163]
+ INVALID_CHAR, // [164]
+ INVALID_CHAR, // [165]
+ INVALID_CHAR, // [166]
+ INVALID_CHAR, // [167]
+ INVALID_CHAR, // [168]
+ INVALID_CHAR, // [169]
+ INVALID_CHAR, // [170]
+ INVALID_CHAR, // [171]
+ INVALID_CHAR, // [172]
+ INVALID_CHAR, // [173]
+ INVALID_CHAR, // [174]
+ INVALID_CHAR, // [175]
+ INVALID_CHAR, // [176]
+ INVALID_CHAR, // [177]
+ INVALID_CHAR, // [178]
+ INVALID_CHAR, // [179]
+ INVALID_CHAR, // [180]
+ INVALID_CHAR, // [181]
+ INVALID_CHAR, // [182]
+ INVALID_CHAR, // [183]
+ INVALID_CHAR, // [184]
+ INVALID_CHAR, // [185]
+ INVALID_CHAR, // [186]
+ INVALID_CHAR, // [187]
+ INVALID_CHAR, // [188]
+ INVALID_CHAR, // [189]
+ INVALID_CHAR, // [190]
+ INVALID_CHAR, // [191]
+ INVALID_CHAR, // [192]
+ INVALID_CHAR, // [193]
+ INVALID_CHAR, // [194]
+ INVALID_CHAR, // [195]
+ INVALID_CHAR, // [196]
+ INVALID_CHAR, // [197]
+ INVALID_CHAR, // [198]
+ INVALID_CHAR, // [199]
+ INVALID_CHAR, // [200]
+ INVALID_CHAR, // [201]
+ INVALID_CHAR, // [202]
+ INVALID_CHAR, // [203]
+ INVALID_CHAR, // [204]
+ INVALID_CHAR, // [205]
+ INVALID_CHAR, // [206]
+ INVALID_CHAR, // [207]
+ INVALID_CHAR, // [208]
+ INVALID_CHAR, // [209]
+ INVALID_CHAR, // [210]
+ INVALID_CHAR, // [211]
+ INVALID_CHAR, // [212]
+ INVALID_CHAR, // [213]
+ INVALID_CHAR, // [214]
+ INVALID_CHAR, // [215]
+ INVALID_CHAR, // [216]
+ INVALID_CHAR, // [217]
+ INVALID_CHAR, // [218]
+ INVALID_CHAR, // [219]
+ INVALID_CHAR, // [220]
+ INVALID_CHAR, // [221]
+ INVALID_CHAR, // [222]
+ INVALID_CHAR, // [223]
+ INVALID_CHAR, // [224]
+ INVALID_CHAR, // [225]
+ INVALID_CHAR, // [226]
+ INVALID_CHAR, // [227]
+ INVALID_CHAR, // [228]
+ INVALID_CHAR, // [229]
+ INVALID_CHAR, // [230]
+ INVALID_CHAR, // [231]
+ INVALID_CHAR, // [232]
+ INVALID_CHAR, // [233]
+ INVALID_CHAR, // [234]
+ INVALID_CHAR, // [235]
+ INVALID_CHAR, // [236]
+ INVALID_CHAR, // [237]
+ INVALID_CHAR, // [238]
+ INVALID_CHAR, // [239]
+ INVALID_CHAR, // [240]
+ INVALID_CHAR, // [241]
+ INVALID_CHAR, // [242]
+ INVALID_CHAR, // [243]
+ INVALID_CHAR, // [244]
+ INVALID_CHAR, // [245]
+ INVALID_CHAR, // [246]
+ INVALID_CHAR, // [247]
+ INVALID_CHAR, // [248]
+ INVALID_CHAR, // [249]
+ INVALID_CHAR, // [250]
+ INVALID_CHAR, // [251]
+ INVALID_CHAR, // [252]
+ INVALID_CHAR, // [253]
+ INVALID_CHAR, // [254]
+ INVALID_CHAR, // [255]
+ };
+
+unsigned g_CodonWordToAminoLetter[4*4*4] = // Lookup: codon word -> amino-acid letter index (numbering of g_LetterToCharAmino).
+ { // Per the entry comments, word = 16*first + 4*second + third base with A=0, C=1, G=2, T=3; stop codons TAA, TAG, TGA map to 20.
+ 8 , // [ 0] = AAA K (Lys)
+ 11, // [ 1] = AAC N (Asn)
+ 8 , // [ 2] = AAG K (Lys)
+ 11, // [ 3] = AAT N (Asn)
+ 16, // [ 4] = ACA T (Thr)
+ 16, // [ 5] = ACC T (Thr)
+ 16, // [ 6] = ACG T (Thr)
+ 16, // [ 7] = ACT T (Thr)
+ 14, // [ 8] = AGA R (Arg)
+ 15, // [ 9] = AGC S (Ser)
+ 14, // [10] = AGG R (Arg)
+ 15, // [11] = AGT S (Ser)
+ 7 , // [12] = ATA I (Ile)
+ 7 , // [13] = ATC I (Ile)
+ 10, // [14] = ATG M (Met)
+ 7 , // [15] = ATT I (Ile)
+ 13, // [16] = CAA Q (Gln)
+ 6 , // [17] = CAC H (His)
+ 13, // [18] = CAG Q (Gln)
+ 6 , // [19] = CAT H (His)
+ 12, // [20] = CCA P (Pro)
+ 12, // [21] = CCC P (Pro)
+ 12, // [22] = CCG P (Pro)
+ 12, // [23] = CCT P (Pro)
+ 14, // [24] = CGA R (Arg)
+ 14, // [25] = CGC R (Arg)
+ 14, // [26] = CGG R (Arg)
+ 14, // [27] = CGT R (Arg)
+ 9 , // [28] = CTA L (Leu)
+ 9 , // [29] = CTC L (Leu)
+ 9 , // [30] = CTG L (Leu)
+ 9 , // [31] = CTT L (Leu)
+ 3 , // [32] = GAA E (Glu)
+ 2 , // [33] = GAC D (Asp)
+ 3 , // [34] = GAG E (Glu)
+ 2 , // [35] = GAT D (Asp)
+ 0 , // [36] = GCA A (Ala)
+ 0 , // [37] = GCC A (Ala)
+ 0 , // [38] = GCG A (Ala)
+ 0 , // [39] = GCT A (Ala)
+ 5 , // [40] = GGA G (Gly)
+ 5 , // [41] = GGC G (Gly)
+ 5 , // [42] = GGG G (Gly)
+ 5 , // [43] = GGT G (Gly)
+ 17, // [44] = GTA V (Val)
+ 17, // [45] = GTC V (Val)
+ 17, // [46] = GTG V (Val)
+ 17, // [47] = GTT V (Val)
+ 20, // [48] = TAA * (STP)
+ 19, // [49] = TAC Y (Tyr)
+ 20, // [50] = TAG * (STP)
+ 19, // [51] = TAT Y (Tyr)
+ 15, // [52] = TCA S (Ser)
+ 15, // [53] = TCC S (Ser)
+ 15, // [54] = TCG S (Ser)
+ 15, // [55] = TCT S (Ser)
+ 20, // [56] = TGA * (STP)
+ 1 , // [57] = TGC C (Cys)
+ 18, // [58] = TGG W (Trp)
+ 1 , // [59] = TGT C (Cys)
+ 9 , // [60] = TTA L (Leu)
+ 4 , // [61] = TTC F (Phe)
+ 9 , // [62] = TTG L (Leu)
+ 4 , // [63] = TTT F (Phe)
+ };
+
+char g_CodonWordToAminoChar[4*4*4] = // Lookup: codon word -> one-letter amino-acid character; '*' marks the stop codons TAA, TAG and TGA.
+ { // Per the entry comments, word = 16*first + 4*second + third base with A=0, C=1, G=2, T=3.
+ 'K', // [ 0] = AAA (Lys)
+ 'N', // [ 1] = AAC (Asn)
+ 'K', // [ 2] = AAG (Lys)
+ 'N', // [ 3] = AAT (Asn)
+ 'T', // [ 4] = ACA (Thr)
+ 'T', // [ 5] = ACC (Thr)
+ 'T', // [ 6] = ACG (Thr)
+ 'T', // [ 7] = ACT (Thr)
+ 'R', // [ 8] = AGA (Arg)
+ 'S', // [ 9] = AGC (Ser)
+ 'R', // [10] = AGG (Arg)
+ 'S', // [11] = AGT (Ser)
+ 'I', // [12] = ATA (Ile)
+ 'I', // [13] = ATC (Ile)
+ 'M', // [14] = ATG (Met)
+ 'I', // [15] = ATT (Ile)
+ 'Q', // [16] = CAA (Gln)
+ 'H', // [17] = CAC (His)
+ 'Q', // [18] = CAG (Gln)
+ 'H', // [19] = CAT (His)
+ 'P', // [20] = CCA (Pro)
+ 'P', // [21] = CCC (Pro)
+ 'P', // [22] = CCG (Pro)
+ 'P', // [23] = CCT (Pro)
+ 'R', // [24] = CGA (Arg)
+ 'R', // [25] = CGC (Arg)
+ 'R', // [26] = CGG (Arg)
+ 'R', // [27] = CGT (Arg)
+ 'L', // [28] = CTA (Leu)
+ 'L', // [29] = CTC (Leu)
+ 'L', // [30] = CTG (Leu)
+ 'L', // [31] = CTT (Leu)
+ 'E', // [32] = GAA (Glu)
+ 'D', // [33] = GAC (Asp)
+ 'E', // [34] = GAG (Glu)
+ 'D', // [35] = GAT (Asp)
+ 'A', // [36] = GCA (Ala)
+ 'A', // [37] = GCC (Ala)
+ 'A', // [38] = GCG (Ala)
+ 'A', // [39] = GCT (Ala)
+ 'G', // [40] = GGA (Gly)
+ 'G', // [41] = GGC (Gly)
+ 'G', // [42] = GGG (Gly)
+ 'G', // [43] = GGT (Gly)
+ 'V', // [44] = GTA (Val)
+ 'V', // [45] = GTC (Val)
+ 'V', // [46] = GTG (Val)
+ 'V', // [47] = GTT (Val)
+ '*', // [48] = TAA (STP)
+ 'Y', // [49] = TAC (Tyr)
+ '*', // [50] = TAG (STP)
+ 'Y', // [51] = TAT (Tyr)
+ 'S', // [52] = TCA (Ser)
+ 'S', // [53] = TCC (Ser)
+ 'S', // [54] = TCG (Ser)
+ 'S', // [55] = TCT (Ser)
+ '*', // [56] = TGA (STP)
+ 'C', // [57] = TGC (Cys)
+ 'W', // [58] = TGG (Trp)
+ 'C', // [59] = TGT (Cys)
+ 'L', // [60] = TTA (Leu)
+ 'F', // [61] = TTC (Phe)
+ 'L', // [62] = TTG (Leu)
+ 'F', // [63] = TTT (Phe)
+ };
+
+unsigned char g_CharToCompChar[256] = // Byte value -> complementary nucleotide character (A<->T, C<->G; U is treated like T).
+ { // Lower-case input yields an upper-case complement; every other byte maps to INVALID_CHAR.
+ INVALID_CHAR, // [ 0]
+ INVALID_CHAR, // [ 1]
+ INVALID_CHAR, // [ 2]
+ INVALID_CHAR, // [ 3]
+ INVALID_CHAR, // [ 4]
+ INVALID_CHAR, // [ 5]
+ INVALID_CHAR, // [ 6]
+ INVALID_CHAR, // [ 7]
+ INVALID_CHAR, // [ 8]
+ INVALID_CHAR, // [ 9]
+ INVALID_CHAR, // [ 10]
+ INVALID_CHAR, // [ 11]
+ INVALID_CHAR, // [ 12]
+ INVALID_CHAR, // [ 13]
+ INVALID_CHAR, // [ 14]
+ INVALID_CHAR, // [ 15]
+ INVALID_CHAR, // [ 16]
+ INVALID_CHAR, // [ 17]
+ INVALID_CHAR, // [ 18]
+ INVALID_CHAR, // [ 19]
+ INVALID_CHAR, // [ 20]
+ INVALID_CHAR, // [ 21]
+ INVALID_CHAR, // [ 22]
+ INVALID_CHAR, // [ 23]
+ INVALID_CHAR, // [ 24]
+ INVALID_CHAR, // [ 25]
+ INVALID_CHAR, // [ 26]
+ INVALID_CHAR, // [ 27]
+ INVALID_CHAR, // [ 28]
+ INVALID_CHAR, // [ 29]
+ INVALID_CHAR, // [ 30]
+ INVALID_CHAR, // [ 31]
+ INVALID_CHAR, // [ 32]
+ INVALID_CHAR, // [ 33]
+ INVALID_CHAR, // [ 34]
+ INVALID_CHAR, // [ 35]
+ INVALID_CHAR, // [ 36]
+ INVALID_CHAR, // [ 37]
+ INVALID_CHAR, // [ 38]
+ INVALID_CHAR, // [ 39]
+ INVALID_CHAR, // [ 40]
+ INVALID_CHAR, // [ 41]
+ INVALID_CHAR, // [ 42]
+ INVALID_CHAR, // [ 43]
+ INVALID_CHAR, // [ 44]
+ INVALID_CHAR, // [ 45]
+ INVALID_CHAR, // [ 46]
+ INVALID_CHAR, // [ 47]
+ INVALID_CHAR, // [ 48]
+ INVALID_CHAR, // [ 49]
+ INVALID_CHAR, // [ 50]
+ INVALID_CHAR, // [ 51]
+ INVALID_CHAR, // [ 52]
+ INVALID_CHAR, // [ 53]
+ INVALID_CHAR, // [ 54]
+ INVALID_CHAR, // [ 55]
+ INVALID_CHAR, // [ 56]
+ INVALID_CHAR, // [ 57]
+ INVALID_CHAR, // [ 58]
+ INVALID_CHAR, // [ 59]
+ INVALID_CHAR, // [ 60]
+ INVALID_CHAR, // [ 61]
+ INVALID_CHAR, // [ 62]
+ INVALID_CHAR, // [ 63]
+ INVALID_CHAR, // [ 64]
+ 'T', // [ 65] A -> T
+ INVALID_CHAR, // [ 66]
+ 'G', // [ 67] C -> G
+ INVALID_CHAR, // [ 68]
+ INVALID_CHAR, // [ 69]
+ INVALID_CHAR, // [ 70]
+ 'C', // [ 71] G -> C
+ INVALID_CHAR, // [ 72]
+ INVALID_CHAR, // [ 73]
+ INVALID_CHAR, // [ 74]
+ INVALID_CHAR, // [ 75]
+ INVALID_CHAR, // [ 76]
+ INVALID_CHAR, // [ 77]
+ INVALID_CHAR, // [ 78]
+ INVALID_CHAR, // [ 79]
+ INVALID_CHAR, // [ 80]
+ INVALID_CHAR, // [ 81]
+ INVALID_CHAR, // [ 82]
+ INVALID_CHAR, // [ 83]
+ 'A', // [ 84] T -> A
+ 'A', // [ 85] U -> A
+ INVALID_CHAR, // [ 86]
+ INVALID_CHAR, // [ 87]
+ INVALID_CHAR, // [ 88]
+ INVALID_CHAR, // [ 89]
+ INVALID_CHAR, // [ 90]
+ INVALID_CHAR, // [ 91]
+ INVALID_CHAR, // [ 92]
+ INVALID_CHAR, // [ 93]
+ INVALID_CHAR, // [ 94]
+ INVALID_CHAR, // [ 95]
+ INVALID_CHAR, // [ 96]
+ 'T', // [ 97] a -> T
+ INVALID_CHAR, // [ 98]
+ 'G', // [ 99] c -> G
+ INVALID_CHAR, // [100]
+ INVALID_CHAR, // [101]
+ INVALID_CHAR, // [102]
+ 'C', // [103] g -> C
+ INVALID_CHAR, // [104]
+ INVALID_CHAR, // [105]
+ INVALID_CHAR, // [106]
+ INVALID_CHAR, // [107]
+ INVALID_CHAR, // [108]
+ INVALID_CHAR, // [109]
+ INVALID_CHAR, // [110]
+ INVALID_CHAR, // [111]
+ INVALID_CHAR, // [112]
+ INVALID_CHAR, // [113]
+ INVALID_CHAR, // [114]
+ INVALID_CHAR, // [115]
+ 'A', // [116] t -> A
+ 'A', // [117] u -> A
+ INVALID_CHAR, // [118]
+ INVALID_CHAR, // [119]
+ INVALID_CHAR, // [120]
+ INVALID_CHAR, // [121]
+ INVALID_CHAR, // [122]
+ INVALID_CHAR, // [123]
+ INVALID_CHAR, // [124]
+ INVALID_CHAR, // [125]
+ INVALID_CHAR, // [126]
+ INVALID_CHAR, // [127]
+ INVALID_CHAR, // [128]
+ INVALID_CHAR, // [129]
+ INVALID_CHAR, // [130]
+ INVALID_CHAR, // [131]
+ INVALID_CHAR, // [132]
+ INVALID_CHAR, // [133]
+ INVALID_CHAR, // [134]
+ INVALID_CHAR, // [135]
+ INVALID_CHAR, // [136]
+ INVALID_CHAR, // [137]
+ INVALID_CHAR, // [138]
+ INVALID_CHAR, // [139]
+ INVALID_CHAR, // [140]
+ INVALID_CHAR, // [141]
+ INVALID_CHAR, // [142]
+ INVALID_CHAR, // [143]
+ INVALID_CHAR, // [144]
+ INVALID_CHAR, // [145]
+ INVALID_CHAR, // [146]
+ INVALID_CHAR, // [147]
+ INVALID_CHAR, // [148]
+ INVALID_CHAR, // [149]
+ INVALID_CHAR, // [150]
+ INVALID_CHAR, // [151]
+ INVALID_CHAR, // [152]
+ INVALID_CHAR, // [153]
+ INVALID_CHAR, // [154]
+ INVALID_CHAR, // [155]
+ INVALID_CHAR, // [156]
+ INVALID_CHAR, // [157]
+ INVALID_CHAR, // [158]
+ INVALID_CHAR, // [159]
+ INVALID_CHAR, // [160]
+ INVALID_CHAR, // [161]
+ INVALID_CHAR, // [162]
+ INVALID_CHAR, // [163]
+ INVALID_CHAR, // [164]
+ INVALID_CHAR, // [165]
+ INVALID_CHAR, // [166]
+ INVALID_CHAR, // [167]
+ INVALID_CHAR, // [168]
+ INVALID_CHAR, // [169]
+ INVALID_CHAR, // [170]
+ INVALID_CHAR, // [171]
+ INVALID_CHAR, // [172]
+ INVALID_CHAR, // [173]
+ INVALID_CHAR, // [174]
+ INVALID_CHAR, // [175]
+ INVALID_CHAR, // [176]
+ INVALID_CHAR, // [177]
+ INVALID_CHAR, // [178]
+ INVALID_CHAR, // [179]
+ INVALID_CHAR, // [180]
+ INVALID_CHAR, // [181]
+ INVALID_CHAR, // [182]
+ INVALID_CHAR, // [183]
+ INVALID_CHAR, // [184]
+ INVALID_CHAR, // [185]
+ INVALID_CHAR, // [186]
+ INVALID_CHAR, // [187]
+ INVALID_CHAR, // [188]
+ INVALID_CHAR, // [189]
+ INVALID_CHAR, // [190]
+ INVALID_CHAR, // [191]
+ INVALID_CHAR, // [192]
+ INVALID_CHAR, // [193]
+ INVALID_CHAR, // [194]
+ INVALID_CHAR, // [195]
+ INVALID_CHAR, // [196]
+ INVALID_CHAR, // [197]
+ INVALID_CHAR, // [198]
+ INVALID_CHAR, // [199]
+ INVALID_CHAR, // [200]
+ INVALID_CHAR, // [201]
+ INVALID_CHAR, // [202]
+ INVALID_CHAR, // [203]
+ INVALID_CHAR, // [204]
+ INVALID_CHAR, // [205]
+ INVALID_CHAR, // [206]
+ INVALID_CHAR, // [207]
+ INVALID_CHAR, // [208]
+ INVALID_CHAR, // [209]
+ INVALID_CHAR, // [210]
+ INVALID_CHAR, // [211]
+ INVALID_CHAR, // [212]
+ INVALID_CHAR, // [213]
+ INVALID_CHAR, // [214]
+ INVALID_CHAR, // [215]
+ INVALID_CHAR, // [216]
+ INVALID_CHAR, // [217]
+ INVALID_CHAR, // [218]
+ INVALID_CHAR, // [219]
+ INVALID_CHAR, // [220]
+ INVALID_CHAR, // [221]
+ INVALID_CHAR, // [222]
+ INVALID_CHAR, // [223]
+ INVALID_CHAR, // [224]
+ INVALID_CHAR, // [225]
+ INVALID_CHAR, // [226]
+ INVALID_CHAR, // [227]
+ INVALID_CHAR, // [228]
+ INVALID_CHAR, // [229]
+ INVALID_CHAR, // [230]
+ INVALID_CHAR, // [231]
+ INVALID_CHAR, // [232]
+ INVALID_CHAR, // [233]
+ INVALID_CHAR, // [234]
+ INVALID_CHAR, // [235]
+ INVALID_CHAR, // [236]
+ INVALID_CHAR, // [237]
+ INVALID_CHAR, // [238]
+ INVALID_CHAR, // [239]
+ INVALID_CHAR, // [240]
+ INVALID_CHAR, // [241]
+ INVALID_CHAR, // [242]
+ INVALID_CHAR, // [243]
+ INVALID_CHAR, // [244]
+ INVALID_CHAR, // [245]
+ INVALID_CHAR, // [246]
+ INVALID_CHAR, // [247]
+ INVALID_CHAR, // [248]
+ INVALID_CHAR, // [249]
+ INVALID_CHAR, // [250]
+ INVALID_CHAR, // [251]
+ INVALID_CHAR, // [252]
+ INVALID_CHAR, // [253]
+ INVALID_CHAR, // [254]
+ INVALID_CHAR, // [255]
+};
+
+unsigned g_CharToCompLetter[256] = // Byte value -> letter index of the complementary nucleotide (A=0, C=1, G=2, T=3; U is treated like T).
+ { // Unmapped bytes hold INVALID_LETTER. NOTE(review): alpha.h declares INVALID_LETTER as 0, the same value stored for T/U -> A, so the two cases cannot be told apart here -- confirm callers validate input first.
+ INVALID_LETTER, // [ 0]
+ INVALID_LETTER, // [ 1]
+ INVALID_LETTER, // [ 2]
+ INVALID_LETTER, // [ 3]
+ INVALID_LETTER, // [ 4]
+ INVALID_LETTER, // [ 5]
+ INVALID_LETTER, // [ 6]
+ INVALID_LETTER, // [ 7]
+ INVALID_LETTER, // [ 8]
+ INVALID_LETTER, // [ 9]
+ INVALID_LETTER, // [ 10]
+ INVALID_LETTER, // [ 11]
+ INVALID_LETTER, // [ 12]
+ INVALID_LETTER, // [ 13]
+ INVALID_LETTER, // [ 14]
+ INVALID_LETTER, // [ 15]
+ INVALID_LETTER, // [ 16]
+ INVALID_LETTER, // [ 17]
+ INVALID_LETTER, // [ 18]
+ INVALID_LETTER, // [ 19]
+ INVALID_LETTER, // [ 20]
+ INVALID_LETTER, // [ 21]
+ INVALID_LETTER, // [ 22]
+ INVALID_LETTER, // [ 23]
+ INVALID_LETTER, // [ 24]
+ INVALID_LETTER, // [ 25]
+ INVALID_LETTER, // [ 26]
+ INVALID_LETTER, // [ 27]
+ INVALID_LETTER, // [ 28]
+ INVALID_LETTER, // [ 29]
+ INVALID_LETTER, // [ 30]
+ INVALID_LETTER, // [ 31]
+ INVALID_LETTER, // [ 32]
+ INVALID_LETTER, // [ 33]
+ INVALID_LETTER, // [ 34]
+ INVALID_LETTER, // [ 35]
+ INVALID_LETTER, // [ 36]
+ INVALID_LETTER, // [ 37]
+ INVALID_LETTER, // [ 38]
+ INVALID_LETTER, // [ 39]
+ INVALID_LETTER, // [ 40]
+ INVALID_LETTER, // [ 41]
+ INVALID_LETTER, // [ 42]
+ INVALID_LETTER, // [ 43]
+ INVALID_LETTER, // [ 44]
+ INVALID_LETTER, // [ 45]
+ INVALID_LETTER, // [ 46]
+ INVALID_LETTER, // [ 47]
+ INVALID_LETTER, // [ 48]
+ INVALID_LETTER, // [ 49]
+ INVALID_LETTER, // [ 50]
+ INVALID_LETTER, // [ 51]
+ INVALID_LETTER, // [ 52]
+ INVALID_LETTER, // [ 53]
+ INVALID_LETTER, // [ 54]
+ INVALID_LETTER, // [ 55]
+ INVALID_LETTER, // [ 56]
+ INVALID_LETTER, // [ 57]
+ INVALID_LETTER, // [ 58]
+ INVALID_LETTER, // [ 59]
+ INVALID_LETTER, // [ 60]
+ INVALID_LETTER, // [ 61]
+ INVALID_LETTER, // [ 62]
+ INVALID_LETTER, // [ 63]
+ INVALID_LETTER, // [ 64]
+ 3, // [ 65] A -> T
+ INVALID_LETTER, // [ 66]
+ 2, // [ 67] C -> G
+ INVALID_LETTER, // [ 68]
+ INVALID_LETTER, // [ 69]
+ INVALID_LETTER, // [ 70]
+ 1, // [ 71] G -> C
+ INVALID_LETTER, // [ 72]
+ INVALID_LETTER, // [ 73]
+ INVALID_LETTER, // [ 74]
+ INVALID_LETTER, // [ 75]
+ INVALID_LETTER, // [ 76]
+ INVALID_LETTER, // [ 77]
+ INVALID_LETTER, // [ 78]
+ INVALID_LETTER, // [ 79]
+ INVALID_LETTER, // [ 80]
+ INVALID_LETTER, // [ 81]
+ INVALID_LETTER, // [ 82]
+ INVALID_LETTER, // [ 83]
+ 0, // [ 84] T -> A
+ 0, // [ 85] U -> A
+ INVALID_LETTER, // [ 86]
+ INVALID_LETTER, // [ 87]
+ INVALID_LETTER, // [ 88]
+ INVALID_LETTER, // [ 89]
+ INVALID_LETTER, // [ 90]
+ INVALID_LETTER, // [ 91]
+ INVALID_LETTER, // [ 92]
+ INVALID_LETTER, // [ 93]
+ INVALID_LETTER, // [ 94]
+ INVALID_LETTER, // [ 95]
+ INVALID_LETTER, // [ 96]
+ 3, // [ 97] a -> T
+ INVALID_LETTER, // [ 98]
+ 2, // [ 99] c -> G
+ INVALID_LETTER, // [100]
+ INVALID_LETTER, // [101]
+ INVALID_LETTER, // [102]
+ 1, // [103] g -> C
+ INVALID_LETTER, // [104]
+ INVALID_LETTER, // [105]
+ INVALID_LETTER, // [106]
+ INVALID_LETTER, // [107]
+ INVALID_LETTER, // [108]
+ INVALID_LETTER, // [109]
+ INVALID_LETTER, // [110]
+ INVALID_LETTER, // [111]
+ INVALID_LETTER, // [112]
+ INVALID_LETTER, // [113]
+ INVALID_LETTER, // [114]
+ INVALID_LETTER, // [115]
+ 0, // [116] t -> A
+ 0, // [117] u -> A
+ INVALID_LETTER, // [118]
+ INVALID_LETTER, // [119]
+ INVALID_LETTER, // [120]
+ INVALID_LETTER, // [121]
+ INVALID_LETTER, // [122]
+ INVALID_LETTER, // [123]
+ INVALID_LETTER, // [124]
+ INVALID_LETTER, // [125]
+ INVALID_LETTER, // [126]
+ INVALID_LETTER, // [127]
+ INVALID_LETTER, // [128]
+ INVALID_LETTER, // [129]
+ INVALID_LETTER, // [130]
+ INVALID_LETTER, // [131]
+ INVALID_LETTER, // [132]
+ INVALID_LETTER, // [133]
+ INVALID_LETTER, // [134]
+ INVALID_LETTER, // [135]
+ INVALID_LETTER, // [136]
+ INVALID_LETTER, // [137]
+ INVALID_LETTER, // [138]
+ INVALID_LETTER, // [139]
+ INVALID_LETTER, // [140]
+ INVALID_LETTER, // [141]
+ INVALID_LETTER, // [142]
+ INVALID_LETTER, // [143]
+ INVALID_LETTER, // [144]
+ INVALID_LETTER, // [145]
+ INVALID_LETTER, // [146]
+ INVALID_LETTER, // [147]
+ INVALID_LETTER, // [148]
+ INVALID_LETTER, // [149]
+ INVALID_LETTER, // [150]
+ INVALID_LETTER, // [151]
+ INVALID_LETTER, // [152]
+ INVALID_LETTER, // [153]
+ INVALID_LETTER, // [154]
+ INVALID_LETTER, // [155]
+ INVALID_LETTER, // [156]
+ INVALID_LETTER, // [157]
+ INVALID_LETTER, // [158]
+ INVALID_LETTER, // [159]
+ INVALID_LETTER, // [160]
+ INVALID_LETTER, // [161]
+ INVALID_LETTER, // [162]
+ INVALID_LETTER, // [163]
+ INVALID_LETTER, // [164]
+ INVALID_LETTER, // [165]
+ INVALID_LETTER, // [166]
+ INVALID_LETTER, // [167]
+ INVALID_LETTER, // [168]
+ INVALID_LETTER, // [169]
+ INVALID_LETTER, // [170]
+ INVALID_LETTER, // [171]
+ INVALID_LETTER, // [172]
+ INVALID_LETTER, // [173]
+ INVALID_LETTER, // [174]
+ INVALID_LETTER, // [175]
+ INVALID_LETTER, // [176]
+ INVALID_LETTER, // [177]
+ INVALID_LETTER, // [178]
+ INVALID_LETTER, // [179]
+ INVALID_LETTER, // [180]
+ INVALID_LETTER, // [181]
+ INVALID_LETTER, // [182]
+ INVALID_LETTER, // [183]
+ INVALID_LETTER, // [184]
+ INVALID_LETTER, // [185]
+ INVALID_LETTER, // [186]
+ INVALID_LETTER, // [187]
+ INVALID_LETTER, // [188]
+ INVALID_LETTER, // [189]
+ INVALID_LETTER, // [190]
+ INVALID_LETTER, // [191]
+ INVALID_LETTER, // [192]
+ INVALID_LETTER, // [193]
+ INVALID_LETTER, // [194]
+ INVALID_LETTER, // [195]
+ INVALID_LETTER, // [196]
+ INVALID_LETTER, // [197]
+ INVALID_LETTER, // [198]
+ INVALID_LETTER, // [199]
+ INVALID_LETTER, // [200]
+ INVALID_LETTER, // [201]
+ INVALID_LETTER, // [202]
+ INVALID_LETTER, // [203]
+ INVALID_LETTER, // [204]
+ INVALID_LETTER, // [205]
+ INVALID_LETTER, // [206]
+ INVALID_LETTER, // [207]
+ INVALID_LETTER, // [208]
+ INVALID_LETTER, // [209]
+ INVALID_LETTER, // [210]
+ INVALID_LETTER, // [211]
+ INVALID_LETTER, // [212]
+ INVALID_LETTER, // [213]
+ INVALID_LETTER, // [214]
+ INVALID_LETTER, // [215]
+ INVALID_LETTER, // [216]
+ INVALID_LETTER, // [217]
+ INVALID_LETTER, // [218]
+ INVALID_LETTER, // [219]
+ INVALID_LETTER, // [220]
+ INVALID_LETTER, // [221]
+ INVALID_LETTER, // [222]
+ INVALID_LETTER, // [223]
+ INVALID_LETTER, // [224]
+ INVALID_LETTER, // [225]
+ INVALID_LETTER, // [226]
+ INVALID_LETTER, // [227]
+ INVALID_LETTER, // [228]
+ INVALID_LETTER, // [229]
+ INVALID_LETTER, // [230]
+ INVALID_LETTER, // [231]
+ INVALID_LETTER, // [232]
+ INVALID_LETTER, // [233]
+ INVALID_LETTER, // [234]
+ INVALID_LETTER, // [235]
+ INVALID_LETTER, // [236]
+ INVALID_LETTER, // [237]
+ INVALID_LETTER, // [238]
+ INVALID_LETTER, // [239]
+ INVALID_LETTER, // [240]
+ INVALID_LETTER, // [241]
+ INVALID_LETTER, // [242]
+ INVALID_LETTER, // [243]
+ INVALID_LETTER, // [244]
+ INVALID_LETTER, // [245]
+ INVALID_LETTER, // [246]
+ INVALID_LETTER, // [247]
+ INVALID_LETTER, // [248]
+ INVALID_LETTER, // [249]
+ INVALID_LETTER, // [250]
+ INVALID_LETTER, // [251]
+ INVALID_LETTER, // [252]
+ INVALID_LETTER, // [253]
+ INVALID_LETTER, // [254]
+ INVALID_LETTER, // [255]
+};
+
+bool g_IsAminoChar[256] = // Byte value -> true when the byte is one of the 20 amino acid one-letter codes (either case) or '*' (stop).
+ { // B, J, O, U, X and Z are flagged false in both cases.
+ false, // [ 0] 0x00
+ false, // [ 1] 0x01
+ false, // [ 2] 0x02
+ false, // [ 3] 0x03
+ false, // [ 4] 0x04
+ false, // [ 5] 0x05
+ false, // [ 6] 0x06
+ false, // [ 7] 0x07
+ false, // [ 8] 0x08
+ false, // [ 9] 0x09
+ false, // [ 10] 0x0a
+ false, // [ 11] 0x0b
+ false, // [ 12] 0x0c
+ false, // [ 13] 0x0d
+ false, // [ 14] 0x0e
+ false, // [ 15] 0x0f
+ false, // [ 16] 0x10
+ false, // [ 17] 0x11
+ false, // [ 18] 0x12
+ false, // [ 19] 0x13
+ false, // [ 20] 0x14
+ false, // [ 21] 0x15
+ false, // [ 22] 0x16
+ false, // [ 23] 0x17
+ false, // [ 24] 0x18
+ false, // [ 25] 0x19
+ false, // [ 26] 0x1a
+ false, // [ 27] 0x1b
+ false, // [ 28] 0x1c
+ false, // [ 29] 0x1d
+ false, // [ 30] 0x1e
+ false, // [ 31] 0x1f
+ false, // [ 32] ' '
+ false, // [ 33] '!'
+ false, // [ 34] '"'
+ false, // [ 35] '#'
+ false, // [ 36] '$'
+ false, // [ 37] '%'
+ false, // [ 38] '&'
+ false, // [ 39] '''
+ false, // [ 40] '('
+ false, // [ 41] ')'
+ true, // [ 42] '*' = STP
+ false, // [ 43] '+'
+ false, // [ 44] ','
+ false, // [ 45] '-'
+ false, // [ 46] '.'
+ false, // [ 47] '/'
+ false, // [ 48] '0'
+ false, // [ 49] '1'
+ false, // [ 50] '2'
+ false, // [ 51] '3'
+ false, // [ 52] '4'
+ false, // [ 53] '5'
+ false, // [ 54] '6'
+ false, // [ 55] '7'
+ false, // [ 56] '8'
+ false, // [ 57] '9'
+ false, // [ 58] ':'
+ false, // [ 59] ';'
+ false, // [ 60] '<'
+ false, // [ 61] '='
+ false, // [ 62] '>'
+ false, // [ 63] '?'
+ false, // [ 64] '@'
+ true, // [ 65] 'A' = Ala
+ false, // [ 66] 'B'
+ true, // [ 67] 'C' = Cys
+ true, // [ 68] 'D' = Asp
+ true, // [ 69] 'E' = Glu
+ true, // [ 70] 'F' = Phe
+ true, // [ 71] 'G' = Gly
+ true, // [ 72] 'H' = His
+ true, // [ 73] 'I' = Ile
+ false, // [ 74] 'J'
+ true, // [ 75] 'K' = Lys
+ true, // [ 76] 'L' = Leu
+ true, // [ 77] 'M' = Met
+ true, // [ 78] 'N' = Asn
+ false, // [ 79] 'O'
+ true, // [ 80] 'P' = Pro
+ true, // [ 81] 'Q' = Gln
+ true, // [ 82] 'R' = Arg
+ true, // [ 83] 'S' = Ser
+ true, // [ 84] 'T' = Thr
+ false, // [ 85] 'U'
+ true, // [ 86] 'V' = Val
+ true, // [ 87] 'W' = Trp
+ false, // [ 88] 'X'
+ true, // [ 89] 'Y' = Tyr
+ false, // [ 90] 'Z'
+ false, // [ 91] '['
+ false, // [ 92] '\'
+ false, // [ 93] ']'
+ false, // [ 94] '^'
+ false, // [ 95] '_'
+ false, // [ 96] '`'
+ true, // [ 97] 'a' = Ala
+ false, // [ 98] 'b'
+ true, // [ 99] 'c' = Cys
+ true, // [100] 'd' = Asp
+ true, // [101] 'e' = Glu
+ true, // [102] 'f' = Phe
+ true, // [103] 'g' = Gly
+ true, // [104] 'h' = His
+ true, // [105] 'i' = Ile
+ false, // [106] 'j'
+ true, // [107] 'k' = Lys
+ true, // [108] 'l' = Leu
+ true, // [109] 'm' = Met
+ true, // [110] 'n' = Asn
+ false, // [111] 'o'
+ true, // [112] 'p' = Pro
+ true, // [113] 'q' = Gln
+ true, // [114] 'r' = Arg
+ true, // [115] 's' = Ser
+ true, // [116] 't' = Thr
+ false, // [117] 'u'
+ true, // [118] 'v' = Val
+ true, // [119] 'w' = Trp
+ false, // [120] 'x'
+ true, // [121] 'y' = Tyr
+ false, // [122] 'z'
+ false, // [123] '{'
+ false, // [124] '|'
+ false, // [125] '}'
+ false, // [126] '~'
+ false, // [127] 0x7f
+ false, // [128] 0x80
+ false, // [129] 0x81
+ false, // [130] 0x82
+ false, // [131] 0x83
+ false, // [132] 0x84
+ false, // [133] 0x85
+ false, // [134] 0x86
+ false, // [135] 0x87
+ false, // [136] 0x88
+ false, // [137] 0x89
+ false, // [138] 0x8a
+ false, // [139] 0x8b
+ false, // [140] 0x8c
+ false, // [141] 0x8d
+ false, // [142] 0x8e
+ false, // [143] 0x8f
+ false, // [144] 0x90
+ false, // [145] 0x91
+ false, // [146] 0x92
+ false, // [147] 0x93
+ false, // [148] 0x94
+ false, // [149] 0x95
+ false, // [150] 0x96
+ false, // [151] 0x97
+ false, // [152] 0x98
+ false, // [153] 0x99
+ false, // [154] 0x9a
+ false, // [155] 0x9b
+ false, // [156] 0x9c
+ false, // [157] 0x9d
+ false, // [158] 0x9e
+ false, // [159] 0x9f
+ false, // [160] 0xa0
+ false, // [161] 0xa1
+ false, // [162] 0xa2
+ false, // [163] 0xa3
+ false, // [164] 0xa4
+ false, // [165] 0xa5
+ false, // [166] 0xa6
+ false, // [167] 0xa7
+ false, // [168] 0xa8
+ false, // [169] 0xa9
+ false, // [170] 0xaa
+ false, // [171] 0xab
+ false, // [172] 0xac
+ false, // [173] 0xad
+ false, // [174] 0xae
+ false, // [175] 0xaf
+ false, // [176] 0xb0
+ false, // [177] 0xb1
+ false, // [178] 0xb2
+ false, // [179] 0xb3
+ false, // [180] 0xb4
+ false, // [181] 0xb5
+ false, // [182] 0xb6
+ false, // [183] 0xb7
+ false, // [184] 0xb8
+ false, // [185] 0xb9
+ false, // [186] 0xba
+ false, // [187] 0xbb
+ false, // [188] 0xbc
+ false, // [189] 0xbd
+ false, // [190] 0xbe
+ false, // [191] 0xbf
+ false, // [192] 0xc0
+ false, // [193] 0xc1
+ false, // [194] 0xc2
+ false, // [195] 0xc3
+ false, // [196] 0xc4
+ false, // [197] 0xc5
+ false, // [198] 0xc6
+ false, // [199] 0xc7
+ false, // [200] 0xc8
+ false, // [201] 0xc9
+ false, // [202] 0xca
+ false, // [203] 0xcb
+ false, // [204] 0xcc
+ false, // [205] 0xcd
+ false, // [206] 0xce
+ false, // [207] 0xcf
+ false, // [208] 0xd0
+ false, // [209] 0xd1
+ false, // [210] 0xd2
+ false, // [211] 0xd3
+ false, // [212] 0xd4
+ false, // [213] 0xd5
+ false, // [214] 0xd6
+ false, // [215] 0xd7
+ false, // [216] 0xd8
+ false, // [217] 0xd9
+ false, // [218] 0xda
+ false, // [219] 0xdb
+ false, // [220] 0xdc
+ false, // [221] 0xdd
+ false, // [222] 0xde
+ false, // [223] 0xdf
+ false, // [224] 0xe0
+ false, // [225] 0xe1
+ false, // [226] 0xe2
+ false, // [227] 0xe3
+ false, // [228] 0xe4
+ false, // [229] 0xe5
+ false, // [230] 0xe6
+ false, // [231] 0xe7
+ false, // [232] 0xe8
+ false, // [233] 0xe9
+ false, // [234] 0xea
+ false, // [235] 0xeb
+ false, // [236] 0xec
+ false, // [237] 0xed
+ false, // [238] 0xee
+ false, // [239] 0xef
+ false, // [240] 0xf0
+ false, // [241] 0xf1
+ false, // [242] 0xf2
+ false, // [243] 0xf3
+ false, // [244] 0xf4
+ false, // [245] 0xf5
+ false, // [246] 0xf6
+ false, // [247] 0xf7
+ false, // [248] 0xf8
+ false, // [249] 0xf9
+ false, // [250] 0xfa
+ false, // [251] 0xfb
+ false, // [252] 0xfc
+ false, // [253] 0xfd
+ false, // [254] 0xfe
+ false, // [255] 0xff
+ };
+
+bool g_IsNucleoChar[256] = // Byte value -> true when the byte is A, C, G, T, U or N in either case.
+ { // Unlike g_IsACGTU, this table also accepts N/n; every other byte is false.
+ false, // [ 0] 0x00
+ false, // [ 1] 0x01
+ false, // [ 2] 0x02
+ false, // [ 3] 0x03
+ false, // [ 4] 0x04
+ false, // [ 5] 0x05
+ false, // [ 6] 0x06
+ false, // [ 7] 0x07
+ false, // [ 8] 0x08
+ false, // [ 9] 0x09
+ false, // [ 10] 0x0a
+ false, // [ 11] 0x0b
+ false, // [ 12] 0x0c
+ false, // [ 13] 0x0d
+ false, // [ 14] 0x0e
+ false, // [ 15] 0x0f
+ false, // [ 16] 0x10
+ false, // [ 17] 0x11
+ false, // [ 18] 0x12
+ false, // [ 19] 0x13
+ false, // [ 20] 0x14
+ false, // [ 21] 0x15
+ false, // [ 22] 0x16
+ false, // [ 23] 0x17
+ false, // [ 24] 0x18
+ false, // [ 25] 0x19
+ false, // [ 26] 0x1a
+ false, // [ 27] 0x1b
+ false, // [ 28] 0x1c
+ false, // [ 29] 0x1d
+ false, // [ 30] 0x1e
+ false, // [ 31] 0x1f
+ false, // [ 32] ' '
+ false, // [ 33] '!'
+ false, // [ 34] '"'
+ false, // [ 35] '#'
+ false, // [ 36] '$'
+ false, // [ 37] '%'
+ false, // [ 38] '&'
+ false, // [ 39] '''
+ false, // [ 40] '('
+ false, // [ 41] ')'
+ false, // [ 42] '*'
+ false, // [ 43] '+'
+ false, // [ 44] ','
+ false, // [ 45] '-'
+ false, // [ 46] '.'
+ false, // [ 47] '/'
+ false, // [ 48] '0'
+ false, // [ 49] '1'
+ false, // [ 50] '2'
+ false, // [ 51] '3'
+ false, // [ 52] '4'
+ false, // [ 53] '5'
+ false, // [ 54] '6'
+ false, // [ 55] '7'
+ false, // [ 56] '8'
+ false, // [ 57] '9'
+ false, // [ 58] ':'
+ false, // [ 59] ';'
+ false, // [ 60] '<'
+ false, // [ 61] '='
+ false, // [ 62] '>'
+ false, // [ 63] '?'
+ false, // [ 64] '@'
+ true, // [ 65] 'A' (Nucleotide)
+ false, // [ 66] 'B'
+ true, // [ 67] 'C' (Nucleotide)
+ false, // [ 68] 'D'
+ false, // [ 69] 'E'
+ false, // [ 70] 'F'
+ true, // [ 71] 'G' (Nucleotide)
+ false, // [ 72] 'H'
+ false, // [ 73] 'I'
+ false, // [ 74] 'J'
+ false, // [ 75] 'K'
+ false, // [ 76] 'L'
+ false, // [ 77] 'M'
+ true, // [ 78] 'N' (Nucleotide)
+ false, // [ 79] 'O'
+ false, // [ 80] 'P'
+ false, // [ 81] 'Q'
+ false, // [ 82] 'R'
+ false, // [ 83] 'S'
+ true, // [ 84] 'T' (Nucleotide)
+ true, // [ 85] 'U' (Nucleotide)
+ false, // [ 86] 'V'
+ false, // [ 87] 'W'
+ false, // [ 88] 'X'
+ false, // [ 89] 'Y'
+ false, // [ 90] 'Z'
+ false, // [ 91] '['
+ false, // [ 92] '\'
+ false, // [ 93] ']'
+ false, // [ 94] '^'
+ false, // [ 95] '_'
+ false, // [ 96] '`'
+ true, // [ 97] 'a' (Nucleotide)
+ false, // [ 98] 'b'
+ true, // [ 99] 'c' (Nucleotide)
+ false, // [100] 'd'
+ false, // [101] 'e'
+ false, // [102] 'f'
+ true, // [103] 'g' (Nucleotide)
+ false, // [104] 'h'
+ false, // [105] 'i'
+ false, // [106] 'j'
+ false, // [107] 'k'
+ false, // [108] 'l'
+ false, // [109] 'm'
+ true, // [110] 'n' (Nucleotide)
+ false, // [111] 'o'
+ false, // [112] 'p'
+ false, // [113] 'q'
+ false, // [114] 'r'
+ false, // [115] 's'
+ true, // [116] 't' (Nucleotide)
+ true, // [117] 'u' (Nucleotide)
+ false, // [118] 'v'
+ false, // [119] 'w'
+ false, // [120] 'x'
+ false, // [121] 'y'
+ false, // [122] 'z'
+ false, // [123] '{'
+ false, // [124] '|'
+ false, // [125] '}'
+ false, // [126] '~'
+ false, // [127] 0x7f
+ false, // [128] 0x80
+ false, // [129] 0x81
+ false, // [130] 0x82
+ false, // [131] 0x83
+ false, // [132] 0x84
+ false, // [133] 0x85
+ false, // [134] 0x86
+ false, // [135] 0x87
+ false, // [136] 0x88
+ false, // [137] 0x89
+ false, // [138] 0x8a
+ false, // [139] 0x8b
+ false, // [140] 0x8c
+ false, // [141] 0x8d
+ false, // [142] 0x8e
+ false, // [143] 0x8f
+ false, // [144] 0x90
+ false, // [145] 0x91
+ false, // [146] 0x92
+ false, // [147] 0x93
+ false, // [148] 0x94
+ false, // [149] 0x95
+ false, // [150] 0x96
+ false, // [151] 0x97
+ false, // [152] 0x98
+ false, // [153] 0x99
+ false, // [154] 0x9a
+ false, // [155] 0x9b
+ false, // [156] 0x9c
+ false, // [157] 0x9d
+ false, // [158] 0x9e
+ false, // [159] 0x9f
+ false, // [160] 0xa0
+ false, // [161] 0xa1
+ false, // [162] 0xa2
+ false, // [163] 0xa3
+ false, // [164] 0xa4
+ false, // [165] 0xa5
+ false, // [166] 0xa6
+ false, // [167] 0xa7
+ false, // [168] 0xa8
+ false, // [169] 0xa9
+ false, // [170] 0xaa
+ false, // [171] 0xab
+ false, // [172] 0xac
+ false, // [173] 0xad
+ false, // [174] 0xae
+ false, // [175] 0xaf
+ false, // [176] 0xb0
+ false, // [177] 0xb1
+ false, // [178] 0xb2
+ false, // [179] 0xb3
+ false, // [180] 0xb4
+ false, // [181] 0xb5
+ false, // [182] 0xb6
+ false, // [183] 0xb7
+ false, // [184] 0xb8
+ false, // [185] 0xb9
+ false, // [186] 0xba
+ false, // [187] 0xbb
+ false, // [188] 0xbc
+ false, // [189] 0xbd
+ false, // [190] 0xbe
+ false, // [191] 0xbf
+ false, // [192] 0xc0
+ false, // [193] 0xc1
+ false, // [194] 0xc2
+ false, // [195] 0xc3
+ false, // [196] 0xc4
+ false, // [197] 0xc5
+ false, // [198] 0xc6
+ false, // [199] 0xc7
+ false, // [200] 0xc8
+ false, // [201] 0xc9
+ false, // [202] 0xca
+ false, // [203] 0xcb
+ false, // [204] 0xcc
+ false, // [205] 0xcd
+ false, // [206] 0xce
+ false, // [207] 0xcf
+ false, // [208] 0xd0
+ false, // [209] 0xd1
+ false, // [210] 0xd2
+ false, // [211] 0xd3
+ false, // [212] 0xd4
+ false, // [213] 0xd5
+ false, // [214] 0xd6
+ false, // [215] 0xd7
+ false, // [216] 0xd8
+ false, // [217] 0xd9
+ false, // [218] 0xda
+ false, // [219] 0xdb
+ false, // [220] 0xdc
+ false, // [221] 0xdd
+ false, // [222] 0xde
+ false, // [223] 0xdf
+ false, // [224] 0xe0
+ false, // [225] 0xe1
+ false, // [226] 0xe2
+ false, // [227] 0xe3
+ false, // [228] 0xe4
+ false, // [229] 0xe5
+ false, // [230] 0xe6
+ false, // [231] 0xe7
+ false, // [232] 0xe8
+ false, // [233] 0xe9
+ false, // [234] 0xea
+ false, // [235] 0xeb
+ false, // [236] 0xec
+ false, // [237] 0xed
+ false, // [238] 0xee
+ false, // [239] 0xef
+ false, // [240] 0xf0
+ false, // [241] 0xf1
+ false, // [242] 0xf2
+ false, // [243] 0xf3
+ false, // [244] 0xf4
+ false, // [245] 0xf5
+ false, // [246] 0xf6
+ false, // [247] 0xf7
+ false, // [248] 0xf8
+ false, // [249] 0xf9
+ false, // [250] 0xfa
+ false, // [251] 0xfb
+ false, // [252] 0xfc
+ false, // [253] 0xfd
+ false, // [254] 0xfe
+ false, // [255] 0xff
+ };
+
+bool g_IsACGTU[256] = // Byte value -> true only for the unambiguous nucleotide letters A, C, G, T and U in either case.
+ { // N/n is false here, in contrast to g_IsNucleoChar; every other byte is false as well.
+ false, // [ 0] 0x00
+ false, // [ 1] 0x01
+ false, // [ 2] 0x02
+ false, // [ 3] 0x03
+ false, // [ 4] 0x04
+ false, // [ 5] 0x05
+ false, // [ 6] 0x06
+ false, // [ 7] 0x07
+ false, // [ 8] 0x08
+ false, // [ 9] 0x09
+ false, // [ 10] 0x0a
+ false, // [ 11] 0x0b
+ false, // [ 12] 0x0c
+ false, // [ 13] 0x0d
+ false, // [ 14] 0x0e
+ false, // [ 15] 0x0f
+ false, // [ 16] 0x10
+ false, // [ 17] 0x11
+ false, // [ 18] 0x12
+ false, // [ 19] 0x13
+ false, // [ 20] 0x14
+ false, // [ 21] 0x15
+ false, // [ 22] 0x16
+ false, // [ 23] 0x17
+ false, // [ 24] 0x18
+ false, // [ 25] 0x19
+ false, // [ 26] 0x1a
+ false, // [ 27] 0x1b
+ false, // [ 28] 0x1c
+ false, // [ 29] 0x1d
+ false, // [ 30] 0x1e
+ false, // [ 31] 0x1f
+ false, // [ 32] ' '
+ false, // [ 33] '!'
+ false, // [ 34] '"'
+ false, // [ 35] '#'
+ false, // [ 36] '$'
+ false, // [ 37] '%'
+ false, // [ 38] '&'
+ false, // [ 39] '''
+ false, // [ 40] '('
+ false, // [ 41] ')'
+ false, // [ 42] '*'
+ false, // [ 43] '+'
+ false, // [ 44] ','
+ false, // [ 45] '-'
+ false, // [ 46] '.'
+ false, // [ 47] '/'
+ false, // [ 48] '0'
+ false, // [ 49] '1'
+ false, // [ 50] '2'
+ false, // [ 51] '3'
+ false, // [ 52] '4'
+ false, // [ 53] '5'
+ false, // [ 54] '6'
+ false, // [ 55] '7'
+ false, // [ 56] '8'
+ false, // [ 57] '9'
+ false, // [ 58] ':'
+ false, // [ 59] ';'
+ false, // [ 60] '<'
+ false, // [ 61] '='
+ false, // [ 62] '>'
+ false, // [ 63] '?'
+ false, // [ 64] '@'
+ true, // [ 65] 'A' (ACGTU)
+ false, // [ 66] 'B'
+ true, // [ 67] 'C' (ACGTU)
+ false, // [ 68] 'D'
+ false, // [ 69] 'E'
+ false, // [ 70] 'F'
+ true, // [ 71] 'G' (ACGTU)
+ false, // [ 72] 'H'
+ false, // [ 73] 'I'
+ false, // [ 74] 'J'
+ false, // [ 75] 'K'
+ false, // [ 76] 'L'
+ false, // [ 77] 'M'
+ false, // [ 78] 'N'
+ false, // [ 79] 'O'
+ false, // [ 80] 'P'
+ false, // [ 81] 'Q'
+ false, // [ 82] 'R'
+ false, // [ 83] 'S'
+ true, // [ 84] 'T' (ACGTU)
+ true, // [ 85] 'U' (ACGTU)
+ false, // [ 86] 'V'
+ false, // [ 87] 'W'
+ false, // [ 88] 'X'
+ false, // [ 89] 'Y'
+ false, // [ 90] 'Z'
+ false, // [ 91] '['
+ false, // [ 92] '\'
+ false, // [ 93] ']'
+ false, // [ 94] '^'
+ false, // [ 95] '_'
+ false, // [ 96] '`'
+ true, // [ 97] 'a' (ACGTU)
+ false, // [ 98] 'b'
+ true, // [ 99] 'c' (ACGTU)
+ false, // [100] 'd'
+ false, // [101] 'e'
+ false, // [102] 'f'
+ true, // [103] 'g' (ACGTU)
+ false, // [104] 'h'
+ false, // [105] 'i'
+ false, // [106] 'j'
+ false, // [107] 'k'
+ false, // [108] 'l'
+ false, // [109] 'm'
+ false, // [110] 'n'
+ false, // [111] 'o'
+ false, // [112] 'p'
+ false, // [113] 'q'
+ false, // [114] 'r'
+ false, // [115] 's'
+ true, // [116] 't' (ACGTU)
+ true, // [117] 'u' (ACGTU)
+ false, // [118] 'v'
+ false, // [119] 'w'
+ false, // [120] 'x'
+ false, // [121] 'y'
+ false, // [122] 'z'
+ false, // [123] '{'
+ false, // [124] '|'
+ false, // [125] '}'
+ false, // [126] '~'
+ false, // [127] 0x7f
+ false, // [128] 0x80
+ false, // [129] 0x81
+ false, // [130] 0x82
+ false, // [131] 0x83
+ false, // [132] 0x84
+ false, // [133] 0x85
+ false, // [134] 0x86
+ false, // [135] 0x87
+ false, // [136] 0x88
+ false, // [137] 0x89
+ false, // [138] 0x8a
+ false, // [139] 0x8b
+ false, // [140] 0x8c
+ false, // [141] 0x8d
+ false, // [142] 0x8e
+ false, // [143] 0x8f
+ false, // [144] 0x90
+ false, // [145] 0x91
+ false, // [146] 0x92
+ false, // [147] 0x93
+ false, // [148] 0x94
+ false, // [149] 0x95
+ false, // [150] 0x96
+ false, // [151] 0x97
+ false, // [152] 0x98
+ false, // [153] 0x99
+ false, // [154] 0x9a
+ false, // [155] 0x9b
+ false, // [156] 0x9c
+ false, // [157] 0x9d
+ false, // [158] 0x9e
+ false, // [159] 0x9f
+ false, // [160] 0xa0
+ false, // [161] 0xa1
+ false, // [162] 0xa2
+ false, // [163] 0xa3
+ false, // [164] 0xa4
+ false, // [165] 0xa5
+ false, // [166] 0xa6
+ false, // [167] 0xa7
+ false, // [168] 0xa8
+ false, // [169] 0xa9
+ false, // [170] 0xaa
+ false, // [171] 0xab
+ false, // [172] 0xac
+ false, // [173] 0xad
+ false, // [174] 0xae
+ false, // [175] 0xaf
+ false, // [176] 0xb0
+ false, // [177] 0xb1
+ false, // [178] 0xb2
+ false, // [179] 0xb3
+ false, // [180] 0xb4
+ false, // [181] 0xb5
+ false, // [182] 0xb6
+ false, // [183] 0xb7
+ false, // [184] 0xb8
+ false, // [185] 0xb9
+ false, // [186] 0xba
+ false, // [187] 0xbb
+ false, // [188] 0xbc
+ false, // [189] 0xbd
+ false, // [190] 0xbe
+ false, // [191] 0xbf
+ false, // [192] 0xc0
+ false, // [193] 0xc1
+ false, // [194] 0xc2
+ false, // [195] 0xc3
+ false, // [196] 0xc4
+ false, // [197] 0xc5
+ false, // [198] 0xc6
+ false, // [199] 0xc7
+ false, // [200] 0xc8
+ false, // [201] 0xc9
+ false, // [202] 0xca
+ false, // [203] 0xcb
+ false, // [204] 0xcc
+ false, // [205] 0xcd
+ false, // [206] 0xce
+ false, // [207] 0xcf
+ false, // [208] 0xd0
+ false, // [209] 0xd1
+ false, // [210] 0xd2
+ false, // [211] 0xd3
+ false, // [212] 0xd4
+ false, // [213] 0xd5
+ false, // [214] 0xd6
+ false, // [215] 0xd7
+ false, // [216] 0xd8
+ false, // [217] 0xd9
+ false, // [218] 0xda
+ false, // [219] 0xdb
+ false, // [220] 0xdc
+ false, // [221] 0xdd
+ false, // [222] 0xde
+ false, // [223] 0xdf
+ false, // [224] 0xe0
+ false, // [225] 0xe1
+ false, // [226] 0xe2
+ false, // [227] 0xe3
+ false, // [228] 0xe4
+ false, // [229] 0xe5
+ false, // [230] 0xe6
+ false, // [231] 0xe7
+ false, // [232] 0xe8
+ false, // [233] 0xe9
+ false, // [234] 0xea
+ false, // [235] 0xeb
+ false, // [236] 0xec
+ false, // [237] 0xed
+ false, // [238] 0xee
+ false, // [239] 0xef
+ false, // [240] 0xf0
+ false, // [241] 0xf1
+ false, // [242] 0xf2
+ false, // [243] 0xf3
+ false, // [244] 0xf4
+ false, // [245] 0xf5
+ false, // [246] 0xf6
+ false, // [247] 0xf7
+ false, // [248] 0xf8
+ false, // [249] 0xf9
+ false, // [250] 0xfa
+ false, // [251] 0xfb
+ false, // [252] 0xfc
+ false, // [253] 0xfd
+ false, // [254] 0xfe
+ false, // [255] 0xff
+ };
+
+// One frequency value per amino acid, 20 entries in alphabetical order of the
+// one-letter code (A, C, D, ... Y) as labelled on each entry below.
+// NOTE(review): presumably background frequencies indexed by the same letter
+// codes as g_LetterToCharAmino -- confirm against the tables defined above.
+float g_AminoFreqs[20] =
+ {
+ 0.0777f, // 'A' = Ala
+ 0.0161f, // 'C' = Cys
+ 0.0527f, // 'D' = Asp
+ 0.0631f, // 'E' = Glu
+ 0.0417f, // 'F' = Phe
+ 0.0718f, // 'G' = Gly
+ 0.0238f, // 'H' = His
+ 0.0606f, // 'I' = Ile
+ 0.0601f, // 'K' = Lys
+ 0.0906f, // 'L' = Leu
+ 0.0233f, // 'M' = Met
+ 0.0439f, // 'N' = Asn
+ 0.0456f, // 'P' = Pro
+ 0.0368f, // 'Q' = Gln
+ 0.0526f, // 'R' = Arg
+ 0.0639f, // 'S' = Ser
+ 0.0570f, // 'T' = Thr
+ 0.0712f, // 'V' = Val
+ 0.0134f, // 'W' = Trp
+ 0.0339f, // 'Y' = Tyr
+ };
--- /dev/null
+#ifndef alpha_h\r
+#define alpha_h\r
+\r
+#include <limits.h>\r
+#include <string>\r
+\r
+using namespace std;\r
+\r
+const unsigned INVALID_LETTER = 0;\r
+const unsigned char INVALID_CHAR = '?';\r
+\r
+extern unsigned g_CharToLetterAmino[];\r
+extern unsigned g_CharToLetterAminoStop[];\r
+extern unsigned char g_LetterToCharAmino[];\r
+extern unsigned g_CharToLetterNucleo[];\r
+extern unsigned char g_LetterToCharNucleo[];\r
+extern unsigned g_CodonWordToAminoLetter[];\r
+extern char g_CodonWordToAminoChar[];\r
+extern unsigned char g_CharToCompChar[];\r
+extern unsigned g_CharToCompLetter[];\r
+extern bool g_IsAminoChar[];\r
+extern bool g_IsNucleoChar[];\r
+extern bool g_IsACGTU[];\r
+extern float g_AminoFreqs[];\r
+\r
+extern unsigned g_CharToLetterRed[];\r
+extern unsigned char g_LetterToCharRed[];\r
+extern unsigned g_RedAlphaSize;\r
+\r
+void LogRedAlphaRed();\r
+void ReadRedAlphaFromFile(const string &FileName);\r
+unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2,\r
+ unsigned char c3);\r
+\r
+// Returns true when Letter equals 10.\r
+// NOTE(review): 10 is the position of 'M' (Met) in the alphabetical amino acid\r
+// ordering A,C,D,E,F,G,H,I,K,L,M,... -- presumably the letter code of the start\r
+// codon's amino acid; confirm against g_LetterToCharAmino.\r
+static inline bool AminoLetterIsStartCodon(unsigned char Letter)\r
+ {\r
+ return Letter == 10;\r
+ }\r
+\r
+// Returns true when Letter equals 20, i.e. the first value past the 20 amino\r
+// letters 0..19.\r
+// NOTE(review): presumably the code assigned to a stop codon by\r
+// g_CharToLetterAminoStop / g_CodonWordToAminoLetter -- confirm.\r
+static inline bool AminoLetterIsStopCodon(unsigned char Letter)\r
+ {\r
+ return Letter == 20;\r
+ }\r
+\r
+const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo);\r
+const char *WordToStrNucleo(unsigned Word, unsigned WordLength);\r
+const char *WordToStrAmino(unsigned Word, unsigned WordLength);\r
+const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str);\r
+\r
+#endif // alpha_h\r
--- /dev/null
+#include "myutils.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+\r
+// True when c is one of the two gap symbols, '-' or '.'.\r
+bool isgap(byte c)\r
+{\r
+    switch (c)\r
+    {\r
+    case '-':\r
+    case '.':\r
+        return true;\r
+    default:\r
+        return false;\r
+    }\r
+}\r
+\r
+// Decodes Word as WordLength base-20 digits into amino acid characters, most\r
+// significant digit first. The text lives in a function-local static buffer of\r
+// 32 chars, so the result is overwritten by the next call and WordLength must\r
+// stay below 32.\r
+const char *WordToStrAmino(unsigned Word, unsigned WordLength)\r
+{\r
+    static char Str[32];\r
+    // Same digit loop as the caller-supplied-buffer variant; reuse it.\r
+    return WordToStrAmino2(Word, WordLength, Str);\r
+}\r
+\r
+// Decodes Word as WordLength base-20 digits into amino acid characters, written\r
+// into the caller's buffer Str (which needs room for WordLength+1 chars).\r
+// Returns Str.\r
+const char *WordToStrAmino2(unsigned Word, unsigned WordLength, char *Str)\r
+{\r
+    Str[WordLength] = 0;\r
+    // Least significant digit goes into the last position; fill right to left.\r
+    for (unsigned Pos = WordLength; Pos > 0; --Pos)\r
+    {\r
+        Str[Pos - 1] = g_LetterToCharAmino[Word % 20];\r
+        Word /= 20;\r
+    }\r
+    return Str;\r
+}\r
+\r
+// Decodes Word as WordLength base-4 digits into nucleotide characters, most\r
+// significant digit first. Uses a function-local static buffer of 32 chars, so\r
+// the result is overwritten by the next call and WordLength must stay below 32.\r
+const char *WordToStrNucleo(unsigned Word, unsigned WordLength)\r
+{\r
+    static char Str[32];\r
+    Str[WordLength] = 0;\r
+    unsigned Pos = WordLength;\r
+    while (Pos > 0)\r
+    {\r
+        --Pos;\r
+        Str[Pos] = g_LetterToCharNucleo[Word % 4];\r
+        Word /= 4;\r
+    }\r
+    return Str;\r
+}\r
+\r
+// Dispatches to the nucleotide or amino acid decoder depending on Nucleo.\r
+const char *WordToStr(unsigned Word, unsigned WordLength, bool Nucleo)\r
+{\r
+    if (Nucleo)\r
+        return WordToStrNucleo(Word, WordLength);\r
+    return WordToStrAmino(Word, WordLength);\r
+}\r
+\r
+// Allocates an L-byte buffer with MYALLOC and fills it with the complement of\r
+// Seq in reverse order. The new buffer is returned to the caller.\r
+byte *RevCompAlloc(const byte *Seq, unsigned L)\r
+{\r
+    byte *RCSeq = MYALLOC(byte, L, Alpha);\r
+\r
+    // Read the input forwards while the output index runs backwards.\r
+    unsigned Out = L;\r
+    for (unsigned In = 0; In < L; ++In)\r
+    {\r
+        --Out;\r
+        RCSeq[Out] = g_CharToCompChar[Seq[In]];\r
+    }\r
+\r
+    return RCSeq;\r
+}\r
+\r
+// Replaces Seq[0..L-1] with its reverse complement without a second buffer.\r
+void RevCompInPlace(byte *Seq, unsigned L)\r
+{\r
+    if (L == 0)\r
+        return;\r
+\r
+    // Two indexes move towards each other, swapping complemented characters.\r
+    unsigned Lo = 0;\r
+    unsigned Hi = L - 1;\r
+    while (Lo < Hi)\r
+    {\r
+        unsigned CompLo = g_CharToCompChar[Seq[Lo]];\r
+        unsigned CompHi = g_CharToCompChar[Seq[Hi]];\r
+\r
+        Seq[Lo] = CompHi;\r
+        Seq[Hi] = CompLo;\r
+\r
+        ++Lo;\r
+        --Hi;\r
+    }\r
+\r
+    // Odd length: the middle character has no partner, complement it alone.\r
+    if (Lo == Hi)\r
+        Seq[Lo] = g_CharToCompChar[Seq[Lo]];\r
+}\r
+\r
+// Writes the reverse complement of Seq[0..L-1] into RCSeq, which must have room\r
+// for L bytes.\r
+void RevComp(const byte *Seq, unsigned L, byte *RCSeq)\r
+{\r
+    unsigned Out = L;\r
+    for (unsigned In = 0; In < L; ++In)\r
+    {\r
+        --Out;\r
+        RCSeq[Out] = g_CharToCompChar[Seq[In]];\r
+    }\r
+}\r
+\r
+// Translates three nucleotide characters into one amino acid character: each\r
+// character is mapped to its nucleotide letter, the letters are combined into a\r
+// base-4 codon word (c1 most significant), and the word is looked up in the\r
+// codon table.\r
+unsigned char GetAminoCharFrom3NucChars(unsigned char c1, unsigned char c2,\r
+  unsigned char c3)\r
+{\r
+    unsigned Word = g_CharToLetterNucleo[c1];\r
+    Word = Word*4 + g_CharToLetterNucleo[c2];\r
+    Word = Word*4 + g_CharToLetterNucleo[c3];\r
+\r
+    return g_LetterToCharAmino[g_CodonWordToAminoLetter[Word]];\r
+}\r
// wordsize used in megablast. I'm sure we're sacrificing accuracy for speed, but any other way would take way too
// long. With this setting, it seems comparable in speed to the suffix tree approach.
- string blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
- blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+ string blastCommand;
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+
+ blastCommand = path + "blast/bin/blastall -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
+ blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+ #else
+ blastCommand = "\"" + path + "blast\\bin\\blastall\" -p blastn -d " + dbFileName + " -m 8 -W 28 -v " + toString(n) + " -b " + toString(n);;
+ blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
+ #endif
system(blastCommand.c_str());
ifstream m8FileHandle;
blastCommand = path + "blast/bin/megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
#else
- blastCommand = path + "blast\\bin\\megablast -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
+ blastCommand = "\"" + path + "blast\\bin\\megablast\" -e 1e-10 -d " + dbFileName + " -m 8 -b " + toString(n) + " -v " + toString(n); //-W 28 -p blastn
blastCommand += (" -i " + (queryFileName+seq->getName()) + " -o " + blastFileName+seq->getName());
#endif
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
formatdbCommand = path + "blast/bin/formatdb -p F -o T -i " + dbFileName; // format the database, -o option gives us the ability
#else
- formatdbCommand = path + "blast\\bin\\formatdb -p F -o T -i " + dbFileName;
+ formatdbCommand = "\"" + path + "blast\\bin\\formatdb\" -p F -o T -i " + dbFileName;
#endif
system(formatdbCommand.c_str()); // to get the right sequence names, i think. -p F
// option tells formatdb that seqs are DNA, not prot
--- /dev/null
+#ifndef chainer_h\r
+#define chainer_h\r
+\r
+#include "hsp.h"\r
+#include "seq.h"\r
+#include <list>\r
+\r
+const float BAD_SCORE = -9e9f;\r
+\r
+// Record of one hit: a target index, lo/hi target coordinates, a query frame\r
+// and a score. LogMe() logs every field except TargetIndex.\r
+struct TargetHit\r
+ {\r
+ unsigned TargetIndex;\r
+ unsigned TargetLo;\r
+ unsigned TargetHi;\r
+ int QueryFrame;\r
+ float RawScore; // SOMETIMES USED FOR BIT SCORE!!!\r
+// unsigned TargetLength;\r
+\r
+ // Writes lo, hi, frame and score on one log line.\r
+ void LogMe() const\r
+ {\r
+ Log("lo %u, hi %u, frame %d, score %.1f\n",\r
+ TargetLo, TargetHi, QueryFrame, RawScore);\r
+ }\r
+ };\r
+\r
+// Plain record of four values: LastHSPIndex, Ahi, Bhi and Score.\r
+// NOTE(review): presumably the tail of an HSP chain (index of its last HSP, the\r
+// high coordinates reached in sequences A and B, and the chain score) --\r
+// confirm against the Chainer implementation.\r
+struct ChainData\r
+ {\r
+ unsigned LastHSPIndex;\r
+ unsigned Ahi;\r
+ unsigned Bhi;\r
+ float Score;\r
+ };\r
+\r
+// Declaration of the HSP chaining class. Only the interface is visible here;\r
+// the member functions are defined elsewhere. All data members are public.\r
+class Chainer\r
+ {\r
+public:\r
+ HSPData **m_HSPs; // memory owned elsewhere\r
+ unsigned m_HSPCount;\r
+ unsigned m_MaxHSPCount;\r
+\r
+ BPData *m_BPs;\r
+\r
+ unsigned *m_PrevHSPIndexes; // Predecessor in chain\r
+ float *m_HSPIndexToChainScore;\r
+\r
+ list<unsigned> m_Chains; // Live HSP indexes\r
+\r
+public:\r
+ Chainer();\r
+ ~Chainer();\r
+ void Reset();\r
+ void Clear(bool ctor = false);\r
+ // Takes HSPCount HSP pointers; writes a chain to OptChain / OptChainLength and\r
+ // returns a float (presumably the chain score -- confirm in chainer.cpp).\r
+ float Chain(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain,\r
+ unsigned &OptChainLength);\r
+ bool ResolveOverlaps(const SeqData &SA, const SeqData &SB, double MinScore,\r
+ const float * const *SubstMx, HSPData **InHSPs, unsigned InHSPCount,\r
+ HSPData **OutHSPs, unsigned &OutHSPCount);\r
+ void ResolveOverlap(HSPData &HSP1, HSPData &HSP2);\r
+\r
+ // Same signature as Chain(); by its name a brute-force variant (presumably\r
+ // for cross-checking -- confirm).\r
+ float ChainBrute(HSPData **HSPs, unsigned HSPCount, HSPData **OptChain,\r
+ unsigned &OptChainLength);\r
+ void LogMe() const;\r
+ void LogHSPs(HSPData **HSPs, unsigned HSPCount) const;\r
+ void LogBPs() const;\r
+\r
+ // Static helpers that work on a caller-supplied array of HSP pointers.\r
+ static bool IsValidChain(HSPData **HSPs, unsigned HSPCount);\r
+ static void AssertValidChain(HSPData **HSPs, unsigned HSPCount);\r
+ static void LogChain(HSPData **HSPs, unsigned HSPCount);\r
+ static void LogChain2(HSPData **HSPs, unsigned HSPCount);\r
+ static float GetChainScore(HSPData **HSPs, unsigned HSPCount);\r
+\r
+private:\r
+ void AllocHSPCount(unsigned MaxHSPCount);\r
+ void SetBPs();\r
+ void SortBPs();\r
+ unsigned FindBestChainLT(unsigned Ahi, unsigned Bhi);\r
+ };\r
+\r
+#endif // chainer_h\r
--- /dev/null
+#ifndef chime_h\r
+#define chime_h\r
+\r
+#include "seq.h"\r
+\r
+// Result record for one chimera test of a query (Q) against two parents (A, B):\r
+// labels, the three aligned rows, percent identities, column coordinates,\r
+// scores, per-side vote counts and abundances. A freshly constructed or\r
+// cleared hit holds empty strings, UINT_MAX in every unsigned field and -1 in\r
+// every floating-point field.\r
+struct ChimeHit2\r
+{\r
+    string QLabel;\r
+    string ALabel;\r
+    string BLabel;\r
+    string Q3;\r
+    string A3;\r
+    string B3;\r
+\r
+    double PctIdQT, PctIdQA, PctIdQB, PctIdQM, PctIdAB;\r
+\r
+    unsigned ColLo;\r
+    unsigned ColXLo;\r
+    unsigned ColXHi;\r
+    unsigned ColHi;\r
+    unsigned QXLo;\r
+    unsigned QXHi;\r
+\r
+    double Div;\r
+    double Score;\r
+    double H;\r
+\r
+    unsigned CS_LY, CS_LN, CS_LA, CS_RY, CS_RN, CS_RA;\r
+\r
+    float AbQ;\r
+    float AbA;\r
+    float AbB;\r
+\r
+    ChimeHit2()\r
+    {\r
+        Clear();\r
+    }\r
+\r
+    // Resets every field to its "unset" value.\r
+    void Clear()\r
+    {\r
+        QLabel.clear();\r
+        ALabel.clear();\r
+        BLabel.clear();\r
+        Q3.clear();\r
+        A3.clear();\r
+        B3.clear();\r
+\r
+        ColLo = UINT_MAX;\r
+        ColXLo = UINT_MAX;\r
+        ColXHi = UINT_MAX;\r
+        ColHi = UINT_MAX;\r
+        QXLo = UINT_MAX;\r
+        QXHi = UINT_MAX;\r
+\r
+        CS_LY = UINT_MAX;\r
+        CS_LN = UINT_MAX;\r
+        CS_LA = UINT_MAX;\r
+        CS_RY = UINT_MAX;\r
+        CS_RN = UINT_MAX;\r
+        CS_RA = UINT_MAX;\r
+\r
+        PctIdQT = -1.0;\r
+        PctIdQA = -1.0;\r
+        PctIdQB = -1.0;\r
+        PctIdQM = -1.0;\r
+        PctIdAB = -1.0;\r
+\r
+        Div = -1.0;\r
+        Score = -1.0;\r
+        H = -1.0;\r
+\r
+        AbQ = -1.0f;\r
+        AbA = -1.0f;\r
+        AbB = -1.0f;\r
+    }\r
+\r
+    // A hit is accepted when score, divergence and the yes-votes on both sides\r
+    // all reach their opt_* thresholds.\r
+    bool Accept() const\r
+    {\r
+        if (!(Score >= opt_minh))\r
+            return false;\r
+        if (!(Div >= opt_mindiv))\r
+            return false;\r
+        if (!(CS_LY >= opt_mindiffs))\r
+            return false;\r
+        return CS_RY >= opt_mindiffs;\r
+    }\r
+\r
+    // Logs the hit on a single line.\r
+    void LogMe() const\r
+    {\r
+        const bool ScoreAndDivAtLeastOne = (Score >= 1.0 && Div >= 1.0);\r
+        Log("@L %c ", yon(ScoreAndDivAtLeastOne));\r
+        Log(" %.4f", Score);\r
+        Log(" LY %u LN %u LA %u", CS_LY, CS_LN, CS_LA);\r
+        Log(" RY %u RN %u RA %u", CS_RY, CS_RN, CS_RA);\r
+        Log(" Div %.1f%%", Div);\r
+        Log(" Q=%s", QLabel.c_str());\r
+        Log(" A=%s", ALabel.c_str());\r
+        Log(" B=%s", BLabel.c_str());\r
+        Log(" QA %.1f%% QB=%.1f%% AB=%.1f%% QM=%.1f%%", PctIdQA, PctIdQB, PctIdAB, PctIdQM);\r
+        Log("\n");\r
+    }\r
+\r
+    // Orders hits by descending Score, ties broken by descending Div.\r
+    bool operator<(const ChimeHit2 &rhs) const\r
+    {\r
+        if (Score != rhs.Score)\r
+            return Score > rhs.Score;\r
+        return Div > rhs.Div;\r
+    }\r
+};\r
+\r
+// True for the four upper-case nucleotide letters A, C, G and T only.\r
+static inline bool isacgt(char c)\r
+{\r
+    switch (c)\r
+    {\r
+    case 'A':\r
+    case 'C':\r
+    case 'G':\r
+    case 'T':\r
+        return true;\r
+    default:\r
+        return false;\r
+    }\r
+}\r
+\r
+// True when c is one of the two gap symbols, '-' or '.'.\r
+static bool inline isgap(char c)\r
+{\r
+    switch (c)\r
+    {\r
+    case '-':\r
+    case '.':\r
+        return true;\r
+    default:\r
+        return false;\r
+    }\r
+}\r
+\r
+void GetChunkInfo(unsigned L, unsigned &Length, vector<unsigned> &Los);\r
+float GetAbFromLabel(const string &Label);\r
+void WriteChimeHitCS(FILE *f, const ChimeHit2 &Hit);\r
+void WriteChimeHit(FILE *f, const ChimeHit2 &Hit);\r
+void WriteChimeFileHdr(FILE *f);\r
+\r
+#endif // chime_h\r
remove((accnos + toString(processIDS[i]) + ".temp").c_str());
}
#endif
+ //get rid of the file pieces.
+ for (int i = 0; i < files.size(); i++) { remove(files[i].c_str()); }
+
return num;
}
catch(exception& e) {
--- /dev/null
+#ifndef CHIMERAUCHIMECOMMAND_H
+#define CHIMERAUCHIMECOMMAND_H
+
+
+/*
+ * chimerauchimecommand.h
+ * Mothur
+ *
+ * Created by westcott on 5/13/11.
+ * Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "command.hpp"
+
+/***********************************************************/
+
+// Command class registered under the name "chimera.uchime" (see
+// getCommandName). Only the declaration is visible here; the constructors,
+// setParameters, getHelpString and execute are defined elsewhere.
+class ChimeraUchimeCommand : public Command {
+public:
+ ChimeraUchimeCommand(string);
+ ChimeraUchimeCommand();
+ ~ChimeraUchimeCommand() {}
+
+ vector<string> setParameters();
+ string getCommandName() { return "chimera.uchime"; }
+ string getCommandCategory() { return "Sequence Processing"; }
+ string getHelpString();
+ string getCitation() { return "http://drive5.com/uchime/ \nhttp://www.mothur.org/wiki/Chimera.uchime"; }
+
+
+ int execute();
+ // Prints the help string through the mothur output object m.
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ vector<int> processIDS; //processid
+ int driver(string, string, string);
+ int createProcesses(string, string, string);
+
+ // Only declared when the build defines USE_MPI.
+#ifdef USE_MPI
+ int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
+#endif
+
+ bool abort;
+ string fastafile, templatefile, outputDir, namefile;
+ int processors;
+
+ vector<string> outputNames;
+ vector<string> fastaFileNames;
+ vector<string> nameFileNames;
+
+};
+
+/***********************************************************/
+
+#endif
+
+
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
- CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-skulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
+ CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
outputTypes["sharednseqs"] = tempOutNames;
outputTypes["ochiai"] = tempOutNames;
outputTypes["anderberg"] = tempOutNames;
- outputTypes["skulczynski"] = tempOutNames;
+ outputTypes["kulczynski"] = tempOutNames;
outputTypes["kulczynskicody"] = tempOutNames;
outputTypes["lennon"] = tempOutNames;
outputTypes["morisitahorn"] = tempOutNames;
outputTypes["sharednseqs"] = tempOutNames;
outputTypes["ochiai"] = tempOutNames;
outputTypes["anderberg"] = tempOutNames;
- outputTypes["skulczynski"] = tempOutNames;
+ outputTypes["kulczynski"] = tempOutNames;
outputTypes["kulczynskicody"] = tempOutNames;
outputTypes["lennon"] = tempOutNames;
outputTypes["morisitahorn"] = tempOutNames;
}else if (Estimators[i] == "anderberg") {
cDisplays.push_back(new CollectDisplay(new Anderberg(), new SharedOneColumnFile(fileNameRoot+"anderberg")));
outputNames.push_back(fileNameRoot+"anderberg"); outputTypes["anderberg"].push_back(fileNameRoot+"anderberg");
- }else if (Estimators[i] == "skulczynski") {
+ }else if (Estimators[i] == "kulczynski") {
cDisplays.push_back(new CollectDisplay(new Kulczynski(), new SharedOneColumnFile(fileNameRoot+"kulczynski")));
outputNames.push_back(fileNameRoot+"kulczynski"); outputTypes["kulczynski"].push_back(fileNameRoot+"kulczynski");
}else if (Estimators[i] == "kulczynskicody") {
#include "chimeraslayercommand.h"
#include "chimerapintailcommand.h"
#include "chimerabellerophoncommand.h"
+#include "chimerauchimecommand.h"
#include "setlogfilecommand.h"
#include "phylodiversitycommand.h"
#include "makegroupcommand.h"
commands["chimera.ccode"] = "MPIEnabled";
commands["chimera.check"] = "MPIEnabled";
commands["chimera.slayer"] = "MPIEnabled";
+ commands["chimera.uchime"] = "MPIEnabled";
commands["chimera.pintail"] = "MPIEnabled";
commands["chimera.bellerophon"] = "MPIEnabled";
commands["screen.seqs"] = "MPIEnabled";
else if(commandName == "chimera.ccode") { command = new ChimeraCcodeCommand(optionString); }
else if(commandName == "chimera.check") { command = new ChimeraCheckCommand(optionString); }
else if(commandName == "chimera.slayer") { command = new ChimeraSlayerCommand(optionString); }
+ else if(commandName == "chimera.uchime") { command = new ChimeraUchimeCommand(optionString); }
else if(commandName == "chimera.pintail") { command = new ChimeraPintailCommand(optionString); }
else if(commandName == "chimera.bellerophon") { command = new ChimeraBellerophonCommand(optionString); }
else if(commandName == "phylotype") { command = new PhylotypeCommand(optionString); }
else if(commandName == "classify.seqs") { pipecommand = new ClassifySeqsCommand(optionString); }
else if(commandName == "chimera.ccode") { pipecommand = new ChimeraCcodeCommand(optionString); }
else if(commandName == "chimera.check") { pipecommand = new ChimeraCheckCommand(optionString); }
+ else if(commandName == "chimera.uchime") { pipecommand = new ChimeraUchimeCommand(optionString); }
else if(commandName == "chimera.slayer") { pipecommand = new ChimeraSlayerCommand(optionString); }
else if(commandName == "chimera.pintail") { pipecommand = new ChimeraPintailCommand(optionString); }
else if(commandName == "chimera.bellerophon") { pipecommand = new ChimeraBellerophonCommand(optionString); }
else if(commandName == "chimera.ccode") { shellcommand = new ChimeraCcodeCommand(); }
else if(commandName == "chimera.check") { shellcommand = new ChimeraCheckCommand(); }
else if(commandName == "chimera.slayer") { shellcommand = new ChimeraSlayerCommand(); }
+ else if(commandName == "chimera.uchime") { shellcommand = new ChimeraUchimeCommand(); }
else if(commandName == "chimera.pintail") { shellcommand = new ChimeraPintailCommand(); }
else if(commandName == "chimera.bellerophon") { shellcommand = new ChimeraBellerophonCommand(); }
else if(commandName == "phylotype") { shellcommand = new PhylotypeCommand(); }
--- /dev/null
+#ifndef diagbox_h\r
+#define diagbox_h\r
+\r
+struct DiagBox;\r
+\r
+void GetDiagBox(unsigned LA, unsigned LB, unsigned DiagLo, unsigned DiagHi, DiagBox &Box);\r
+void GetDiagRange(unsigned LA, unsigned LB, unsigned d,\r
+ unsigned &mini, unsigned &minj, unsigned &maxi, unsigned &maxj);\r
+void GetDiagLoHi(unsigned LA, unsigned LB, const char *Path,\r
+ unsigned &dlo, unsigned &dhi);\r
+\r
+struct DiagBox\r
+ {\r
+ DiagBox()\r
+ {\r
+ }\r
+\r
+ DiagBox(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi)\r
+ {\r
+ //GetDiagBox(LA, LB, DiagLo, DiagHi, *this);\r
+ //Validate();\r
+ Init(LA_, LB_, DiagLo, DiagHi);\r
+ }\r
+\r
+ void Init(unsigned LA_, unsigned LB_, unsigned DiagLo, unsigned DiagHi)\r
+ {\r
+ GetDiagBox(LA_, LB_, DiagLo, DiagHi, *this);\r
+ Validate();\r
+ }\r
+\r
+ unsigned LA;\r
+ unsigned LB;\r
+\r
+ unsigned dlo;\r
+ unsigned dhi;\r
+\r
+ unsigned dlo_mini;\r
+ unsigned dlo_minj;\r
+\r
+ unsigned dlo_maxi;\r
+ unsigned dlo_maxj;\r
+\r
+ unsigned dhi_mini;\r
+ unsigned dhi_minj;\r
+\r
+ unsigned dhi_maxi;\r
+ unsigned dhi_maxj;\r
+\r
+ unsigned GetDiag(unsigned i, unsigned j) const\r
+ {\r
+ return LA - i + j;\r
+ }\r
+\r
+// i, j are positions 0..LA-1, 0..LB-1.\r
+ bool InBox(unsigned i, unsigned j) const\r
+ {\r
+ unsigned d = GetDiag(i, j);\r
+ return d >= dlo && d <= dhi;\r
+ }\r
+\r
+/***\r
+i, j are 0-based prefix lengths 0..LA, 0..LB.\r
+\r
+A full path is in the box iff all match pairs are in the box.\r
+\r
+A partial path that aligns a prefix of A to a prefix of B as\r
+in D.P.) is in the box iff it is is the prefix of at least\r
+one full path that is in the box.\r
+\r
+A D.P. matrix entry X[i][j] is in the box iff there is at\r
+least one full path aligning the first i letters of A and\r
+the first j letters of B ending in a column of type X, i.e.\r
+if there exists a partial path in the box that ends in X.\r
+\r
+Assume terminals appear in all paths, and DI/ID forbidden.\r
+\r
+Intuitively seems that by these definitions D is in box iff\r
+DM or MD is in box, I is in box iff IM or MI is in box.\r
+Don't have proof..\r
+***/\r
+ bool InBoxDPM(unsigned i, unsigned j) const\r
+ {\r
+ // Special case for M[0][0]\r
+ if (i == 0 && j == 0)\r
+ return true;\r
+ if (i == 0 || j == 0)\r
+ return false;\r
+ unsigned d = GetDiag(i-1, j-1);\r
+ return d >= dlo && d <= dhi;\r
+ }\r
+\r
+ bool InBoxDPD(unsigned i, unsigned j) const\r
+ {\r
+ bool MD = i == 0 ? false : InBoxDPM(i-1, j);\r
+ bool DM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);\r
+ return MD || DM;\r
+ }\r
+\r
+ bool InBoxDPI(unsigned i, unsigned j) const\r
+ {\r
+ bool MI = j == 0 ? false : InBoxDPM(i, j-1);\r
+ bool IM = (i == LA || j == LB) ? false : InBoxDPM(i+1, j+1);\r
+ return MI || IM;\r
+ }\r
+\r
+ // d = LA - i + j = 1 .. LA+LB-1\r
+ void Validate() const\r
+ {\r
+ asserta(dlo <= dhi);\r
+ asserta(dlo >= GetDiag(LA-1, 0));\r
+ asserta(dhi <= GetDiag(0, LB-1));\r
+\r
+ asserta(GetDiag(dlo_mini, dlo_minj) == dlo);\r
+ asserta(GetDiag(dlo_maxi, dlo_maxj) == dlo);\r
+ asserta(GetDiag(dhi_mini, dhi_minj) == dhi);\r
+ asserta(GetDiag(dhi_maxi, dhi_maxj) == dhi);\r
+\r
+ asserta(dlo_mini >= dhi_mini);\r
+ asserta(dlo_minj <= dhi_minj);\r
+ asserta(dlo_maxi >= dhi_maxi);\r
+ asserta(dlo_maxj <= dhi_maxj);\r
+ }\r
+\r
+ unsigned GetMini() const\r
+ {\r
+ return dhi_mini;\r
+ }\r
+\r
+ unsigned GetMaxi() const\r
+ {\r
+ return dlo_maxi;\r
+ }\r
+\r
+ unsigned GetMinj() const\r
+ {\r
+ return dlo_minj;\r
+ }\r
+\r
+ unsigned GetMaxj() const\r
+ {\r
+ return dhi_maxj;\r
+ }\r
+/***\r
+ i = 0..LA-1\r
+ j = 0..LB-1\r
+ d = LA - i + j = 1 .. LA+LB-1\r
+ j = d - LA + i\r
+ i = LA - d + j\r
+***/\r
+ void GetRange_j(unsigned i, unsigned &Startj, unsigned &Endj) const\r
+ {\r
+ // j = d - LA + i\r
+ if (dlo + i >= LA)\r
+ Startj = dlo + i - LA;\r
+ else\r
+ Startj = 0;\r
+\r
+ if (Startj >= LB)\r
+ Startj = LB - 1;\r
+\r
+ if (dhi + i + 1 >= LA)\r
+ Endj = dhi + i + 1 - LA;\r
+ else\r
+ Endj = 0;\r
+\r
+ if (Endj > LB)\r
+ Endj = LB;\r
+\r
+ asserta(Endj >= Startj);\r
+ }\r
+\r
+ void LogMe() const\r
+ {\r
+ Log("LA=%u LB=%d dlo(%u): (%u,%u)-(%u,%u) dhi(%u): (%u,%u)-(%u,%u) i=[%u-%u] j=[%u-%u]\n",\r
+ LA, LB,\r
+ dlo,\r
+ dlo_mini, dlo_minj,\r
+ dlo_maxi, dlo_maxj,\r
+ dhi,\r
+ dhi_mini, dhi_minj,\r
+ dhi_maxi, dhi_maxj,\r
+ GetMini(), GetMaxi(),\r
+ GetMinj(), GetMaxj());\r
+ }\r
+ };\r
+\r
+typedef const char *(*NWDIAG)(const byte *A, unsigned LA, const byte *B, unsigned LB,
+ unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm);
+
+const char *NWBandWrap(NWDIAG NW, const byte *A, unsigned LA, const byte *B, unsigned LB,
+ unsigned DiagLo, unsigned DiagHi, bool LeftTerm, bool RightTerm);
+\r
+#endif // diagbox_h\r
--- /dev/null
+#ifndef dp_h\r
+#define dp_h\r
+\r
+#define SAVE_FAST 0\r
+\r
+#include "myutils.h"\r
+#include "mx.h"\r
+#include "seqdb.h"\r
+#include "diagbox.h"\r
+#include "path.h"\r
+#include "alnparams.h"\r
+#include "alnheuristics.h"\r
+#include "hspfinder.h"\r
+\r
+typedef void (*OnPathFn)(const string &Path, bool Full);\r
+\r
+// Three mode constants with explicit values 1..3.\r
+// NOTE(review): the names suggest "full", "forward" and "backward" extension\r
+// types; no use of the enum is visible in this header -- confirm.\r
+enum XType\r
+ {\r
+ XType_Full=1,\r
+ XType_Fwd=2,\r
+ XType_Bwd=3,\r
+ };\r
+\r
+// public\r
+float ViterbiBrute(const byte *A, unsigned LA, const byte *B, unsigned LB, \r
+ unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiSimpleBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, unsigned DiagLo, unsigned DiagHi, PathData &PD);\r
+\r
+float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiFastBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ unsigned DiagLo, unsigned DiagHi, const AlnParams &AP, PathData &PD);\r
+\r
+float ViterbiFastMainDiag(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ unsigned BandRadius, const AlnParams &AP, PathData &PD);\r
+\r
+float XDropFwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropBwdSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropFwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+float XDropBwdFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, float XDrop, unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+\r
+void XDropAlign(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ unsigned AncLoi, unsigned AncLoj, unsigned AncLen, const AlnParams &AP,\r
+ float XDrop, HSPData &HSP, PathData &PD);\r
+\r
+float SWSimple(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj,\r
+ unsigned &Hij, PathData &PD);\r
+\r
+float SWFast(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, unsigned &Loi, unsigned &Leni, unsigned &Lenj,\r
+ unsigned &Hij, PathData &PD);\r
+\r
+void SWFast2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+ HSPData &HSP, PathData &PD);\r
+\r
+void SWSimple2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+ HSPData &HSP, PathData &PD);\r
+\r
+float SWUngapped(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const float * const *SubstMx, unsigned &LoA, unsigned &LoB, unsigned &Len);\r
+\r
+void SWUngapped2(const SeqData &SA, const SeqData &SB, const AlnParams &AP,\r
+ HSPData &HSP);\r
+\r
+float SWFastNTB(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP);\r
+\r
+void GlobalAlignBand(const byte *A, unsigned LA, const byte *B, unsigned LB,\r
+ const AlnParams &AP, unsigned BandRadius, PathData &PD);\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &AP,\r
+ const AlnHeuristics &AH, HSPFinder &HF, float MinFractId, float &HSPFractId,\r
+ PathData &PD);\r
+\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path);\r
+\r
+void GetBruteMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetSimpleDPMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetSimpleBandMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetXDropFwdSimpleDPMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+#if SAVE_FAST\r
+void GetFastMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+void GetFastBandMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I);\r
+#endif\r
+\r
+// private\r
+void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD);\r
+void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj,\r
+ unsigned &Leni, unsigned &Lenj, PathData &PD);\r
+void EnumPaths(unsigned L1, unsigned L2, bool SubPaths, OnPathFn OnPath);\r
+void AllocBit(unsigned LA, unsigned LB);\r
+\r
+const byte TRACEBITS_DM = 0x01;\r
+const byte TRACEBITS_IM = 0x02;\r
+const byte TRACEBITS_MD = 0x04;\r
+const byte TRACEBITS_MI = 0x08;\r
+const byte TRACEBITS_SM = 0x10;\r
+const byte TRACEBITS_UNINIT = ~0x1f;\r
+\r
+extern Mx<byte> g_Mx_TBBit;\r
+extern float *g_DPRow1;\r
+extern float *g_DPRow2;\r
+extern byte **g_TBBit;\r
+\r
+// Picks the largest of the three incoming scores MM, DM, IM and reports which\r
+// state it came from ('M', 'D' or 'I'). Comparisons are strict, so ties go to\r
+// M first, then D.\r
+static inline void Max_xM(float &Score, float MM, float DM, float IM, byte &State)\r
+{\r
+    float Best = MM;\r
+    byte From = 'M';\r
+\r
+    if (DM > Best)\r
+    {\r
+        Best = DM;\r
+        From = 'D';\r
+    }\r
+    if (IM > Best)\r
+    {\r
+        Best = IM;\r
+        From = 'I';\r
+    }\r
+\r
+    Score = Best;\r
+    State = From;\r
+}\r
+\r
+// Picks the larger of MD and DD and reports the source state; a tie goes to 'M'.\r
+static inline void Max_xD(float &Score, float MD, float DD, byte &State)\r
+{\r
+    const bool FromM = (MD >= DD);\r
+    Score = FromM ? MD : DD;\r
+    State = FromM ? 'M' : 'D';\r
+}\r
+\r
+// Picks the larger of MI and II and reports the source state; a tie goes to 'M'.\r
+static inline void Max_xI(float &Score, float MI, float II, byte &State)\r
+{\r
+    const bool FromM = (MI >= II);\r
+    Score = FromM ? MI : II;\r
+    State = FromM ? 'M' : 'I';\r
+}\r
+\r
+#endif // dp_h\r
--- /dev/null
+#ifndef evalue_h\r
+#define evalue_h\r
+\r
+#include <float.h>\r
+\r
+// Five-parameter overload: takes gapped and ungapped lambda and K values plus a\r
+// database length.\r
+void SetKarlin(double GappedLambda, double UngappedLambda,\r
+ double GappedK, double UngappedK, double DBLength);\r
+\r
+double GetKarlinDBLength();\r
+void SetKarlinDBLength(double DBLength);\r
+void LogKarlin();\r
+void SetKarlinAmino(double DBLength);\r
+void SetKarlinNucleo(double DBLength);\r
+void SetKarlin(double DBLength, bool Nucleo);\r
+double ComputeBitScoreGapped(double Score);\r
+double ComputeBitScoreUngapped(double Score);\r
+double ComputeEvalueGapped(double Score, unsigned QueryLength);\r
+double ComputeEvalueUngapped(double Score, unsigned QueryLength);\r
+double ComputeMinScoreGivenEvalueAGapped(double Evalue, unsigned Area);\r
+double ComputeMinScoreGivenEvalueAUngapped(double Evalue, unsigned Area);\r
+double ComputeMinScoreGivenEvalueQGapped(double Evalue, unsigned QueryLength);\r
+double ComputeMinScoreGivenEvalueQUngapped(double Evalue, unsigned QueryLength);\r
+double ComputeEvalueGappedFromBitScore(double BitScore, unsigned QueryLength);\r
+\r
+#endif // evalue_h\r
--- /dev/null
+#include "myutils.h"\r
+#include "alpha.h"\r
+\r
+//unsigned g_MaxL = 0;\r
+\r
+static bool *g_IsChar = g_IsAminoChar;\r
+\r
+// Term gaps allowed in query (A) only\r
+// Computes 1 - Diffs/Cols for the alignment of A and B described by Path (a\r
+// string of 'M', 'D' and 'I' columns; 'M' and 'D' consume a letter of A, 'M'\r
+// and 'I' consume a letter of B). Returns 0 at once when Path starts with 'D',\r
+// and 0 when no column is counted. If ptrDesc is non-null a short description\r
+// is written to it with sprintf; the caller must supply a large enough buffer.\r
+// NOTE(review): the scoring loop stops at p == LastM, so the last 'M' column\r
+// itself and everything after it are never scored -- confirm that excluding\r
+// the last match is intended. If Path has no 'M', LastM stays null and every\r
+// column is scored.\r
+static double GetFractIdGivenPathDerep(const byte *A, const byte *B, const char *Path,\r
+ char *ptrDesc)\r
+ {\r
+ if (*Path == 'D')\r
+ {\r
+ if (ptrDesc != 0)\r
+ sprintf(ptrDesc, "(term gap in Query)");\r
+ return 0;\r
+ }\r
+\r
+ // Remember the position of the last 'M' column in Path.\r
+ const char *LastM = 0;\r
+ for (const char *p = Path; *p; ++p)\r
+ if (*p == 'M')\r
+ LastM = p;\r
+\r
+ unsigned PosA = 0;\r
+ unsigned PosB = 0;\r
+ unsigned Ids = 0;\r
+ unsigned Diffs = 0;\r
+ unsigned Cols = 0;\r
+ for (const char *p = Path; *p && p != LastM; ++p)\r
+ {\r
+ ++Cols;\r
+ char c = *p;\r
+ if (c == 'M')\r
+ {\r
+ byte a = toupper(A[PosA]);\r
+ byte b = toupper(B[PosB]);\r
+ if (g_IsChar[a] && g_IsChar[b])\r
+ {\r
+ if (a == b)\r
+ ++Ids;\r
+ else\r
+ ++Diffs;\r
+ }\r
+ else\r
+ // Pair with a letter not flagged in g_IsChar: undo the ++Cols above.\r
+ --Cols;\r
+ }\r
+ if (c == 'D' || c == 'I')\r
+ ++Diffs;\r
+ if (c == 'M' || c == 'D')\r
+ ++PosA;\r
+ if (c == 'M' || c == 'I')\r
+ ++PosB;\r
+ }\r
+\r
+ double FractId = (Cols == 0 ? 0.0 : 1.0 - double(Diffs)/double(Cols));\r
+ if (ptrDesc != 0)\r
+ sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
+ return FractId;\r
+ }\r
+\r
+// Fractional identity over the whole path: every gap column and every\r
+// mismatching letter pair counts as a difference, and the result is\r
+// 1 - Diffs/Cols (0 when no column is counted). 'M' columns where either\r
+// upper-cased letter is not flagged in g_IsChar are left out of the column\r
+// count. If ptrDesc is non-null a short summary is written to it with sprintf.\r
+static double GetFractIdGivenPathAllDiffs(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+{\r
+    unsigned PosA = 0;\r
+    unsigned PosB = 0;\r
+    unsigned Ids = 0;\r
+    unsigned Diffs = 0;\r
+    unsigned Cols = 0;\r
+    for (const char *p = Path; *p != 0; ++p)\r
+    {\r
+        switch (*p)\r
+        {\r
+        case 'M':\r
+        {\r
+            byte a = toupper(A[PosA]);\r
+            byte b = toupper(B[PosB]);\r
+            ++PosA;\r
+            ++PosB;\r
+            if (g_IsChar[a] && g_IsChar[b])\r
+            {\r
+                ++Cols;\r
+                if (a == b)\r
+                    ++Ids;\r
+                else\r
+                    ++Diffs;\r
+            }\r
+            break;\r
+        }\r
+        case 'D':\r
+            ++Cols;\r
+            ++Diffs;\r
+            ++PosA;\r
+            break;\r
+        case 'I':\r
+            ++Cols;\r
+            ++Diffs;\r
+            ++PosB;\r
+            break;\r
+        default:\r
+            // Any other path character is still counted as a column.\r
+            ++Cols;\r
+            break;\r
+        }\r
+    }\r
+\r
+    double FractId = 0.0;\r
+    if (Cols != 0)\r
+        FractId = 1.0 - double(Diffs)/double(Cols);\r
+    if (ptrDesc != 0)\r
+        sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
+    return FractId;\r
+}\r
+\r
+// Fractional identity restricted to the columns from the first 'M' to the last\r
+// 'M' of Path, so leading and trailing gap columns are ignored. Within that\r
+// span every gap column and mismatching pair is a difference and the result is\r
+// 1 - Diffs/Cols. Returns 0.0 when Path has no 'M'. If ptrDesc is non-null a\r
+// short summary is written to it.\r
+static double GetFractIdGivenPathInternalDiffs(const byte *A, const byte *B,\r
+  const char *Path, char *ptrDesc)\r
+{\r
+    // Locate the first and last match columns.\r
+    unsigned FirstM = UINT_MAX;\r
+    unsigned LastM = UINT_MAX;\r
+    for (unsigned Col = 0; Path[Col] != 0; ++Col)\r
+    {\r
+        if (Path[Col] != 'M')\r
+            continue;\r
+        if (FirstM == UINT_MAX)\r
+            FirstM = Col;\r
+        LastM = Col;\r
+    }\r
+    if (FirstM == UINT_MAX)\r
+    {\r
+        if (ptrDesc != 0)\r
+            strcpy(ptrDesc, "(no matches)");\r
+        return 0.0;\r
+    }\r
+\r
+    // Skip the leading gap columns; there is no 'M' before FirstM, so only 'D'\r
+    // and 'I' can advance a position here.\r
+    unsigned PosA = 0;\r
+    unsigned PosB = 0;\r
+    for (unsigned Col = 0; Col < FirstM; ++Col)\r
+    {\r
+        if (Path[Col] == 'D')\r
+            ++PosA;\r
+        else if (Path[Col] == 'I')\r
+            ++PosB;\r
+    }\r
+\r
+    unsigned Ids = 0;\r
+    unsigned Diffs = 0;\r
+    unsigned Cols = 0;\r
+    for (unsigned Col = FirstM; Col <= LastM; ++Col)\r
+    {\r
+        switch (Path[Col])\r
+        {\r
+        case 'M':\r
+        {\r
+            byte a = toupper(A[PosA]);\r
+            byte b = toupper(B[PosB]);\r
+            ++PosA;\r
+            ++PosB;\r
+            if (g_IsChar[a] && g_IsChar[b])\r
+            {\r
+                ++Cols;\r
+                if (a == b)\r
+                    ++Ids;\r
+                else\r
+                    ++Diffs;\r
+            }\r
+            break;\r
+        }\r
+        case 'D':\r
+            ++Cols;\r
+            ++Diffs;\r
+            ++PosA;\r
+            break;\r
+        case 'I':\r
+            ++Cols;\r
+            ++Diffs;\r
+            ++PosB;\r
+            break;\r
+        default:\r
+            // Any other path character is still counted as a column.\r
+            ++Cols;\r
+            break;\r
+        }\r
+    }\r
+\r
+    double FractId = 0.0;\r
+    if (Cols != 0)\r
+        FractId = 1.0 - double(Diffs)/double(Cols);\r
+    if (ptrDesc != 0)\r
+        sprintf(ptrDesc, "(ids=%u/cols=%u)", Ids, Cols);\r
+    return FractId;\r
+}\r
+\r
+// Fractional identity used for --iddef 3:\r
+//   Id = 1 - (mismatches + gap opens) / (number of letters of B in the path),\r
+// clamped below at 0. A mismatch is an 'M' column whose two letters differ\r
+// ignoring case. A gap open is a 'D' or 'I' column that is the first column\r
+// of the path or directly follows an 'M' column.\r
+// If ptrDesc is non-null it receives the gap-open, diff and length counts.\r
+static double GetFractIdGivenPathMBL(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+    {\r
+    unsigned PosA = 0;\r
+    unsigned PosB = 0;\r
+    unsigned Mismatches = 0;\r
+    unsigned Gaps = 0;\r
+    for (const char *p = Path; *p; ++p)\r
+        {\r
+        char c = *p;\r
+        if (c == 'M' && toupper(A[PosA]) != toupper(B[PosB]))\r
+            ++Mismatches;\r
+\r
+        // The gap test must be grouped explicitly: && binds tighter than ||,\r
+        // so "c == 'D' || c == 'I' && ..." would count every 'D' column\r
+        // rather than only the column that opens a gap.\r
+        bool IsGapCol = (c == 'D' || c == 'I');\r
+        if (IsGapCol && (p == Path || p[-1] == 'M'))\r
+            ++Gaps;\r
+\r
+        if (c == 'M' || c == 'D')\r
+            ++PosA;\r
+        if (c == 'M' || c == 'I')\r
+            ++PosB;\r
+        }\r
+    unsigned Diffs = Gaps + Mismatches;\r
+    double FractDiffs = (PosB == 0 ? 0.0 : double(Diffs)/double(PosB));\r
+    if (ptrDesc != 0)\r
+        sprintf(ptrDesc, "Gap opens %u, Id=1 - [(diffs=%u)/(target_length=%u)]",\r
+          Gaps, Diffs, PosB);\r
+    double FractId = 1.0 - FractDiffs;\r
+    if (FractId < 0.0)\r
+        return 0.0;\r
+    return FractId;\r
+    }\r
+\r
+// Fractional identity used for --iddef 4: identical letter pairs divided by\r
+// the number of path columns, after removing "wild" columns ('M' columns\r
+// where either letter is not flagged in g_IsChar). Gap columns stay in the\r
+// denominator. The quotient is computed in single precision.\r
+// Returns 0 when no columns remain. A short description is written to\r
+// ptrDesc when it is non-null.\r
+static double GetFractIdGivenPathBLAST(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+    {\r
+    unsigned IxA = 0;\r
+    unsigned IxB = 0;\r
+    unsigned IdCount = 0;\r
+    unsigned WildCount = 0;\r
+    unsigned ColCount = 0;\r
+    for (const char *q = Path; *q != 0; ++q)\r
+        {\r
+        ++ColCount;\r
+        switch (*q)\r
+            {\r
+        case 'M':\r
+            {\r
+            byte a = toupper(A[IxA]);\r
+            byte b = toupper(B[IxB]);\r
+            if (!(g_IsChar[a] && g_IsChar[b]))\r
+                ++WildCount;\r
+            else if (a == b)\r
+                ++IdCount;\r
+            ++IxA;\r
+            ++IxB;\r
+            break;\r
+            }\r
+\r
+        case 'D':\r
+            ++IxA;\r
+            break;\r
+\r
+        case 'I':\r
+            ++IxB;\r
+            break;\r
+\r
+        default:\r
+            break;\r
+            }\r
+        }\r
+\r
+    // Wild columns are a subset of all columns, so this cannot underflow.\r
+    asserta(ColCount >= WildCount);\r
+    ColCount -= WildCount;\r
+\r
+    double FractId = 0.0f;\r
+    if (ColCount != 0)\r
+        FractId = float(IdCount)/float(ColCount);\r
+    if (ptrDesc != 0)\r
+        sprintf(ptrDesc, "(ids=%u/cols=%u)", IdCount, ColCount);\r
+    return FractId;\r
+    }\r
+\r
+// Fractional identity used for --iddef 0: identical letter pairs divided by\r
+// the length of the shorter sequence as consumed by Path, minus the number\r
+// of "wild" columns ('M' columns where either letter is not flagged in\r
+// g_IsChar). Returns 0.0 when that length is zero. A short description is\r
+// written to ptrDesc when it is non-null.\r
+static double GetFractIdGivenPathDefault(const byte *A, const byte *B, const char *Path,\r
+  char *ptrDesc)\r
+    {\r
+    unsigned IxA = 0;\r
+    unsigned IxB = 0;\r
+    unsigned IdCount = 0;\r
+    unsigned WildCount = 0;\r
+    for (const char *q = Path; *q != 0; ++q)\r
+        {\r
+        switch (*q)\r
+            {\r
+        case 'M':\r
+            {\r
+            byte a = toupper(A[IxA]);\r
+            byte b = toupper(B[IxB]);\r
+            if (!(g_IsChar[a] && g_IsChar[b]))\r
+                ++WildCount;\r
+            else if (a == b)\r
+                ++IdCount;\r
+            ++IxA;\r
+            ++IxB;\r
+            break;\r
+            }\r
+\r
+        case 'D':\r
+            ++IxA;\r
+            break;\r
+\r
+        case 'I':\r
+            ++IxB;\r
+            break;\r
+\r
+        default:\r
+            break;\r
+            }\r
+        }\r
+\r
+    // IxA and IxB now hold the number of letters of A and B in the path.\r
+    unsigned MinLen = min(IxA, IxB) - WildCount;\r
+    double FractId = 0.0;\r
+    if (MinLen != 0)\r
+        FractId = double(IdCount)/double(MinLen);\r
+    if (ptrDesc != 0)\r
+        sprintf(ptrDesc, "(ids=%u/shorter_length=%u)", IdCount, MinLen);\r
+    return FractId;\r
+    }\r
+\r
+// Fractional identity of sequences A and B under the alignment Path.\r
+// Path is a null-terminated string of column codes: 'M' consumes a letter of\r
+// both A and B, 'D' a letter of A only and 'I' a letter of B only.\r
+// Nucleo selects the table used to recognise ordinary letters (g_IsACGTU if\r
+// true, g_IsAminoChar otherwise); the choice is stored in the global g_IsChar.\r
+// IdDef selects the identity definition: 0=default, 1=all diffs, 2=internal\r
+// diffs, 3=MBL, 4=BLAST, 5=derep. Any other value is fatal.\r
+// If ptrDesc is non-null it receives a short description of the outcome.\r
+// Returns 0.0 without computing an identity when Path is null or empty, or\r
+// when the path is rejected by opt_leftjust, opt_rightjust, opt_maxqgap or\r
+// opt_maxtgap.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
+  bool Nucleo, char *ptrDesc, unsigned IdDef)\r
+    {\r
+    if (Nucleo)\r
+        g_IsChar = g_IsACGTU;\r
+    else\r
+        g_IsChar = g_IsAminoChar;\r
+\r
+    if (Path == 0)\r
+        {\r
+        if (ptrDesc != 0)\r
+            strcpy(ptrDesc, "(NULL path)");\r
+        return 0.0;\r
+        }\r
+\r
+    unsigned ColCount = (unsigned) strlen(Path);\r
+    if (ColCount == 0)\r
+        {\r
+        // Always fill in the description so callers never read a stale buffer.\r
+        if (ptrDesc != 0)\r
+            strcpy(ptrDesc, "(empty path)");\r
+        return 0.0;\r
+        }\r
+\r
+    if (opt_leftjust)\r
+        {\r
+        if (Path[0] != 'M' || Path[ColCount-1] == 'D')\r
+            {\r
+            if (ptrDesc != 0)\r
+                strcpy(ptrDesc, "(leftjust)");\r
+            return 0.0;\r
+            }\r
+        }\r
+\r
+    if (opt_rightjust)\r
+        {\r
+        if (Path[0] == 'D' || Path[ColCount-1] != 'M')\r
+            {\r
+            if (ptrDesc != 0)\r
+                strcpy(ptrDesc, "(rightjust)");\r
+            return 0.0;\r
+            }\r
+        }\r
+\r
+    if (opt_maxqgap > 0 || opt_maxtgap > 0)\r
+        {\r
+        // Check the length of every gap that is closed by an 'M' before the\r
+        // last 'M'. L is the length of the current run of gap columns.\r
+        unsigned L = 0;\r
+        const char *LastM = 0;\r
+        for (const char *p = Path; *p; ++p)\r
+            if (*p == 'M')\r
+                LastM = p;\r
+\r
+        for (const char *p = Path; *p && p != LastM; ++p)\r
+            {\r
+            char c = *p;\r
+            switch (c)\r
+                {\r
+            case 'M':\r
+                if (L > 0)\r
+                    {\r
+                    // L > 0 means the previous column was a gap column.\r
+                    if (p[-1] == 'D')\r
+                        {\r
+                        if (L > opt_maxtgap)\r
+                            {\r
+                            if (ptrDesc != 0)\r
+                                sprintf(ptrDesc, "(maxtgap)");\r
+                            return 0.0;\r
+                            }\r
+                        }\r
+                    else if (p[-1] == 'I')\r
+                        {\r
+                        if (L > opt_maxqgap)\r
+                            {\r
+                            if (ptrDesc != 0)\r
+                                sprintf(ptrDesc, "(maxqgap)");\r
+                            return 0.0;\r
+                            }\r
+                        }\r
+                    else\r
+                        asserta(false);\r
+                    }\r
+                L = 0;\r
+                break;\r
+\r
+            case 'D':\r
+            case 'I':\r
+                ++L;\r
+                break;\r
+\r
+            default:\r
+                asserta(false);\r
+                }\r
+            }\r
+        }\r
+\r
+    double FractId = 0.0;\r
+    switch (IdDef)\r
+        {\r
+    case 0:\r
+        FractId = GetFractIdGivenPathDefault(A, B, Path, ptrDesc);\r
+        break;\r
+\r
+    case 1:\r
+        FractId = GetFractIdGivenPathAllDiffs(A, B, Path, ptrDesc);\r
+        break;\r
+\r
+    case 2:\r
+        FractId = GetFractIdGivenPathInternalDiffs(A, B, Path, ptrDesc);\r
+        break;\r
+\r
+    case 3:\r
+        FractId = GetFractIdGivenPathMBL(A, B, Path, ptrDesc);\r
+        break;\r
+\r
+    case 4:\r
+        FractId = GetFractIdGivenPathBLAST(A, B, Path, ptrDesc);\r
+        break;\r
+\r
+    case 5:\r
+        FractId = GetFractIdGivenPathDerep(A, B, Path, ptrDesc);\r
+        break;\r
+\r
+    default:\r
+        // Report the value actually passed in, which need not be opt_iddef.\r
+        Die("--iddef %u invalid", IdDef);\r
+        }\r
+\r
+    return FractId;\r
+    }\r
+\r
+// Convenience overload: uses the identity definition in the global opt_iddef.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path,\r
+ bool Nucleo, char *ptrDesc)\r
+ {\r
+ return GetFractIdGivenPath(A, B, Path, Nucleo, ptrDesc, opt_iddef);\r
+ }\r
+\r
+// Convenience overload: as above, with no description buffer.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo)\r
+ {\r
+ return GetFractIdGivenPath(A, B, Path, Nucleo, (char *) 0);\r
+ }\r
+\r
+// Convenience overload: path given as a string; passes Nucleo=true.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const string &Path)\r
+ {\r
+ return GetFractIdGivenPath(A, B, Path.c_str(), true);\r
+ }\r
+\r
+// Convenience overload: passes Nucleo=true.\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path)\r
+ {\r
+ return GetFractIdGivenPath(A, B, Path, true);\r
+ }\r
--- /dev/null
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "ultra.h"\r
+#include <set>\r
+\r
+void AddTargets(Ultra &U, const SeqData &Query, set<unsigned> &TargetIndexes);\r
+\r
+// Divide a sequence of length L into equal-length chunks.\r
+// On return Length is the chunk length and Los holds the start offset of\r
+// each chunk; every chunk [Lo, Lo+Length) lies inside [0, L).\r
+// If L <= opt_minchunk there is a single chunk covering the whole sequence.\r
+// Otherwise Length is ceil(L/opt_chunks), raised to opt_minchunk if smaller.\r
+// Chunks are laid end to end from offset 0, and the final chunk is moved back\r
+// so that it ends exactly at L (it may overlap the previous chunk).\r
+void GetChunkInfo(unsigned L, unsigned &Length, vector<unsigned> &Los)\r
+    {\r
+    Los.clear();\r
+\r
+    if (L <= opt_minchunk)\r
+        {\r
+        Length = L;\r
+        Los.push_back(0);\r
+        return;\r
+        }\r
+\r
+    // Guard the division below.\r
+    asserta(opt_chunks > 0);\r
+\r
+    Length = (L - 1)/opt_chunks + 1;\r
+    if (Length < opt_minchunk)\r
+        Length = opt_minchunk;\r
+\r
+    // Here Length <= L: ceil(L/opt_chunks) <= L and opt_minchunk < L.\r
+    unsigned Lo = 0;\r
+    for (;;)\r
+        {\r
+        if (Lo + Length >= L)\r
+            {\r
+            // Last chunk: anchor it at the end of the sequence. Using\r
+            // L - Length (not L - Length - 1) includes the final letter and\r
+            // cannot wrap below zero when Length == L.\r
+            Los.push_back(L - Length);\r
+            return;\r
+            }\r
+        Los.push_back(Lo);\r
+        Lo += Length;\r
+        }\r
+    }\r
+\r
+// Collect candidate parent sequences for the query QSD.\r
+// The query is split into chunks (see GetChunkInfo) and AddTargets is called\r
+// on each chunk, accumulating target indexes in a set. The set is then\r
+// filtered by abundance and the survivors are written to Parents in\r
+// ascending index order.\r
+// AbQ is the query abundance; if AbQ > 0, a target whose label gives a\r
+// positive abundance below opt_abskew*AbQ is rejected.\r
+void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
+  vector<unsigned> &Parents)\r
+    {\r
+    Parents.clear();\r
+\r
+    set<unsigned> TargetIndexes;\r
+    const unsigned QL = QSD.L;\r
+\r
+    unsigned ChunkLength;\r
+    vector<unsigned> ChunkLos;\r
+    GetChunkInfo(QL, ChunkLength, ChunkLos);\r
+\r
+    // Each chunk is searched as a copy of the query with Seq and L narrowed\r
+    // to the chunk. The label is deliberately left as the query's label:\r
+    // prefixing it with the chunk offset was found to mess up --self.\r
+    SeqData QuerySD = QSD;\r
+    const unsigned ChunkCount = SIZE(ChunkLos);\r
+    for (unsigned ChunkIndex = 0; ChunkIndex < ChunkCount; ++ChunkIndex)\r
+        {\r
+        const unsigned Lo = ChunkLos[ChunkIndex];\r
+        asserta(Lo + ChunkLength <= QL);\r
+\r
+        QuerySD.Seq = QSD.Seq + Lo;\r
+        QuerySD.L = ChunkLength;\r
+\r
+        AddTargets(U, QuerySD, TargetIndexes);\r
+        }\r
+\r
+    for (set<unsigned>::const_iterator p = TargetIndexes.begin();\r
+      p != TargetIndexes.end(); ++p)\r
+        {\r
+        unsigned TargetIndex = *p;\r
+        bool Accept = true;\r
+        if (AbQ > 0.0f)\r
+            {\r
+            const char *TargetLabel = U.GetSeedLabel(TargetIndex);\r
+            float AbT = GetAbFromLabel(string(TargetLabel));\r
+            // Targets with no positive abundance in the label are kept.\r
+            if (AbT > 0.0f && AbT < opt_abskew*AbQ)\r
+                Accept = false;\r
+            }\r
+\r
+        if (Accept)\r
+            Parents.push_back(TargetIndex);\r
+        }\r
+    }\r
--- /dev/null
+//#if UCHIMES\r
+\r
+#include "dp.h"\r
+#include "seq.h"\r
+\r
+static AlnParams g_AP;\r
+static bool g_APInitDone = false;\r
+\r
+// Align Query to Target with ViterbiFast, writing the path to PD.\r
+// The file-static alignment parameters g_AP are initialised from the command\r
+// line on the first call only. Always returns true.\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, PathData &PD)\r
+ {\r
+ if (!g_APInitDone)\r
+ {\r
+ g_AP.InitFromCmdLine(true);\r
+ g_APInitDone = true;\r
+ }\r
+\r
+ ViterbiFast(Query.Seq, Query.L, Target.Seq, Target.L, g_AP, PD);\r
+ return true;\r
+ }\r
+\r
+// Align Query to Target and return the path as a string copied from\r
+// PD.Start. Always returns true.\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, string &Path)\r
+ {\r
+ PathData PD;\r
+ GlobalAlign(Query, Target, PD);\r
+ Path = string(PD.Start);\r
+ return true;\r
+ }\r
+\r
+// Overload with the wider parameter list. Only Query, Target and PD are\r
+// used; the remaining arguments are ignored. The path is computed via the\r
+// string overload and copied into storage allocated in PD, null-terminated.\r
+bool GlobalAlign(const SeqData &Query, const SeqData &Target, const AlnParams &/*AP*/,\r
+ const AlnHeuristics &AH, HSPFinder &/*HF*/, float /*MinFractId*/, float &/*HSPId*/, PathData &PD)\r
+ {\r
+ PD.Clear();\r
+ string Path;\r
+ bool Found = GlobalAlign(Query, Target, Path);\r
+ if (!Found)\r
+ return false;\r
+ unsigned n = SIZE(Path);\r
+ // n+1 leaves room for the terminating null written below.\r
+ PD.Alloc(n+1);\r
+ memcpy(PD.Front, Path.c_str(), n);\r
+ PD.Start = PD.Front;\r
+ PD.Start[n] = 0;\r
+ return true;\r
+ }\r
+\r
+//#endif // UCHIMES\r
--- /dev/null
+"\n"
+"Usage\n"
+"-----\n"
+"\n"
+"uchime --input query.fasta [--db db.fasta] [--uchimeout results.uchime]\n"
+" [--uchimealns results.alns]\n"
+"\n"
+"Options\n"
+"-------\n"
+"\n"
+"--input filename\n"
+" Query sequences in FASTA format.\n"
+" If the --db option is not specified, uchime uses de novo\n"
+" detection. In de novo mode, relative abundance must be given\n"
+" by a string /ab=xxx/ somewhere in the label, where xxx is a\n"
+" floating-point number, e.g. >F00QGH67HG/ab=1.2/.\n"
+"\n"
+"--db filename\n"
+" Reference database in FASTA format.\n"
+" Optional, if not specified uchime uses de novo mode.\n"
+"\n"
+" ***WARNING*** The database is searched ONLY on the plus strand.\n"
+" You MUST include reverse-complemented sequences in the database\n"
+" if you want both strands to be searched.\n"
+"\n"
+"--abskew x\n"
+" Minimum abundance skew. Default 1.9. De novo mode only.\n"
+" Abundance skew is:\n"
+" min [ abund(parent1), abund(parent2) ] / abund(query).\n"
+"\n"
+"--uchimeout filename\n"
+" Output in tabbed format with one record per query sequence.\n"
+" First field is score (h), second field is query label.\n"
+" For details, see manual.\n"
+"\n"
+"--uchimealns filename\n"
+" Multiple alignments of query sequences to parents in human-\n"
+" readable format. Alignments show columns with differences\n"
+" that support or contradict a chimeric model.\n"
+"\n"
+"--minh h\n"
+" Minimum score to report chimera. Default 0.3. Values from 0.1\n"
+" to 5 might be reasonable. Lower values increase sensitivity\n"
+" but may report more false positives. If you decrease --xn,\n"
+" you may need to increase --minh, and vice versa.\n"
+"\n"
+"--mindiv div\n"
+" Minimum divergence ratio, default 0.5. Div ratio is 100%% - \n"
+" %%identity between query sequence and the closest candidate for\n"
+" being a parent. If you don't care about very close chimeras,\n"
+" then you could increase --mindiv to, say, 1.0 or 2.0, and\n"
+" also decrease --minh, say to 0.1, to increase sensitivity.\n"
+" How well this works will depend on your data. Best is to\n"
+" tune parameters on a good benchmark.\n"
+"\n"
+"--xn beta\n"
+" Weight of a no vote, also called the beta parameter. Default 8.0.\n"
+" Decreasing this weight to around 3 or 4 may give better\n"
+" performance on denoised data.\n"
+"\n"
+"--dn n\n"
+" Pseudo-count prior on number of no votes. Default 1.4. Probably\n"
+" no good reason to change this unless you can retune to a good\n"
+" benchmark for your data. Reasonable values are probably in the\n"
+" range from 0.2 to 2.\n"
+"\n"
+"--xa w\n"
+" Weight of an abstain vote. Default 1. So far, results do not\n"
+" seem to be very sensitive to this parameter, but if you have\n"
+" a good training set might be worth trying. Reasonable values\n"
+" might range from 0.1 to 2.\n"
+"\n"
+"--chunks n\n"
+" Number of chunks to extract from the query sequence when searching\n"
+" for parents. Default 4.\n"
+"\n"
+"--[no]ovchunks\n"
+" [Do not] use overlapping chunks. Default do not.\n"
+"\n"
+"--minchunk n\n"
+" Minimum length of a chunk. Default 64.\n"
+"\n"
+"--idsmoothwindow w\n"
+" Length of id smoothing window. Default 32.\n"
+"\n"
+"--minsmoothid f\n"
+" Minimum fractional identity over smoothed window of candidate parent.\n"
+" Default 0.95.\n"
+"\n"
+"--maxp n\n"
+" Maximum number of candidate parents to consider. Default 2. In tests so\n"
+" far, increasing --maxp gives only a very small improvement in sensitivity\n"
+" but tends to increase the error rate quite a bit.\n"
+"\n"
+"--[no]skipgaps\n"
+"--[no]skipgaps2\n"
+" These options control how gapped columns affect counting of diffs.\n"
+" If --skipgaps is specified, columns containing gaps do not count as diffs.\n"
+" If --skipgaps2 is specified, if column is immediately adjacent to\n"
+" a column containing a gap, it is not counted as a diff.\n"
+" Default is --skipgaps --skipgaps2.\n"
+"\n"
+"--minlen L\n"
+"--maxlen L\n"
+" Minimum and maximum sequence length. Defaults 10, 10000.\n"
+" Applies to both query and reference sequences.\n"
+"\n"
+"--ucl\n"
+" Use local-X alignments. Default is global-X. On tests so far, global-X\n"
+" is always better; this option is retained because it just might work\n"
+" well on some future type of data.\n"
+"\n"
+"--queryfract f\n"
+" Minimum fraction of the query sequence that must be covered by a local-X\n"
+" alignment. Default 0.5. Applies only when --ucl is specified.\n"
+"\n"
+"--quiet\n"
+" Do not display progress messages on stderr.\n"
+"\n"
+"--log filename\n"
+" Write miscellaneous information to the log file. Mostly of interest\n"
+" to me (the algorithm developer). Use --verbose to get more info.\n"
+"\n"
+"--self\n"
+" In reference database mode, exclude a reference sequence if it has\n"
+" the same label as the query. This is useful for benchmarking by using\n"
+" the ref db as a query to test for false positives.\n"
--- /dev/null
+#ifndef hsp_h\r
+#define hsp_h 1\r
+\r
+// A pair of segments, one in each of two sequences (called i/A and j/B\r
+// here). Loi/Loj are the segment start positions and Leni/Lenj the segment\r
+// lengths; Score and User are carried along but not interpreted here.\r
+struct HSPData\r
+    {\r
+    unsigned Loi;\r
+    unsigned Loj;\r
+    unsigned Leni;\r
+    unsigned Lenj;\r
+    float Score;\r
+    unsigned User;\r
+\r
+    // Common length of the two segments; fatal if the lengths differ.\r
+    unsigned GetLength() const\r
+        {\r
+        if (Leni != Lenj)\r
+            Die("HSP::GetLength(): Leni %u, Lenj %u, Loi %u, Loj %u, Score %.1f",\r
+              Leni, Lenj, Loi, Loj, Score);\r
+\r
+        return Leni;\r
+        }\r
+\r
+    // Last position (inclusive) of the segment in the first sequence.\r
+    unsigned GetHii() const\r
+        {\r
+        assert(Leni > 0);\r
+        return Loi + Leni - 1;\r
+        }\r
+\r
+    // Last position (inclusive) of the segment in the second sequence.\r
+    unsigned GetHij() const\r
+        {\r
+        assert(Lenj > 0);\r
+        return Loj + Lenj - 1;\r
+        }\r
+\r
+    // True if the segment starts at position 0 of the first sequence.\r
+    bool LeftA() const\r
+        {\r
+        return Loi == 0;\r
+        }\r
+\r
+    // True if the segment starts at position 0 of the second sequence.\r
+    bool LeftB() const\r
+        {\r
+        return Loj == 0;\r
+        }\r
+\r
+    // True if the segment ends at the end of a first sequence of length LA.\r
+    bool RightA(unsigned LA) const\r
+        {\r
+        return Loi + Leni == LA;\r
+        }\r
+\r
+    // True if the segment ends at the end of a second sequence of length LB.\r
+    bool RightB(unsigned LB) const\r
+        {\r
+        return Loj + Lenj == LB;\r
+        }\r
+\r
+    // Number of positions k in the HSP where A[Loi+k] and B[Loj+k] are the\r
+    // same letter ignoring case. Requires Leni == Lenj (see GetLength).\r
+    unsigned GetIdCount(const byte *A, const byte *B) const\r
+        {\r
+        unsigned Count = 0;\r
+        unsigned K = GetLength();\r
+        for (unsigned k = 0; k < K; ++k)\r
+            {\r
+            byte a = A[Loi+k];\r
+            byte b = B[Loj+k];\r
+            if (toupper(a) == toupper(b))\r
+                Count++;\r
+            }\r
+        return Count;\r
+        }\r
+\r
+    // Overlap between this HSP and HSP, as the product of the overlaps in\r
+    // the two sequences divided by Leni*Lenj of this HSP. Returns 0.0 if\r
+    // this HSP has a zero length in either sequence.\r
+    // NOTE(review): the overlaps are computed as hi - lo on inclusive end\r
+    // positions, i.e. one less than the number of shared positions; confirm\r
+    // whether that is intended before changing it.\r
+    double OverlapFract(const HSPData &HSP) const\r
+        {\r
+        if (Leni == 0 || Lenj == 0)\r
+            return 0.0;\r
+\r
+        unsigned MaxLoi = max(Loi, HSP.Loi);\r
+        unsigned MaxLoj = max(Loj, HSP.Loj);\r
+        unsigned MinHii = min(GetHii(), HSP.GetHii());\r
+        unsigned MinHij = min(GetHij(), HSP.GetHij());\r
+\r
+        unsigned Ovi = (MinHii < MaxLoi) ? 0 : MinHii - MaxLoi;\r
+        unsigned Ovj = (MinHij < MaxLoj) ? 0 : MinHij - MaxLoj;\r
+\r
+        asserta(Ovi <= Leni && Ovj <= Lenj);\r
+        // Convert before multiplying so the products cannot wrap in unsigned.\r
+        return (double(Ovi)*double(Ovj))/(double(Leni)*double(Lenj));\r
+        }\r
+\r
+    // Orders HSPs by start position in the first sequence.\r
+    bool operator<(const HSPData &rhs) const\r
+        {\r
+        return Loi < rhs.Loi;\r
+        }\r
+\r
+    // Write starts, lengths and score to the log, one HSP per line.\r
+    void LogMe() const\r
+        {\r
+        Log("Loi=%u Loj=%u Li=%u Lj=%u Score=%.1f\n", Loi, Loj, Leni, Lenj, Score);\r
+        }\r
+\r
+    // Compact log form: inclusive ranges and score, no newline.\r
+    void LogMe2() const\r
+        {\r
+        Log("(%u-%u,%u-%u/%.1f)", Loi, GetHii(), Loj, GetHij(), Score);\r
+        }\r
+    };\r
+\r
+// Bendpoint\r
+// A position tagged as a "lo" or "hi" end, together with an index.\r
+// NOTE(review): Index presumably identifies the HSP the point belongs to --\r
+// confirm against the code that fills this in.\r
+struct BPData\r
+ {\r
+ unsigned Pos;\r
+ bool IsLo;\r
+ unsigned Index;\r
+\r
+ // Write the point to the log (no trailing newline).\r
+ void LogMe() const\r
+ {\r
+ Log("BP%s Pos %u Ix %u", (IsLo ? "lo" : "hi"), Pos, Index);\r
+ }\r
+ };\r
+\r
+#endif // hsp_h\r
--- /dev/null
+#ifndef hspfinder_h
+#define hspfinder_h
+
+#include "seq.h"
+
+// Stub HSP finder: SetA and SetB accept a sequence and do nothing.\r
+class HSPFinder
+ {
+public:
+ void SetA(const SeqData &/*SD*/) {}
+ void SetB(const SeqData &/*SD*/) {}
+ };
+
+#endif // hspfinder_h
--- /dev/null
+#include "myutils.h"\r
+#include "sfasta.h"\r
+#include "path.h"\r
+#include "dp.h"\r
+\r
+#if DEBUG\r
+// Check that Path consumes exactly LQ query letters ('M' or 'D' columns) and\r
+// LP parent letters ('M' or 'I' columns).\r
+static void AssertPathLengths(const string &Path, unsigned LQ, unsigned LP)\r
+    {\r
+    unsigned QLen = 0;\r
+    unsigned PLen = 0;\r
+    for (unsigned i = 0; i < SIZE(Path); ++i)\r
+        {\r
+        char c = Path[i];\r
+        if (c == 'M' || c == 'D')\r
+            ++QLen;\r
+        if (c == 'M' || c == 'I')\r
+            ++PLen;\r
+        }\r
+    asserta(QLen == LQ);\r
+    asserta(PLen == LP);\r
+    }\r
+#endif\r
+\r
+// Tally the 'I' columns of Path by the query position they precede.\r
+// Counts gets LQ+1 entries; Counts[LQ] holds inserts after the last query letter.\r
+static void CountInsertsByQueryPos(const string &Path, unsigned LQ,\r
+  vector<unsigned> &Counts)\r
+    {\r
+    Counts.assign(LQ+1, 0);\r
+    unsigned QPos = 0;\r
+    for (unsigned i = 0; i < SIZE(Path); ++i)\r
+        {\r
+        char c = Path[i];\r
+        if (c == 'M' || c == 'D')\r
+            ++QPos;\r
+        else\r
+            {\r
+            asserta(c == 'I');\r
+            asserta(QPos <= LQ);\r
+            ++(Counts[QPos]);\r
+            }\r
+        }\r
+    }\r
+\r
+// Append to Row the parent sequence P (length LP), aligned to the query by\r
+// Path, laid out on the 3-way columns. InsertCounts[q] is the number of\r
+// insert columns reserved before query position q (index LQ = after the last\r
+// query letter); where this parent has fewer inserts the slot is padded with\r
+// '-' ahead of the parent's own inserted letters.\r
+static void AppendParentRow(const byte *P, unsigned LP, const string &Path,\r
+  const vector<unsigned> &InsertCounts, unsigned LQ, string &Row)\r
+    {\r
+    unsigned QPos = 0;\r
+    unsigned PPos = 0;\r
+    unsigned is = 0; // inserts emitted since the last query column\r
+    for (unsigned i = 0; i < SIZE(Path); ++i)\r
+        {\r
+        char c = Path[i];\r
+        if (c == 'M' || c == 'D')\r
+            {\r
+            // Close the insert slot before this query column.\r
+            asserta(QPos < LQ);\r
+            asserta(is <= InsertCounts[QPos]);\r
+            Row.append(InsertCounts[QPos] - is, '-');\r
+            is = 0;\r
+            ++QPos;\r
+            }\r
+        if (c == 'M')\r
+            {\r
+            asserta(PPos < LP);\r
+            Row.push_back(toupper(P[PPos++]));\r
+            }\r
+        else if (c == 'D')\r
+            Row.push_back('-');\r
+        else if (c == 'I')\r
+            {\r
+            ++is;\r
+            asserta(PPos < LP);\r
+            Row.push_back(toupper(P[PPos++]));\r
+            }\r
+        }\r
+    // Pad the trailing insert slot.\r
+    asserta(is <= InsertCounts[LQ]);\r
+    Row.append(InsertCounts[LQ] - is, '-');\r
+    // The path must have consumed both sequences completely.\r
+    asserta(QPos == LQ);\r
+    asserta(PPos == LP);\r
+    }\r
+\r
+// Merge two pairwise alignments that share the query (Q-A given by PathQA,\r
+// Q-B given by PathQB) into three equal-length gapped rows Q3, A3 and B3.\r
+// In the paths 'M' consumes a letter of both sequences, 'D' a query letter\r
+// only and 'I' a parent letter only. Letters are upper-cased and gaps are\r
+// written as '-'. Before each query position the merged alignment reserves\r
+// as many insert columns as the larger of the two paths needs there.\r
+void Make3Way(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+  const string &PathQA, const string &PathQB,\r
+  string &Q3, string &A3, string &B3)\r
+    {\r
+    Q3.clear();\r
+    A3.clear();\r
+    B3.clear();\r
+\r
+#if DEBUG\r
+    AssertPathLengths(PathQA, QSD.L, ASD.L);\r
+    AssertPathLengths(PathQB, QSD.L, BSD.L);\r
+#endif\r
+\r
+    const byte *Q = QSD.Seq;\r
+    const unsigned LQ = QSD.L;\r
+\r
+    vector<unsigned> InsertCountsA;\r
+    vector<unsigned> InsertCountsB;\r
+    CountInsertsByQueryPos(PathQA, LQ, InsertCountsA);\r
+    CountInsertsByQueryPos(PathQB, LQ, InsertCountsB);\r
+\r
+    vector<unsigned> InsertCounts(LQ+1, 0);\r
+    for (unsigned i = 0; i <= LQ; ++i)\r
+        InsertCounts[i] = max(InsertCountsA[i], InsertCountsB[i]);\r
+\r
+    // Query row: every insert slot is all gaps.\r
+    for (unsigned i = 0; i < LQ; ++i)\r
+        {\r
+        Q3.append(InsertCounts[i], '-');\r
+        Q3.push_back(toupper(Q[i]));\r
+        }\r
+    Q3.append(InsertCounts[LQ], '-');\r
+\r
+    AppendParentRow(ASD.Seq, ASD.L, PathQA, InsertCounts, LQ, A3);\r
+    AppendParentRow(BSD.Seq, BSD.L, PathQB, InsertCounts, LQ, B3);\r
+\r
+    asserta(SIZE(Q3) == SIZE(A3));\r
+    asserta(SIZE(Q3) == SIZE(B3));\r
+    }\r
exit(1);
}
}
-
+/**************************************************************************************************/
+// Split filename into pieces and write each piece to its own temporary file\r
+// named "<filename>.<i>.tmp". The piece boundaries are the file positions\r
+// returned by divideFile(filename, proc); piece i spans positions\r
+// [filePos[i], filePos[i+1]). Each piece is followed by a newline. The names\r
+// of the files written are appended to files. Returns 0; on an exception the\r
+// error is reported and the program exits.\r
+int MothurOut::divideFile(string filename, int& proc, vector<string>& files) {
+	try{
+
+		vector<unsigned long int> filePos = divideFile(filename, proc);
+
+		// size()-1 in unsigned arithmetic would wrap if filePos were empty
+		int numChunks = (int)filePos.size() - 1;
+
+		for (int i = 0; i < numChunks; i++) {
+
+			//read file chunk
+			ifstream in;
+			openInputFile(filename, in);
+			in.seekg(filePos[i]);
+			unsigned long int size = filePos[(i+1)] - filePos[i];
+			vector<char> chunk(size);	//released automatically, also on exceptions
+			if (!chunk.empty()) { in.read(&chunk[0], size); }
+			streamsize numRead = in.gcount();
+			in.close();
+
+			//open new file
+			string fileChunkName = filename + "." + toString(i) + ".tmp";
+			ofstream out;
+			openOutputFile(fileChunkName, out);
+
+			// the buffer is not null terminated, so write exactly the bytes read
+			if (numRead > 0) { out.write(&chunk[0], numRead); }
+			out << endl;
+			out.close();
+
+			//save name
+			files.push_back(fileChunkName);
+		}
+
+		return 0;
+	}
+	catch(exception& e) {
+		errorOut(e, "MothurOut", "divideFile");
+		exit(1);
+	}
+}
/***********************************************************************/
bool MothurOut::isTrue(string f){
//functions from mothur.h
//file operations
vector<unsigned long int> divideFile(string, int&);
+ int divideFile(string, int&, vector<string>&);
vector<unsigned long int> setFilePosEachLine(string, int&);
vector<unsigned long int> setFilePosFasta(string, int&);
string sortFile(string, string);
--- /dev/null
+#include "myutils.h"\r
+#include "mx.h"\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+\r
+char ProbToChar(float p);\r
+\r
+list<MxBase *> *MxBase::m_Matrices = 0;\r
+unsigned MxBase::m_AllocCount;\r
+unsigned MxBase::m_ZeroAllocCount;\r
+unsigned MxBase::m_GrowAllocCount;\r
+double MxBase::m_TotalBytes;\r
+double MxBase::m_MaxBytes;\r
+\r
+// Parse s as a number with atof, take its natural log, and format the result\r
+// with TypeToStr<float>. The returned pointer is whatever TypeToStr<float>\r
+// returns.\r
+static const char *LogizeStr(const char *s)\r
+ {\r
+ double d = atof(s);\r
+ d = log(d);\r
+ return TypeToStr<float>(float(d));\r
+ }\r
+\r
+// Parse s as a number with atof, exponentiate it, and format the result with\r
+// TypeToStr<float>. The returned pointer is whatever TypeToStr<float> returns.\r
+static const char *ExpizeStr(const char *s)\r
+ {\r
+ double d = atof(s);\r
+ d = exp(d);\r
+ return TypeToStr<float>(float(d));\r
+ }\r
+\r
+// Register Mx in the global list of matrices, creating the list on first use.\r
+void MxBase::OnCtor(MxBase *Mx)\r
+ {\r
+ if (m_Matrices == 0)\r
+ m_Matrices = new list<MxBase *>;\r
+ asserta(m_Matrices != 0);\r
+ m_Matrices->push_front(Mx);\r
+ }\r
+\r
+// Remove Mx from the global list of matrices. When the last matrix is\r
+// removed the list itself is freed and the pointer reset, so that a later\r
+// OnCtor creates a fresh list. Warns (and does nothing else) if there is no\r
+// list or Mx is not in it.\r
+void MxBase::OnDtor(MxBase *Mx)\r
+    {\r
+    if (m_Matrices == 0)\r
+        {\r
+        Warning("MxBase::OnDtor, m_Matrices = 0");\r
+        return;\r
+        }\r
+    for (list<MxBase*>::iterator p = m_Matrices->begin();\r
+      p != m_Matrices->end(); ++p)\r
+        {\r
+        if (*p == Mx)\r
+            {\r
+            m_Matrices->erase(p);\r
+            if (m_Matrices->empty())\r
+                {\r
+                delete m_Matrices;\r
+                // Without this reset the next OnCtor/OnDtor would use the\r
+                // freed list.\r
+                m_Matrices = 0;\r
+                }\r
+            return;\r
+            }\r
+        }\r
+    Warning("MxBase::OnDtor, not found");\r
+    }\r
+\r
+//float **MxBase::Getf(const string &Name)\r
+// {\r
+// Mx<float> *m = (Mx<float> *) Get(Name);\r
+// asserta(m->GetTypeSize() == sizeof(float));\r
+// return m->GetData();\r
+// }\r
+//\r
+//double **MxBase::Getd(const string &Name)\r
+// {\r
+// Mx<double> *m = (Mx<double> *) Get(Name);\r
+// asserta(m->GetTypeSize() == sizeof(double));\r
+// return m->GetData();\r
+// }\r
+//\r
+//char **MxBase::Getc(const string &Name)\r
+// {\r
+// Mx<char> *m = (Mx<char> *) Get(Name);\r
+// asserta(m->GetTypeSize() == sizeof(char));\r
+// return m->GetData();\r
+// }\r
+\r
+// Alloc overload for sequences identified by ids IdA/IdB in DB; forwards\r
+// with null SeqData pointers.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+ const SeqDB *DB, unsigned IdA, unsigned IdB)\r
+ {\r
+ Alloc(Name, RowCount, ColCount, DB, IdA, IdB, 0, 0);\r
+ }\r
+\r
+// Alloc overload for sequences given directly as SeqData; forwards with a\r
+// null DB and both ids set to UINT_MAX.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+ const SeqData *SA, const SeqData *SB)\r
+ {\r
+ Alloc(Name, RowCount, ColCount, 0, UINT_MAX, UINT_MAX, SA, SB);\r
+ }\r
+\r
+// Prepare the matrix for RowCount x ColCount entries and record its name and\r
+// the sequences it refers to (either ids in DB, or SA/SB).\r
+// Storage is reallocated only when the request exceeds the allocated size in\r
+// either dimension; the old data is freed first, and the new allocation is\r
+// square and padded by 16 beyond the larger requested dimension.\r
+// The static counters m_AllocCount, m_ZeroAllocCount, m_GrowAllocCount,\r
+// m_TotalBytes and m_MaxBytes are updated along the way.\r
+void MxBase::Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+ const SeqDB *DB, unsigned IdA, unsigned IdB, const SeqData *SA, const SeqData *SB)\r
+ {\r
+ StartTimer(MxBase_Alloc);\r
+\r
+ ++m_AllocCount;\r
+ if (m_AllocatedRowCount == 0)\r
+ ++m_ZeroAllocCount;\r
+\r
+ if (DB != 0)\r
+ {\r
+ // With a DB, both ids must be given and the matrix must have at least\r
+ // one more row/column than the corresponding sequence length.\r
+ asserta(IdA != UINT_MAX);\r
+ asserta(IdB != UINT_MAX);\r
+ asserta(RowCount >= DB->GetSeqLength(IdA) + 1);\r
+ asserta(ColCount >= DB->GetSeqLength(IdB) + 1);\r
+ }\r
+ if (RowCount > m_AllocatedRowCount || ColCount > m_AllocatedColCount)\r
+ {\r
+ if (m_AllocatedRowCount > 0)\r
+ {\r
+ if (opt_logmemgrows)\r
+ Log("MxBase::Alloc grow %s %u x %u -> %u x %u, %s bytes\n",\r
+ Name, m_AllocatedRowCount, m_AllocatedColCount,\r
+ RowCount, ColCount,\r
+ IntToStr(GetBytes()));\r
+ ++m_GrowAllocCount;\r
+ }\r
+\r
+ // Take the old allocation out of the running total before freeing it.\r
+ m_TotalBytes -= GetBytes();\r
+\r
+ // The Alloc timer is paused while the FreeData timer runs.\r
+ PauseTimer(MxBase_Alloc);\r
+ StartTimer(MxBase_FreeData);\r
+ FreeData();\r
+ EndTimer(MxBase_FreeData);\r
+ StartTimer(MxBase_Alloc);\r
+\r
+ // New size: square, with 16 spare beyond the larger request.\r
+ unsigned N = max(RowCount + 16, m_AllocatedRowCount);\r
+ unsigned M = max(ColCount + 16, m_AllocatedColCount);\r
+ N = max(N, M);\r
+\r
+ PauseTimer(MxBase_Alloc);\r
+ StartTimer(MxBase_AllocData);\r
+ AllocData(N, N);\r
+ EndTimer(MxBase_AllocData);\r
+ StartTimer(MxBase_Alloc);\r
+\r
+ m_TotalBytes += GetBytes();\r
+ if (m_TotalBytes > m_MaxBytes)\r
+ m_MaxBytes = m_TotalBytes;\r
+ }\r
+ \r
+ // Copy the name, truncating to fit and always null-terminating.\r
+ unsigned n = sizeof(m_Name)-1;\r
+ strncpy(m_Name, Name, n);\r
+ m_Name[n] = 0;\r
+ m_RowCount = RowCount;\r
+ m_ColCount = ColCount;\r
+ m_SeqDB = DB;\r
+ m_IdA = IdA;\r
+ m_IdB = IdB;\r
+ m_SA = SA;\r
+ m_SB = SB;\r
+\r
+ EndTimer(MxBase_Alloc);\r
+ }\r
+\r
+// Write the matrix to the log: a header line with name, dimensions and the\r
+// labels of the associated sequences, then (if WithData and the matrix is\r
+// not empty) the contents.\r
+// Opts is a bit mask: OPT_LOG / OPT_EXP print the log / exponential of each\r
+// value in the general layout; OPT_ZERO_BASED labels row i and column j with\r
+// sequence letter i / j rather than i-1 / j-1.\r
+// If m_Alpha or m_Alpha2 is non-empty the matrix is instead printed as a\r
+// brace-delimited table with one row per alphabet letter.\r
+void MxBase::LogMe(bool WithData, int Opts) const\r
+ {\r
+ Log("\n");\r
+ if (Opts & OPT_EXP)\r
+ Log("Exp ");\r
+ else if (Opts & OPT_LOG)\r
+ Log("Log ");\r
+ bool ZeroBased = ((Opts & OPT_ZERO_BASED) != 0);\r
+ Log("%s(%p) Rows %u/%u, Cols %u/%u",\r
+ m_Name, this,\r
+ m_RowCount, m_AllocatedRowCount,\r
+ m_ColCount, m_AllocatedColCount);\r
+ if (m_SeqDB != 0 && m_IdA != UINT_MAX)\r
+ Log(", A=%s", m_SeqDB->GetLabel(m_IdA));\r
+ else if (m_SA != 0)\r
+ Log(", A=%s", m_SA->Label);\r
+ if (m_SeqDB != 0 && m_IdB != UINT_MAX)\r
+ Log(", B=%s", m_SeqDB->GetLabel(m_IdB));\r
+ else if (m_SB != 0)\r
+ Log(", B=%s", m_SB->Label);\r
+ Log("\n");\r
+ if (!WithData || m_RowCount == 0 || m_ColCount == 0)\r
+ return;\r
+\r
+ // Column width is taken from the formatted width of entry (0,0); Mod is\r
+ // 10^Width and is used to wrap the column numbers printed in the header.\r
+ const char *z = GetAsStr(0, 0);\r
+ unsigned Width = strlen(z);\r
+ unsigned Mod = 1;\r
+ for (unsigned i = 0; i < Width; ++i)\r
+ Mod *= 10;\r
+\r
+ if (m_Alpha[0] != 0)\r
+ {\r
+ // Alphabet layout: entries are looked up by letter code.\r
+ Log("// Alphabet=%s\n", m_Alpha);\r
+ Log("// ");\r
+ unsigned n = strlen(m_Alpha);\r
+ for (unsigned j = 0; j < n; ++j)\r
+ Log(" %*c", Width, m_Alpha[j]);\r
+ Log("\n");\r
+ for (unsigned i = 0; i < n; ++i)\r
+ {\r
+ Log("/* %c */ {", m_Alpha[i]);\r
+ unsigned ci = m_Alpha[i];\r
+ for (unsigned j = 0; j < n; ++j)\r
+ {\r
+ unsigned cj = m_Alpha[j];\r
+ Log("%s,", GetAsStr(ci, cj));\r
+ }\r
+ Log("}, // %c\n", m_Alpha[i]);\r
+ }\r
+ return;\r
+ }\r
+ else if (m_Alpha2[0] != 0)\r
+ {\r
+ // Second alphabet layout: entries are looked up by position in the\r
+ // alphabet (ci is computed but not used).\r
+ unsigned n = strlen(m_Alpha2);\r
+ Log("// Alphabet=%s\n", m_Alpha2);\r
+ Log("// ");\r
+ for (unsigned j = 0; j < n; ++j)\r
+ Log(" %*c", Width, m_Alpha2[j]);\r
+ Log("\n");\r
+ for (unsigned i = 0; i < n; ++i)\r
+ {\r
+ Log("/* %c */ {", m_Alpha2[i]);\r
+ unsigned ci = m_Alpha2[i];\r
+ for (unsigned j = 0; j < n; ++j)\r
+ Log("%s,", GetAsStr(i, j));\r
+ Log("}, // %c\n", m_Alpha2[i]);\r
+ }\r
+ return;\r
+ }\r
+\r
+ // General layout: A labels the rows and B the columns, when available.\r
+ const byte *A = 0;\r
+ const byte *B = 0;\r
+ if (m_SeqDB != 0 && m_IdA != UINT_MAX)\r
+ A = m_SeqDB->GetSeq(m_IdA);\r
+ else if (m_SA != 0)\r
+ A = m_SA->Seq;\r
+ if (m_SeqDB != 0 && m_IdB != UINT_MAX)\r
+ B = m_SeqDB->GetSeq(m_IdB);\r
+ else if (m_SB != 0)\r
+ B = m_SB->Seq;\r
+\r
+ if (B != 0)\r
+ {\r
+ // Header line of B letters.\r
+ if (A != 0)\r
+ Log(" ");\r
+ Log("%5.5s", "");\r
+ if (ZeroBased)\r
+ for (unsigned j = 0; j < m_ColCount; ++j)\r
+ Log("%*c", Width, B[j]);\r
+ else\r
+ for (unsigned j = 0; j < m_ColCount; ++j)\r
+ Log("%*c", Width, j == 0 ? ' ' : B[j-1]);\r
+ Log("\n");\r
+ }\r
+\r
+ // Header line of column numbers.\r
+ if (A != 0)\r
+ Log(" ");\r
+ Log("%5.5s", "");\r
+ for (unsigned j = 0; j < m_ColCount; ++j)\r
+ Log("%*u", Width, j%Mod);\r
+ Log("\n");\r
+\r
+ for (unsigned i = 0; i < m_RowCount; ++i)\r
+ {\r
+ if (A != 0)\r
+ {\r
+ if (ZeroBased)\r
+ Log("%c ", A[i]);\r
+ else\r
+ Log("%c ", i == 0 ? ' ' : A[i-1]);\r
+ }\r
+ Log("%4u ", i);\r
+ \r
+ for (unsigned j = 0; j < m_ColCount; ++j)\r
+ {\r
+ const char *s = GetAsStr(i, j);\r
+ if (Opts & OPT_LOG)\r
+ s = LogizeStr(s);\r
+ else if (Opts & OPT_EXP)\r
+ s = ExpizeStr(s);\r
+ Log("%s", s);\r
+ }\r
+ Log("\n");\r
+ }\r
+ }\r
+static unsigned g_MatrixFileCount;\r
+\r
+// Write the static allocation counters and byte totals to the log as a\r
+// small table.\r
+void MxBase::LogCounts()\r
+ {\r
+ Log("\n");\r
+ Log("MxBase::LogCounts()\n");\r
+ Log(" What N\n");\r
+ Log("---------- ----------\n");\r
+ Log(" Allocs %10u\n", m_AllocCount);\r
+ Log("ZeroAllocs %10u\n", m_ZeroAllocCount);\r
+ Log(" Grows %10u\n", m_GrowAllocCount);\r
+ Log(" Bytes %10.10s\n", MemBytesToStr(m_TotalBytes));\r
+ Log(" Max bytes %10.10s\n", MemBytesToStr(m_MaxBytes));\r
+ }\r
--- /dev/null
+#ifndef mx_h\r
+#define mx_h\r
+\r
+#include <list>\r
+#include <limits.h>\r
+#include <math.h>\r
+#include "timing.h"\r
+#include "myutils.h"\r
+\r
+const int OPT_LOG = 0x01;\r
+const int OPT_EXP = 0x02;\r
+const int OPT_ZERO_BASED = 0x04;\r
+const float MINUS_INFINITY = -9e9f;\r
+const float UNINIT = -8e8f;\r
+\r
+struct SeqData;\r
+\r
+// Format a value of type T as text. This primary template is a fallback that\r
+// reports a fatal error; supported types provide their own specialisation.\r
+template<class T> const char *TypeToStr(T t)\r
+ {\r
+ Die("Unspecialised TypeToStr() called");\r
+ ureturn(0);\r
+ }\r
+\r
+template<> inline const char *TypeToStr<unsigned short>(unsigned short f)\r
+ {\r
+ static char s[16];\r
+\r
+ sprintf(s, "%12u", f);\r
+ return s;\r
+ }\r
+\r
+template<> inline const char *TypeToStr<short>(short f)\r
+ {\r
+ static char s[16];\r
+\r
+ sprintf(s, "%12d", f);\r
+ return s;\r
+ }\r
+\r
+template<> inline const char *TypeToStr<int>(int f)\r
+ {\r
+ static char s[16];\r
+\r
+ sprintf(s, "%5d", f);\r
+ return s;\r
+ }\r
+\r
+// Format a float right-aligned in a 12-character field: "?" for UNINIT, "*"\r
+// for values below MINUS_INFINITY/2, "." for zero, fixed notation with 5\r
+// decimals for magnitudes up to 1e5, and %g with 4 significant digits\r
+// otherwise. Returns a pointer to a function-local static buffer.\r
+template<> inline const char *TypeToStr<float>(float f)\r
+ {\r
+ static char s[16];\r
+\r
+ if (f == UNINIT)\r
+ sprintf(s, "%12.12s", "?");\r
+ else if (f < MINUS_INFINITY/2)\r
+ sprintf(s, "%12.12s", "*");\r
+ else if (f == 0.0f)\r
+ sprintf(s, "%12.12s", ".");\r
+ else if (f >= -1e5 && f <= 1e5)\r
+ sprintf(s, "%12.5f", f);\r
+ else\r
+ sprintf(s, "%12.4g", f);\r
+ return s;\r
+ }\r
+\r
+// Format a double right-aligned in a 12-character field: "*" for values\r
+// below -1e9, "." for zero, fixed notation with 5 decimals for magnitudes up\r
+// to 1e5, and %g with 4 significant digits otherwise.\r
+// Returns a pointer to a function-local static buffer.\r
+template<> inline const char *TypeToStr<double>(double f)\r
+    {\r
+    static char s[16];\r
+\r
+    if (f < -1e9)\r
+        sprintf(s, "%12.12s", "*");\r
+    else if (f == 0.0f)\r
+        sprintf(s, "%12.12s", ".");\r
+    // Lower bound is -1e5, matching the float specialisation; with -1e-5\r
+    // ordinary negative values fell through to the %g branch.\r
+    else if (f >= -1e5 && f <= 1e5)\r
+        sprintf(s, "%12.5f", f);\r
+    else\r
+        sprintf(s, "%12.4g", f);\r
+    return s;\r
+    }\r
+\r
+// Format f with TypeToStr<float> and copy the text into s. The returned\r
+// pointer refers to s's storage.\r
+static inline const char *FloatToStr(float f, string &s)\r
+ {\r
+ s = TypeToStr<float>(f);\r
+ return s.c_str();\r
+ }\r
+\r
+template<> inline const char *TypeToStr<char>(char c)\r
+ {\r
+ static char s[2];\r
+ s[0] = c;\r
+ return s;\r
+ }\r
+\r
+// Returns a one-character string holding the byte c. The two-byte static\r
+// buffer is zero-initialised, so the second byte always terminates the string.\r
+template<> inline const char *TypeToStr<byte>(byte c)\r
+	{\r
+	static char Text[2];\r
+	*Text = c;\r
+	return Text;\r
+	}\r
+\r
+// Returns "T" for true and "F" for false. The two-byte static buffer is\r
+// zero-initialised, so the second byte always terminates the string.\r
+template<> inline const char *TypeToStr<bool>(bool tof)\r
+	{\r
+	static char Text[2];\r
+	if (tof)\r
+		Text[0] = 'T';\r
+	else\r
+		Text[0] = 'F';\r
+	return Text;\r
+	}\r
+\r
+struct SeqDB;\r
+\r
+// Non-template base of all matrices. Holds the name, alphabets, logical and\r
+// allocated dimensions and optional sequence references; element storage is\r
+// supplied by derived classes through AllocData()/FreeData(). Construction\r
+// and destruction are reported through OnCtor()/OnDtor().\r
+struct MxBase\r
+	{\r
+private:\r
+	// Declared private to forbid copying.\r
+	MxBase(const MxBase &rhs);\r
+	MxBase &operator=(const MxBase &rhs);\r
+\r
+public:\r
+	char m_Name[32];\r
+	char m_Alpha[32];\r
+	char m_Alpha2[32];\r
+	unsigned m_RowCount;\r
+	unsigned m_ColCount;\r
+	unsigned m_AllocatedRowCount;\r
+	unsigned m_AllocatedColCount;\r
+	const SeqDB *m_SeqDB;\r
+	unsigned m_IdA;\r
+	unsigned m_IdB;\r
+	const SeqData *m_SA;\r
+	const SeqData *m_SB;\r
+\r
+	static list<MxBase *> *m_Matrices;\r
+	//static MxBase *Get(const string &Name);\r
+	//static float **Getf(const string &Name);\r
+	//static double **Getd(const string &Name);\r
+	//static char **Getc(const string &Name);\r
+\r
+	static unsigned m_AllocCount;\r
+	static unsigned m_ZeroAllocCount;\r
+	static unsigned m_GrowAllocCount;\r
+	static double m_TotalBytes;\r
+	static double m_MaxBytes;\r
+\r
+	static void OnCtor(MxBase *Mx);\r
+	static void OnDtor(MxBase *Mx);\r
+\r
+	// Starts as an empty, unnamed matrix. Every member is given a defined\r
+	// value before OnCtor() runs, so LogAll() never prints an uninitialised\r
+	// name and m_SA/m_SB are never read uninitialised.\r
+	MxBase()\r
+		{\r
+		m_Name[0] = 0;\r
+		m_Alpha[0] = 0;\r
+		m_Alpha2[0] = 0;\r
+		m_AllocatedRowCount = 0;\r
+		m_AllocatedColCount = 0;\r
+		m_RowCount = 0;\r
+		m_ColCount = 0;\r
+		m_IdA = UINT_MAX;\r
+		m_IdB = UINT_MAX;\r
+		m_SeqDB = 0;\r
+		m_SA = 0;\r
+		m_SB = 0;\r
+		OnCtor(this);\r
+		}\r
+	virtual ~MxBase()\r
+		{\r
+		OnDtor(this);\r
+		}\r
+\r
+	// Size in bytes of one element.\r
+	virtual unsigned GetTypeSize() const = 0;\r
+	// Bytes currently allocated for the matrix.\r
+	virtual unsigned GetBytes() const = 0;\r
+\r
+	// Releases the element storage and resets dimensions, ids and sequence\r
+	// pointers. m_SeqDB, the name and the alphabets are left untouched.\r
+	void Clear()\r
+		{\r
+		FreeData();\r
+		m_AllocatedRowCount = 0;\r
+		m_AllocatedColCount = 0;\r
+		m_RowCount = 0;\r
+		m_ColCount = 0;\r
+		m_IdA = UINT_MAX;\r
+		m_IdB = UINT_MAX;\r
+		m_SA = 0;\r
+		m_SB = 0;\r
+		}\r
+\r
+	// True when the matrix has no rows.\r
+	bool Empty() const\r
+		{\r
+		return m_RowCount == 0;\r
+		}\r
+\r
+	virtual void AllocData(unsigned RowCount, unsigned ColCount) = 0;\r
+	virtual void FreeData() = 0;\r
+	virtual const char *GetAsStr(unsigned i, unsigned j) const = 0;\r
+\r
+	// Copies Alpha into m_Alpha, truncating to fit. The terminator goes in\r
+	// the last element of the array; the previous code wrote it one past\r
+	// the end, into m_Alpha2.\r
+	void SetAlpha(const char *Alpha)\r
+		{\r
+		const unsigned n = sizeof(m_Alpha);\r
+		strncpy(m_Alpha, Alpha, n - 1);\r
+		m_Alpha[n - 1] = 0;\r
+		}\r
+\r
+	void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+	  const SeqDB *DB, unsigned IdA, unsigned IdB,\r
+	  const SeqData *SA, const SeqData *SB);\r
+\r
+	void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+	  const SeqDB *DB = 0, unsigned IdA = UINT_MAX, unsigned IdB = UINT_MAX);\r
+\r
+	void Alloc(const char *Name, unsigned RowCount, unsigned ColCount,\r
+	  const SeqData *SA, const SeqData *SB);\r
+\r
+	// Logs one line per registered matrix that has allocated rows (allocated\r
+	// dimensions, element size, megabytes, name) followed by the total.\r
+	static void LogAll()\r
+		{\r
+		Log("\n");\r
+		if (m_Matrices == 0)\r
+			{\r
+			Log("MxBase::m_Matrices=0\n");\r
+			return;\r
+			}\r
+		Log("\n");\r
+		Log("AllRows AllCols Sz MB Name\n");\r
+		Log("------- ------- ---- -------- ----\n");\r
+		double TotalMB = 0;\r
+		for (list<MxBase *>::const_iterator p = m_Matrices->begin();\r
+		  p != m_Matrices->end(); ++p)\r
+			{\r
+			const MxBase *Mx = *p;\r
+			if (Mx == 0)\r
+				continue;\r
+			unsigned ar = Mx->m_AllocatedRowCount;\r
+			if (ar == 0)\r
+				continue;\r
+			unsigned ac = Mx->m_AllocatedColCount;\r
+			unsigned sz = Mx->GetTypeSize();\r
+			// Multiply as doubles so large matrices cannot overflow unsigned.\r
+			double MB = (double) ar*(double) ac*(double) sz/1e6;\r
+			TotalMB += MB;\r
+			Log("%7u %7u %4u %8.2f %s\n", ar, ac, sz, MB, Mx->m_Name);\r
+			}\r
+		Log(" --------\n");\r
+		Log("%7.7s %7.7s %4.4s %8.2f\n", "", "", "", TotalMB);\r
+		}\r
+\r
+	void LogMe(bool WithData = true, int Opts = 0) const;\r
+	static void LogCounts();\r
+	};\r
+\r
+// Dense two-dimensional matrix of T stored as an array of row pointers.\r
+// Name, dimensions and sequence references are inherited from MxBase.\r
+template<class T> struct Mx : public MxBase\r
+	{\r
+// Disable unimplemented stuff\r
+private:\r
+	Mx(Mx &rhs);\r
+	Mx &operator=(Mx &rhs);\r
+	// const Mx &operator=(const Mx &rhs) const;\r
+\r
+public:\r
+	T **m_Data;\r
+\r
+	Mx()\r
+		{\r
+		m_Data = 0;\r
+		}\r
+\r
+	~Mx()\r
+		{\r
+		FreeData();\r
+		}\r
+\r
+	// Allocates RowCount row pointers and RowCount rows of ColCount elements,\r
+	// then records the allocated dimensions. Elements are not initialised\r
+	// here.\r
+	virtual void AllocData(unsigned RowCount, unsigned ColCount)\r
+		{\r
+		if (opt_logmemgrows)\r
+			Log("MxBase::AllocData(%u,%u) %s bytes, Name=%s\n",\r
+			  RowCount, ColCount, IntToStr(GetBytes()), m_Name);\r
+		m_Data = MYALLOC(T *, RowCount, Mx);\r
+		for (unsigned i = 0; i < RowCount; ++i)\r
+			m_Data[i] = MYALLOC(T, ColCount, Mx);\r
+		AddBytes("Mx_AllocData", RowCount*sizeof(T *) + RowCount*ColCount*sizeof(T));\r
+\r
+		m_AllocatedRowCount = RowCount;\r
+		m_AllocatedColCount = ColCount;\r
+		}\r
+\r
+	// Frees every row and the row-pointer array, then resets the pointer and\r
+	// all dimensions to zero.\r
+	virtual void FreeData()\r
+		{\r
+		for (unsigned i = 0; i < m_AllocatedRowCount; ++i)\r
+			MYFREE(m_Data[i], m_AllocatedColCount, Mx);\r
+		MYFREE(m_Data, m_AllocatedRowCount, Mx);\r
+		SubBytes("Mx_AllocData",\r
+		  m_AllocatedRowCount*sizeof(T *) + m_AllocatedRowCount*m_AllocatedColCount*sizeof(T));\r
+\r
+		m_Data = 0;\r
+		m_RowCount = 0;\r
+		m_ColCount = 0;\r
+		m_AllocatedRowCount = 0;\r
+		m_AllocatedColCount = 0;\r
+		}\r
+\r
+	T **GetData()\r
+		{\r
+		return (T **) m_Data;\r
+		}\r
+\r
+	// Bounds-checked (via assert) element read.\r
+	T Get(unsigned i, unsigned j) const\r
+		{\r
+		assert(i < m_RowCount);\r
+		assert(j < m_ColCount);\r
+		return m_Data[i][j];\r
+		}\r
+\r
+	// Bounds-checked (via assert) element write. Declared const because only\r
+	// the pointed-to storage changes, not the object's own members.\r
+	void Put(unsigned i, unsigned j, T x) const\r
+		{\r
+		assert(i < m_RowCount);\r
+		assert(j < m_ColCount);\r
+		m_Data[i][j] = x;\r
+		}\r
+\r
+	// For a square matrix, stores in Avgs the mean of each row's off-diagonal\r
+	// elements and returns the mean of those row means (T(0) when empty).\r
+	// Dies if the matrix is not square.\r
+	T GetOffDiagAvgs(vector<T> &Avgs) const\r
+		{\r
+		if (m_RowCount != m_ColCount)\r
+			Die("GetOffDiagAvgs, not symmetrical");\r
+		Avgs.clear();\r
+		// A 1x1 matrix has no off-diagonal elements; report 0 for its row\r
+		// rather than dividing by zero.\r
+		const unsigned OtherCount = (m_RowCount > 0) ? m_RowCount - 1 : 0;\r
+		T Total = T(0);\r
+		for (unsigned i = 0; i < m_RowCount; ++i)\r
+			{\r
+			T Sum = T(0);\r
+			for (unsigned j = 0; j < m_ColCount; ++j)\r
+				{\r
+				if (j == i)\r
+					continue;\r
+				Sum += m_Data[i][j];\r
+				}\r
+			T Avg = (OtherCount == 0) ? T(0) : T(Sum/OtherCount);\r
+			Total += Avg;\r
+			Avgs.push_back(Avg);\r
+			}\r
+		return m_RowCount == 0 ? T(0) : Total/m_RowCount;\r
+		}\r
+\r
+	unsigned GetTypeSize() const\r
+		{\r
+		return sizeof(T);\r
+		}\r
+\r
+	// Bytes held by the allocated rows plus the row-pointer array.\r
+	virtual unsigned GetBytes() const\r
+		{\r
+		return m_AllocatedRowCount*m_AllocatedColCount*GetTypeSize() +\r
+		  m_AllocatedRowCount*sizeof(T *);\r
+		}\r
+\r
+	const char *GetAsStr(unsigned i, unsigned j) const\r
+		{\r
+		return TypeToStr<T>(Get(i, j));\r
+		}\r
+\r
+	const T *const *const GetData() const\r
+		{\r
+		return (const T *const *) m_Data;\r
+		}\r
+\r
+	// Re-allocates this matrix under the name "Copy" with rhs's dimensions\r
+	// and sequence ids, then copies every element.\r
+	void Copy(const Mx<T> &rhs)\r
+		{\r
+		Alloc("Copy", rhs.m_RowCount, rhs.m_ColCount, rhs.m_SeqDB, rhs.m_IdA, rhs.m_IdB);\r
+		const T * const *Data = rhs.GetData();\r
+		for (unsigned i = 0; i < m_RowCount; ++i)\r
+			for (unsigned j = 0; j < m_ColCount; ++j)\r
+				m_Data[i][j] = Data[i][j];\r
+		}\r
+\r
+	// Sets every element to v.\r
+	void Assign(T v)\r
+		{\r
+		for (unsigned i = 0; i < m_RowCount; ++i)\r
+			for (unsigned j = 0; j < m_ColCount; ++j)\r
+				m_Data[i][j] = v;\r
+		}\r
+\r
+	// Element-wise comparison using feq() on float conversions. Pairs where\r
+	// both values are below -1e10 count as equal. Bwd selects the scan\r
+	// direction (last cell first), which only affects which mismatch is\r
+	// reported by the warning. Returns false on a dimension or value\r
+	// mismatch.\r
+	bool Eq(const Mx &rhs, bool Bwd = false) const\r
+		{\r
+		if (rhs.m_ColCount != m_ColCount)\r
+			return false;\r
+		if (rhs.m_RowCount != m_RowCount)\r
+			return false;\r
+		const T * const*d = rhs.GetData();\r
+		const int RowCount = (int) m_RowCount;\r
+		const int ColCount = (int) m_ColCount;\r
+		const int Step = Bwd ? -1 : 1;\r
+		// The backward scan starts at the last valid index. It used to start\r
+		// at the count itself, one past the end of the data.\r
+		const int i1 = Bwd ? RowCount - 1 : 0;\r
+		const int j1 = Bwd ? ColCount - 1 : 0;\r
+		const int i2 = Bwd ? -1 : RowCount;\r
+		const int j2 = Bwd ? -1 : ColCount;\r
+		for (int i = i1; i != i2; i += Step)\r
+			for (int j = j1; j != j2; j += Step)\r
+				{\r
+				float x = m_Data[i][j];\r
+				float y = d[i][j];\r
+				if (x < -1e10 && y < -1e10)\r
+					continue;\r
+				if (!feq(x, y))\r
+					{\r
+					Warning("%s[%d][%d] = %g, %s = %g",\r
+					  m_Name, i, j, x, rhs.m_Name, y);\r
+					return false;\r
+					}\r
+				}\r
+		return true;\r
+		}\r
+\r
+	// Like Eq() with a forward scan, but only cells whose Mask entry is true\r
+	// are compared. Mask must have the same dimensions as this matrix.\r
+	bool EqMask(const Mx &rhs, const Mx<bool> &Mask) const\r
+		{\r
+		if (rhs.m_ColCount != m_ColCount)\r
+			return false;\r
+		if (rhs.m_RowCount != m_RowCount)\r
+			return false;\r
+\r
+		if (Mask.m_ColCount != m_ColCount)\r
+			return false;\r
+		if (Mask.m_RowCount != m_RowCount)\r
+			return false;\r
+\r
+		const T * const*d = rhs.GetData();\r
+		const int RowCount = (int) m_RowCount;\r
+		const int ColCount = (int) m_ColCount;\r
+		for (int i = 0; i != RowCount; ++i)\r
+			for (int j = 0; j != ColCount; ++j)\r
+				{\r
+				if (!Mask.m_Data[i][j])\r
+					continue;\r
+				float x = m_Data[i][j];\r
+				float y = d[i][j];\r
+				if (x < -1e10 && y < -1e10)\r
+					continue;\r
+				if (!feq(x, y))\r
+					{\r
+					Warning("%s[%d][%d] = %g, %s = %g",\r
+					  m_Name, i, j, x, rhs.m_Name, y);\r
+					return false;\r
+					}\r
+				}\r
+		return true;\r
+		}\r
+\r
+	// Sets every element to v (same effect as Assign).\r
+	void Init(T v)\r
+		{\r
+		for (unsigned i = 0; i < m_RowCount; ++i)\r
+			for (unsigned j = 0; j < m_ColCount; ++j)\r
+				m_Data[i][j] = v;\r
+		}\r
+	};\r
+\r
+void WriteMx(const string &Name, Mx<float> &Mxf);\r
+\r
+// Allocates Mxf as an N x N matrix named "(Reserved)" unless it already has\r
+// allocated rows. When N is UINT_MAX the size is g_MaxInputSeqLength+1.\r
+template<class T> void ReserveMx(Mx<T> &Mxf, unsigned N = UINT_MAX)\r
+	{\r
+	extern unsigned g_MaxInputSeqLength;\r
+\r
+	const bool AlreadyAllocated = (Mxf.m_AllocatedRowCount > 0);\r
+	if (!AlreadyAllocated)\r
+		{\r
+		const unsigned Dim = (N == UINT_MAX) ? g_MaxInputSeqLength+1 : N;\r
+		Mxf.Alloc("(Reserved)", Dim, Dim);\r
+		}\r
+	}\r
+\r
+#endif // mx_h\r
--- /dev/null
+#ifndef MY_VERSION\r
+#define MY_VERSION "4.2"\r
+#endif\r
+\r
+STR_OPT( input, 0)\r
+STR_OPT( query, 0)\r
+STR_OPT( db, 0)\r
+STR_OPT( sort, 0)\r
+STR_OPT( output, 0)\r
+STR_OPT( uc, 0)\r
+STR_OPT( clstr2uc, 0)\r
+STR_OPT( uc2clstr, 0)\r
+STR_OPT( uc2fasta, 0)\r
+STR_OPT( uc2fastax, 0)\r
+STR_OPT( mergesort, 0)\r
+STR_OPT( tmpdir, ".")\r
+STR_OPT( staralign, 0)\r
+STR_OPT( sortuc, 0)\r
+STR_OPT( blastout, 0)\r
+STR_OPT( blast6out, 0)\r
+STR_OPT( fastapairs, 0)\r
+STR_OPT( idchar, "|")\r
+STR_OPT( diffchar, " ")\r
+STR_OPT( uchime, 0)\r
+STR_OPT( gapopen, 0)\r
+STR_OPT( gapext, 0)\r
+STR_OPT( uhire, 0)\r
+STR_OPT( ids, "99,98,95,90,85,80,70,50,35")\r
+STR_OPT( seeds, 0)\r
+STR_OPT( clump, 0)\r
+STR_OPT( clumpout, 0)\r
+STR_OPT( clump2fasta, 0)\r
+STR_OPT( clumpfasta, 0)\r
+STR_OPT( hireout, 0)\r
+STR_OPT( mergeclumps, 0)\r
+STR_OPT( alpha, 0)\r
+STR_OPT( hspalpha, 0)\r
+STR_OPT( probmx, 0)\r
+STR_OPT( matrix, 0)\r
+STR_OPT( tracestate, 0)\r
+STR_OPT( chainout, 0)\r
+STR_OPT( cluster, 0)\r
+STR_OPT( computekl, 0)\r
+STR_OPT( userout, 0)\r
+STR_OPT( userfields, 0)\r
+STR_OPT( seedsout, 0)\r
+STR_OPT( chainhits, 0)\r
+STR_OPT( findorfs, 0)\r
+STR_OPT( strand, 0)\r
+STR_OPT( getseqs, 0)\r
+STR_OPT( labels, 0)\r
+STR_OPT( doug, 0)\r
+STR_OPT( makeindex, 0)\r
+STR_OPT( indexstats, 0)\r
+STR_OPT( uchimeout, 0)\r
+STR_OPT( uchimealns, 0)\r
+STR_OPT( xframe, 0)\r
+STR_OPT( mkctest, 0)\r
+STR_OPT( allpairs, 0)\r
+STR_OPT( fastq2fasta, 0)\r
+STR_OPT( otusort, 0)\r
+STR_OPT( sparsedist, 0)\r
+STR_OPT( sparsedistparams, 0)\r
+STR_OPT( mcc, 0)\r
+STR_OPT( utax, 0)\r
+STR_OPT( simcl, 0)\r
+STR_OPT( absort, 0)\r
+STR_OPT( cc, 0)\r
+STR_OPT( uslink, 0)\r
+\r
+UNS_OPT( band, 16, 0, UINT_MAX)\r
+UNS_OPT( minlen, 10, 1, UINT_MAX)\r
+UNS_OPT( maxlen, 10000, 1, UINT_MAX)\r
+UNS_OPT( w, 0, 1, UINT_MAX)\r
+UNS_OPT( k, 0, 1, UINT_MAX)\r
+UNS_OPT( stepwords, 8, 0, UINT_MAX)\r
+UNS_OPT( maxaccepts, 1, 0, UINT_MAX)\r
+UNS_OPT( maxrejects, 8, 0, UINT_MAX)\r
+UNS_OPT( maxtargets, 0, 0, UINT_MAX)\r
+UNS_OPT( minhsp, 32, 1, UINT_MAX)\r
+UNS_OPT( bump, 50, 0, 100)\r
+UNS_OPT( rowlen, 64, 8, UINT_MAX)\r
+UNS_OPT( idprefix, 0, 0, UINT_MAX)\r
+UNS_OPT( idsuffix, 0, 0, UINT_MAX)\r
+UNS_OPT( chunks, 4, 2, UINT_MAX)\r
+UNS_OPT( minchunk, 64, 2, UINT_MAX)\r
+UNS_OPT( maxclump, 1000, 1, UINT_MAX)\r
+UNS_OPT( iddef, 0, 0, UINT_MAX)\r
+UNS_OPT( mincodons, 20, 1, UINT_MAX)\r
+UNS_OPT( maxovd, 8, 0, UINT_MAX)\r
+UNS_OPT( max2, 40, 0, UINT_MAX)\r
+UNS_OPT( querylen, 500, 0, UINT_MAX)\r
+UNS_OPT( targetlen, 500, 0, UINT_MAX)\r
+UNS_OPT( orfstyle, (1+2+4), 0, UINT_MAX)\r
+UNS_OPT( dbstep, 1, 1, UINT_MAX)\r
+UNS_OPT( randseed, 1, 0, UINT_MAX)\r
+UNS_OPT( maxp, 2, 2, UINT_MAX)\r
+UNS_OPT( idsmoothwindow, 32, 1, UINT_MAX)\r
+UNS_OPT( mindiffs, 3, 1, UINT_MAX)\r
+UNS_OPT( maxspan1, 24, 1, UINT_MAX)\r
+UNS_OPT( maxspan2, 24, 1, UINT_MAX)\r
+UNS_OPT( minorfcov, 16, 1, UINT_MAX)\r
+UNS_OPT( hashsize, 4195879, 1, UINT_MAX)\r
+UNS_OPT( maxpoly, 0, 0, UINT_MAX)\r
+UNS_OPT( droppct, 50, 0, 100)\r
+UNS_OPT( secs, 10, 0, UINT_MAX)\r
+UNS_OPT( maxqgap, 0, 0, UINT_MAX)\r
+UNS_OPT( maxtgap, 0, 0, UINT_MAX)\r
+\r
+INT_OPT( frame, 0, -3, +3)\r
+\r
+TOG_OPT( trace, false)\r
+TOG_OPT( logmemgrows, false)\r
+TOG_OPT( trunclabels, false)\r
+TOG_OPT( verbose, false)\r
+TOG_OPT( wordcountreject, true)\r
+TOG_OPT( rev, false)\r
+TOG_OPT( output_rejects, false)\r
+TOG_OPT( blast_termgaps, false)\r
+TOG_OPT( fastalign, true)\r
+TOG_OPT( flushuc, false)\r
+TOG_OPT( stable_sort, false)\r
+TOG_OPT( minus_frames, true)\r
+TOG_OPT( usort, true)\r
+TOG_OPT( nb, false)\r
+TOG_OPT( twohit, true)\r
+TOG_OPT( ssort, false)\r
+TOG_OPT( log_query, false)\r
+TOG_OPT( log_hothits, false)\r
+TOG_OPT( logwordstats, false)\r
+TOG_OPT( ucl, false)\r
+TOG_OPT( skipgaps2, true)\r
+TOG_OPT( skipgaps, true)\r
+TOG_OPT( denovo, false)\r
+TOG_OPT( cartoon_orfs, false)\r
+TOG_OPT( label_ab, false)\r
+TOG_OPT( wordweight, false)\r
+TOG_OPT( isort, false)\r
+TOG_OPT( selfid, false)\r
+TOG_OPT( leftjust, false)\r
+TOG_OPT( rightjust, false)\r
+\r
+FLT_OPT( id, 0.0, 0.0, 1.0)\r
+FLT_OPT( weak_id, 0.0, 0.0, 1.0)\r
+FLT_OPT( match, 1.0, 0.0, FLT_MAX)\r
+FLT_OPT( mismatch, -2.0, 0.0, FLT_MAX)\r
+FLT_OPT( split, 1000.0, 1.0, FLT_MAX)\r
+FLT_OPT( evalue, 10.0, 0.0, FLT_MAX)\r
+FLT_OPT( weak_evalue, 10.0, 0.0, FLT_MAX)\r
+FLT_OPT( evalue_g, 10.0, 0.0, FLT_MAX)\r
+FLT_OPT( chain_evalue, 10.0, 0.0, FLT_MAX)\r
+FLT_OPT( xdrop_u, 16.0, 0.0, FLT_MAX)\r
+FLT_OPT( xdrop_g, 32.0, 0.0, FLT_MAX)\r
+FLT_OPT( xdrop_ug, 16.0, 0.0, FLT_MAX)\r
+FLT_OPT( xdrop_nw, 16.0, 0.0, FLT_MAX)\r
+FLT_OPT( ka_gapped_lambda, 0.0, 0.0, FLT_MAX)\r
+FLT_OPT( ka_ungapped_lambda, 0.0, 0.0, FLT_MAX)\r
+FLT_OPT( ka_gapped_k, 0.0, 0.0, FLT_MAX)\r
+FLT_OPT( ka_ungapped_k, 0.0, 0.0, FLT_MAX)\r
+FLT_OPT( ka_dbsize, 0.0, 0.0, FLT_MAX)\r
+FLT_OPT( chain_targetfract, 0.0, 0.0, 1.0)\r
+FLT_OPT( targetfract, 0.0, 0.0, 1.0)\r
+FLT_OPT( queryfract, 0.0, 0.0, 1.0)\r
+FLT_OPT( fspenalty, 16.0, 0.0, FLT_MAX)\r
+FLT_OPT( sspenalty, 20.0, 0.0, FLT_MAX)\r
+FLT_OPT( seedt1, 13.0, 0.0, FLT_MAX)\r
+FLT_OPT( seedt2, 11.0, 0.0, FLT_MAX)\r
+FLT_OPT( lopen, 11.0, 0.0, FLT_MAX)\r
+FLT_OPT( lext, 1.0, 0.0, FLT_MAX)\r
+FLT_OPT( minh, 0.3, 0.0, FLT_MAX)\r
+FLT_OPT( xn, 8.0, 0.0, FLT_MAX)\r
+FLT_OPT( dn, 1.4, 0.0, FLT_MAX)\r
+FLT_OPT( xa, 1.0, 0.0, FLT_MAX)\r
+FLT_OPT( mindiv, 0.5, 0.0, 100.0)\r
+FLT_OPT( abskew, 2, 0.0, 100.0)\r
+FLT_OPT( abx, 8.0, 0.0, 100.0)\r
+FLT_OPT( minspanratio1, 0.7, 0.0, 1.0)\r
+FLT_OPT( minspanratio2, 0.7, 0.0, 1.0)\r
+\r
+FLAG_OPT( usersort)\r
+FLAG_OPT( exact)\r
+FLAG_OPT( optimal)\r
+FLAG_OPT( self)\r
+FLAG_OPT( ungapped)\r
+FLAG_OPT( global)\r
+FLAG_OPT( local)\r
+FLAG_OPT( xlat)\r
+FLAG_OPT( realign)\r
+FLAG_OPT( hash)\r
+FLAG_OPT( derep)\r
--- /dev/null
+#include <time.h>\r
+#include <stdarg.h>\r
+#include <sys/stat.h>\r
+#include <errno.h>\r
+#include <string.h>\r
+#include <ctype.h>\r
+#include <string>\r
+#include <vector>\r
+#include <set>\r
+#include <map>\r
+#include <signal.h>\r
+#include <float.h>\r
+\r
+#ifdef _MSC_VER\r
+#include <crtdbg.h>\r
+#include <process.h>\r
+#include <windows.h>\r
+#include <psapi.h>\r
+#include <io.h>\r
+#else\r
+#include <sys/time.h>\r
+#include <sys/resource.h>\r
+#include <unistd.h>\r
+#include <errno.h>\r
+#include <fcntl.h>\r
+#include <stdlib.h>\r
+#endif\r
+\r
+#include "myutils.h"\r
+\r
+const char *SVN_VERSION =\r
+#include "svnversion.h"\r
+;\r
+\r
+#define TEST_UTILS 0\r
+\r
+using namespace std;\r
+\r
+const unsigned MY_IO_BUFSIZ = 32000;\r
+const unsigned MAX_FORMATTED_STRING_LENGTH = 64000;\r
+\r
+static char *g_IOBuffers[256];\r
+static time_t g_StartTime = time(0);\r
+static vector<string> g_Argv;\r
+static double g_PeakMemUseBytes;\r
+\r
+#if TEST_UTILS\r
+// Manual exercise of the progress-reporting helpers. Only compiled when\r
+// TEST_UTILS is non-zero.\r
+void TestUtils()\r
+ {\r
+ const int C = 100000000;\r
+ for (int i = 0; i < C; ++i)\r
+ ProgressStep(i, C, "something or other");\r
+\r
+ // Messages of varying length, to see how shorter text replaces longer text.\r
+ Progress("\n");\r
+ Progress("Longer message\r");\r
+ Sleep(1000);\r
+ Progress("Short\r");\r
+ Sleep(1000);\r
+ Progress("And longer again\r");\r
+ Sleep(1000);\r
+ Progress("Shrt\n");\r
+ Sleep(1000);\r
+ const unsigned N = 10;\r
+ unsigned M = 10;\r
+ for (unsigned i = 0; i < N; ++i)\r
+ {\r
+ ProgressStep(i, N, "Allocating 1MB blocks");\r
+ for (unsigned j = 0; j < M; ++j)\r
+ {\r
+ ProgressStep(j, M, "Inner loop"); \r
+ // The allocation is deliberately never freed; each block is 100000\r
+ // bytes, so M of them per outer step add up to about 1MB.\r
+ malloc(100000);\r
+ Sleep(500);\r
+ }\r
+ }\r
+ }\r
+#endif // TEST_UTILS\r
+\r
+// Gives stream f a fully-buffered I/O buffer of MY_IO_BUFSIZ bytes. Buffers\r
+// are kept per file descriptor in g_IOBuffers and reused; descriptors outside\r
+// 0..255 are left with their default buffering.\r
+static void AllocBuffer(FILE *f)\r
+	{\r
+	const int Handle = fileno(f);\r
+	const bool InRange = (Handle >= 0 && Handle < 256);\r
+	if (!InRange)\r
+		return;\r
+\r
+	char *&Slot = g_IOBuffers[Handle];\r
+	if (Slot == 0)\r
+		Slot = myalloc(char, MY_IO_BUFSIZ);\r
+	setvbuf(f, Slot, _IOFBF, MY_IO_BUFSIZ);\r
+	}\r
+\r
+// Releases the I/O buffer registered in g_IOBuffers for f's file descriptor,\r
+// if there is one. Descriptors outside 0..255 are ignored.\r
+static void FreeBuffer(FILE *f)\r
+	{\r
+	const int Handle = fileno(f);\r
+	if (Handle >= 0 && Handle < 256 && g_IOBuffers[Handle] != 0)\r
+		{\r
+		myfree(g_IOBuffers[Handle]);\r
+		g_IOBuffers[Handle] = 0;\r
+		}\r
+	}\r
+\r
+// Whole seconds between g_StartTime (set during static initialisation of this\r
+// file) and now.\r
+unsigned GetElapsedSecs()\r
+ {\r
+ return (unsigned) (time(0) - g_StartTime);\r
+ }\r
+\r
+static unsigned g_NewCalls;\r
+static unsigned g_FreeCalls;\r
+static double g_InitialMemUseBytes;\r
+static double g_TotalAllocBytes;\r
+static double g_TotalFreeBytes;\r
+static double g_NetBytes;\r
+static double g_MaxNetBytes;\r
+\r
+// Writes the allocation counters kept in this file to the log. Each\r
+// MemBytesToStr() result is consumed by its own Log() call.\r
+void LogAllocStats()\r
+	{\r
+	const double PeakTotalBytes = g_MaxNetBytes + g_InitialMemUseBytes;\r
+\r
+	Log("\n");\r
+	Log(" Allocs %u\n", g_NewCalls);\r
+	Log(" Frees %u\n", g_FreeCalls);\r
+	Log("Initial alloc %s\n", MemBytesToStr(g_InitialMemUseBytes));\r
+	Log(" Total alloc %s\n", MemBytesToStr(g_TotalAllocBytes));\r
+	Log(" Total free %s\n", MemBytesToStr(g_TotalFreeBytes));\r
+	Log(" Net bytes %s\n", MemBytesToStr(g_NetBytes));\r
+	Log("Max net bytes %s\n", MemBytesToStr(g_MaxNetBytes));\r
+	Log(" Peak total %s\n", MemBytesToStr(PeakTotalBytes));\r
+	}\r
+\r
+// True when stat() succeeds on FileName.\r
+bool StdioFileExists(const string &FileName)\r
+	{\r
+	struct stat Info;\r
+	return stat(FileName.c_str(), &Info) == 0;\r
+	}\r
+\r
+// Reports a failed assertion as a fatal error through Die(), giving the\r
+// source file, line number and the text of the expression.\r
+void myassertfail(const char *Exp, const char *File, unsigned Line)\r
+ {\r
+ Die("%s(%u) assert failed: %s", File, Line, Exp);\r
+ }\r
+\r
+// bool wrapper around isatty(): true when isatty(fd) returns non-zero.\r
+bool myisatty(int fd)\r
+ {\r
+ return isatty(fd) != 0;\r
+ }\r
+\r
+#ifdef _MSC_VER\r
+#include <io.h>\r
+// MSVC stand-in for POSIX fseeko(): returns 0 on success and -1 on failure.\r
+// _fseeki64() returns 0 on success and a non-zero status otherwise, so any\r
+// non-zero result is treated as failure, not only -1.\r
+int fseeko(FILE *stream, off_t offset, int whence)\r
+	{\r
+	const int Status = _fseeki64(stream, offset, whence);\r
+	return (Status == 0) ? 0 : -1;\r
+	}\r
+#define ftello(fm) (off_t) _ftelli64(fm)\r
+#endif\r
+\r
+// Writes a diagnostic dump of stream f to the log: pointer, file descriptor,\r
+// EOF and error flags, and position information. Used before Die() when a\r
+// seek, read or write fails.\r
+void LogStdioFileState(FILE *f)\r
+ {\r
+ unsigned long tellpos = (unsigned long) ftello(f);\r
+ // fseek() returns a status code, not a position; the "fseek" line below\r
+ // therefore shows whether a zero-offset seek succeeded.\r
+ long fseek_pos = fseek(f, 0, SEEK_CUR);\r
+ int fd = fileno(f);\r
+ Log("FILE * %p\n", f);\r
+ Log("fileno %d\n", fd);\r
+ Log("feof %d\n", feof(f));\r
+ Log("ferror %d\n", ferror(f));\r
+ Log("ftell %ld\n", tellpos);\r
+ Log("fseek %ld\n", fseek_pos);\r
+#if !defined(_GNU_SOURCE) && !defined(__APPLE_CC__)\r
+ // Compiled only where fpos_t is expected to be castable to long\r
+ // (NOTE(review): assumption implied by the guard -- confirm per platform).\r
+ fpos_t fpos;\r
+ int fgetpos_retval = fgetpos(f, &fpos);\r
+ Log("fpos %ld (retval %d)\n", (long) fpos, fgetpos_retval);\r
+// Log("eof %d\n", _eof(fd));\r
+#endif\r
+#ifdef _MSC_VER\r
+ __int64 pos64 = _ftelli64(f);\r
+ Log("_ftelli64 %lld\n", pos64);\r
+#endif\r
+ }\r
+\r
+// Opens FileName for binary reading and attaches a large I/O buffer.\r
+// Any failure is fatal (Die); the returned stream is never null.\r
+FILE *OpenStdioFile(const string &FileName)\r
+	{\r
+	const char *Mode = "rb";\r
+	FILE *f = fopen(FileName.c_str(), Mode);\r
+	if (f == 0)\r
+		{\r
+		// Capture errno at once so later calls cannot overwrite it.\r
+		const int Err = errno;\r
+		if (Err == EFBIG)\r
+			{\r
+			if (sizeof(off_t) == 4)\r
+				Die("File too big, off_t is 32 bits, recompile needed");\r
+			else\r
+				// sizeof yields size_t; cast so the argument matches %u.\r
+				Die("Cannot open '%s', file too big (off_t=%u bits)",\r
+				  FileName.c_str(), (unsigned) (sizeof(off_t)*8));\r
+			}\r
+		Die("Cannot open %s, errno=%d %s",\r
+		  FileName.c_str(), Err, strerror(Err));\r
+		}\r
+	AllocBuffer(f);\r
+	return f;\r
+	}\r
+\r
+// Creates (or truncates) FileName for binary read/write and attaches a large\r
+// I/O buffer. Failure to create the file is fatal (Die).\r
+FILE *CreateStdioFile(const string &FileName)\r
+	{\r
+	FILE *Created = fopen(FileName.c_str(), "wb+");\r
+	const bool Failed = (Created == 0);\r
+	if (Failed)\r
+		Die("Cannot create %s, errno=%d %s",\r
+		  FileName.c_str(), errno, strerror(errno));\r
+	AllocBuffer(Created);\r
+	return Created;\r
+	}\r
+\r
+// Seeks f to absolute offset Pos and verifies the stream really ended up\r
+// there. A null stream, a failed seek or a position mismatch is fatal; the\r
+// stream state is logged first.\r
+void SetStdioFilePos(FILE *f, off_t Pos)\r
+	{\r
+	if (f == 0)\r
+		Die("SetStdioFilePos failed, f=NULL");\r
+\r
+	const int SeekStatus = fseeko(f, Pos, SEEK_SET);\r
+	const off_t Landed = ftello(f);\r
+	const bool Succeeded = (SeekStatus == 0 && Landed == Pos);\r
+	if (Succeeded)\r
+		return;\r
+\r
+	LogStdioFileState(f);\r
+	Die("SetStdioFilePos(%d) failed, Ok=%d NewPos=%d",\r
+	  (int) Pos, SeekStatus, (int) Landed);\r
+	}\r
+\r
+// Seeks f to Pos and reads exactly Bytes bytes into Buffer. A null stream or\r
+// a short read is fatal; the stream state is logged before dying.\r
+void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes)\r
+	{\r
+	if (f == 0)\r
+		Die("ReadStdioFile failed, f=NULL");\r
+	SetStdioFilePos(f, Pos);\r
+\r
+	const unsigned Got = (unsigned) fread(Buffer, 1, Bytes, f);\r
+	if (Got == Bytes)\r
+		return;\r
+\r
+	LogStdioFileState(f);\r
+	Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d",\r
+	  (int) Bytes, (int) Got, errno);\r
+	}\r
+\r
+// Reads exactly Bytes bytes from the current position of f into Buffer.\r
+// A null stream or a short read is fatal; the stream state is logged before\r
+// dying.\r
+void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes)\r
+	{\r
+	if (f == 0)\r
+		Die("ReadStdioFile failed, f=NULL");\r
+\r
+	const unsigned Got = (unsigned) fread(Buffer, 1, Bytes, f);\r
+	if (Got == Bytes)\r
+		return;\r
+\r
+	LogStdioFileState(f);\r
+	Die("ReadStdioFile failed, attempted %d bytes, read %d bytes, errno=%d",\r
+	  (int) Bytes, (int) Got, errno);\r
+	}\r
+\r
+// Return values from functions like lseek, ftell, fgetpos are\r
+// "undefined" for files that cannot seek. Attempt to detect\r
+// whether a file can seek by checking for error returns.\r
+// fgetpos, fseek and fsetpos all return 0 on success and non-zero on\r
+// failure, so every status is compared against 0. The earlier test\r
+// (!ok3 || !ok4) inverted that and returned false for seekable streams.\r
+bool CanSetStdioFilePos(FILE *f)\r
+	{\r
+// Common special cases\r
+	if (f == stdin || f == stdout || f == stderr)\r
+		return false;\r
+\r
+	fpos_t CurrPos;\r
+	if (fgetpos(f, &CurrPos) != 0)\r
+		return false;\r
+	if (fseek(f, 0, SEEK_END) != 0)\r
+		return false;\r
+\r
+	fpos_t EndPos;\r
+	const int GetEndStatus = fgetpos(f, &EndPos);\r
+	// Always try to restore the original position, even if the probe failed.\r
+	const int RestoreStatus = fsetpos(f, &CurrPos);\r
+	return GetEndStatus == 0 && RestoreStatus == 0;\r
+	}\r
+\r
+// Reads the whole of f into a newly allocated buffer and stores the byte\r
+// count in FileSize. Seekable streams are read from offset 0 in one call and\r
+// their position is restored; other streams are read from the current\r
+// position in BUFF_SIZE chunks until EOF.\r
+// NOTE(review): the seekable path allocates with myalloc and the other path\r
+// with malloc/realloc -- confirm that callers release the buffer compatibly.\r
+byte *ReadAllStdioFile(FILE *f, unsigned &FileSize)\r
+	{\r
+	const unsigned BUFF_SIZE = 1024*1024;\r
+\r
+	if (CanSetStdioFilePos(f))\r
+		{\r
+		const off_t SavedPos = GetStdioFilePos(f);\r
+		// Distinct name: a local called FileSize used to shadow the output\r
+		// parameter, so the caller never received the size.\r
+		const off_t Size = GetStdioFileSize(f);\r
+		if (Size > UINT_MAX)\r
+			Die("ReadAllStdioFile: file size > UINT_MAX");\r
+		SetStdioFilePos(f, 0);\r
+		byte *Buffer = myalloc(byte, unsigned(Size));\r
+		ReadStdioFile(f, Buffer, unsigned(Size));\r
+		SetStdioFilePos(f, SavedPos);\r
+		FileSize = unsigned(Size);\r
+		return Buffer;\r
+		}\r
+\r
+// Can't seek, read one buffer at a time.\r
+	FileSize = 0;\r
+\r
+	byte *Buffer = 0;\r
+	for (;;)\r
+		{\r
+		if (FileSize > UINT_MAX - BUFF_SIZE)\r
+			Die("ReadAllStdioFile: file size > UINT_MAX");\r
+\r
+		// realloc(NULL, n) behaves like malloc(n), so no seed allocation is\r
+		// needed. Keep the old pointer until the call is known to succeed.\r
+		byte *Grown = (byte *) realloc(Buffer, FileSize + BUFF_SIZE);\r
+		if (Grown == 0)\r
+			{\r
+			free(Buffer);\r
+			Die("ReadAllStdioFile, out of memory");\r
+			}\r
+		Buffer = Grown;\r
+\r
+		const unsigned BytesRead = (unsigned) fread(Buffer + FileSize, 1, BUFF_SIZE, f);\r
+		FileSize += BytesRead;\r
+		if (BytesRead < BUFF_SIZE)\r
+			{\r
+			// Trim to the exact size. Skipped for empty input because\r
+			// realloc(p, 0) is implementation-defined; a failed trim simply\r
+			// keeps the larger (still valid) buffer.\r
+			if (FileSize > 0)\r
+				{\r
+				byte *Trimmed = (byte *) realloc(Buffer, FileSize);\r
+				if (Trimmed != 0)\r
+					Buffer = Trimmed;\r
+				}\r
+			return Buffer;\r
+			}\r
+		}\r
+	}\r
+\r
+byte *ReadAllStdioFile(const std::string &FileName, off_t &FileSize)\r
+ {\r
+#if WIN32\r
+ FILE *f = OpenStdioFile(FileName);\r
+ FileSize = GetStdioFileSize(f);\r
+ CloseStdioFile(f);\r
+\r
+ HANDLE h = CreateFile(FileName.c_str(), GENERIC_READ, FILE_SHARE_READ,\r
+ NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);\r
+ if (h == INVALID_HANDLE_VALUE)\r
+ Die("ReadAllStdioFile:Open(%s) failed", FileName.c_str());\r
+\r
+ unsigned uFileSize = (unsigned) FileSize;\r
+ if ((off_t) uFileSize != FileSize)\r
+ Die("File too big (%.1f Gb): %s", double(FileSize)/1e9, FileName.c_str());\r
+\r
+ byte *Buffer = myalloc(byte, uFileSize);\r
+ DWORD BytesRead;\r
+ ReadFile(h, Buffer, uFileSize, &BytesRead, NULL);\r
+ if (FileSize != BytesRead)\r
+ Die("ReadAllStdioFile:Error reading %s, attempted %u got %u",\r
+ FileName.c_str(), FileSize, (unsigned) BytesRead);\r
+\r
+ CloseHandle(h);\r
+ return Buffer;\r
+#else\r
+ int h = open(FileName.c_str(), O_RDONLY);\r
+ if (h < 0)\r
+ Die("ReadAllStdioFile:Cannot open %s", FileName.c_str());\r
+ FileSize = lseek(h, 0, SEEK_END);\r
+ if (FileSize == (off_t) (-1))\r
+ Die("ReadAllStdioFile:Error seeking %s", FileName.c_str());\r
+ // byte *Buffer = myalloc<byte>(FileSize);\r
+ size_t stBytes = (size_t) FileSize;\r
+ if ((off_t) stBytes != FileSize)\r
+ Die("ReadAllStdioFile: off_t overflow");\r
+ byte *Buffer = (byte *) malloc(stBytes);\r
+ if (Buffer == 0)\r
+ Die("ReadAllStdioFile: failed to allocate %s", MemBytesToStr(stBytes));\r
+ lseek(h, 0, SEEK_SET);\r
+ size_t n = read(h, Buffer, stBytes);\r
+ if (n != FileSize)\r
+ Die("ReadAllStdioFile, Error reading %s, attempted %g got %g",\r
+ FileName.c_str(), (double) FileSize, (double) n);\r
+ close(h);\r
+ return Buffer;\r
+#endif\r
+ }\r
+\r
+// Seeks f to Pos and writes exactly Bytes bytes from Buffer. A null stream or\r
+// a short write is fatal; the stream state is logged before dying.\r
+void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes)\r
+	{\r
+	if (f == 0)\r
+		Die("WriteStdioFile failed, f=NULL");\r
+	SetStdioFilePos(f, Pos);\r
+\r
+	const unsigned Put = (unsigned) fwrite(Buffer, 1, Bytes, f);\r
+	if (Put == Bytes)\r
+		return;\r
+\r
+	LogStdioFileState(f);\r
+	Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d",\r
+	  (int) Bytes, (int) Put, errno);\r
+	}\r
+\r
+// Writes exactly Bytes bytes from Buffer at the current position of f.\r
+// A null stream or a short write is fatal; the stream state is logged before\r
+// dying.\r
+void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes)\r
+	{\r
+	if (f == 0)\r
+		Die("WriteStdioFile failed, f=NULL");\r
+\r
+	const unsigned Put = (unsigned) fwrite(Buffer, 1, Bytes, f);\r
+	if (Put == Bytes)\r
+		return;\r
+\r
+	LogStdioFileState(f);\r
+	Die("WriteStdioFile failed, attempted %d bytes, wrote %d bytes, errno=%d",\r
+	  (int) Bytes, (int) Put, errno);\r
+	}\r
+\r
+// Reads one line from f into Line (capacity Bytes) and strips the trailing\r
+// end-of-line characters. Returns false at end of file, true when a line was\r
+// read. A read error, or a line that does not end in '\n' within the buffer,\r
+// is fatal.\r
+bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes)\r
+	{\r
+	if (feof(f))\r
+		return false;\r
+	if ((int) Bytes < 0)\r
+		Die("ReadLineStdioFile: Bytes < 0");\r
+\r
+	char *Got = fgets(Line, (int) Bytes, f);\r
+	if (Got == NULL)\r
+		{\r
+		if (feof(f))\r
+			return false;\r
+		if (ferror(f))\r
+			Die("ReadLineStdioFile: errno=%d", errno);\r
+		Die("ReadLineStdioFile: fgets=0, feof=0, ferror=0");\r
+		}\r
+	if (Got != Line)\r
+		Die("ReadLineStdioFile: fgets != Buffer");\r
+\r
+	const unsigned Len = strlen(Line);\r
+	const bool EndsInNewline = (Len >= 1 && Line[Len-1] == '\n');\r
+	if (!EndsInNewline)\r
+		Die("ReadLineStdioFile: line too long or missing end-of-line");\r
+\r
+	// Past this point the last character is known to be '\n': drop it, and\r
+	// also a preceding '\r' or '\n' if there is one.\r
+	Line[Len-1] = 0;\r
+	if (Len > 1)\r
+		{\r
+		const char Prev = Line[Len-2];\r
+		if (Prev == '\r' || Prev == '\n')\r
+			Line[Len-2] = 0;\r
+		}\r
+	return true;\r
+	}\r
+\r
+// Reads characters from f into Line up to (not including) the next '\n'.\r
+// Every '\r' is dropped. Returns true when a line was read, including a\r
+// final line without a newline; returns false at end of file when nothing\r
+// was read. A read error is fatal.\r
+bool ReadLineStdioFile(FILE *f, string &Line)\r
+	{\r
+	Line.clear();\r
+\r
+	int c;\r
+	while ((c = fgetc(f)) != -1)\r
+		{\r
+		if (c == '\n')\r
+			return true;\r
+		if (c != '\r')\r
+			Line.push_back((char) c);\r
+		}\r
+\r
+	// fgetc returned -1: either end of file or an error.\r
+	if (!feof(f))\r
+		Die("ReadLineStdioFile, errno=%d", errno);\r
+	return !Line.empty();\r
+	}\r
+\r
+// Copies all of fFrom regardless of current\r
+// file position, appends to fTo.\r
+// Data is written at fTo's current position; fFrom's position is restored\r
+// afterwards.\r
+void AppendStdioFileToFile(FILE *fFrom, FILE *fTo)\r
+	{\r
+	off_t SavedFromPos = GetStdioFilePos(fFrom);\r
+	off_t FileSize = GetStdioFileSize(fFrom);\r
+	const off_t BUFF_SIZE = 1024*1024;\r
+	char *Buffer = myalloc(char, BUFF_SIZE);\r
+	SetStdioFilePos(fFrom, 0);\r
+	off_t BytesRemaining = FileSize;\r
+	while (BytesRemaining > 0)\r
+		{\r
+		off_t BytesToRead = BytesRemaining;\r
+		if (BytesToRead > BUFF_SIZE)\r
+			BytesToRead = BUFF_SIZE;\r
+		ReadStdioFile(fFrom, Buffer, (unsigned) BytesToRead);\r
+		WriteStdioFile(fTo, Buffer, (unsigned) BytesToRead);\r
+		BytesRemaining -= BytesToRead;\r
+		}\r
+	SetStdioFilePos(fFrom, SavedFromPos);\r
+	// Release the copy buffer; it used to leak on every call.\r
+	myfree(Buffer);\r
+	}\r
+\r
+// Renames FileNameFrom to FileNameTo with rename(); failure is fatal.\r
+void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo)\r
+	{\r
+	const int Status = rename(FileNameFrom.c_str(), FileNameTo.c_str());\r
+	if (Status == 0)\r
+		return;\r
+	Die("RenameStdioFile(%s,%s) failed, errno=%d %s",\r
+	  FileNameFrom.c_str(), FileNameTo.c_str(), errno, strerror(errno));\r
+	}\r
+\r
+// Flushes f with fflush(); a non-zero result is fatal.\r
+void FlushStdioFile(FILE *f)\r
+	{\r
+	const int Status = fflush(f);\r
+	if (Status == 0)\r
+		return;\r
+	Die("fflush(%p)=%d,", f, Status);\r
+	}\r
+\r
+// Closes f (a null stream is ignored) and releases the I/O buffer that was\r
+// registered for its file descriptor. A failing fclose() is fatal.\r
+void CloseStdioFile(FILE *f)\r
+	{\r
+	if (f == 0)\r
+		return;\r
+\r
+	// The descriptor must be looked up while the stream is still open:\r
+	// calling fileno() on a closed FILE is undefined behaviour. The buffer\r
+	// itself may only be freed after fclose(), which still flushes through it.\r
+	const int fd = fileno(f);\r
+	int Ok = fclose(f);\r
+	if (Ok != 0)\r
+		Die("fclose(%p)=%d", (void *) f, Ok);\r
+\r
+	if (fd >= 0 && fd < 256 && g_IOBuffers[fd] != 0)\r
+		{\r
+		myfree(g_IOBuffers[fd]);\r
+		g_IOBuffers[fd] = 0;\r
+		}\r
+	}\r
+\r
+// Returns the current offset of f as reported by ftello(); a negative result\r
+// is fatal.\r
+off_t GetStdioFilePos(FILE *f)\r
+	{\r
+	const off_t Offset = ftello(f);\r
+	if (Offset >= 0)\r
+		return Offset;\r
+	Die("ftello=%d", (int) Offset);\r
+	return Offset;\r
+	}\r
+\r
+// Returns the length of f in bytes by seeking to the end, then restores the\r
+// original position. Seek or tell failures are fatal.\r
+off_t GetStdioFileSize(FILE *f)\r
+	{\r
+	const off_t Saved = GetStdioFilePos(f);\r
+\r
+	if (fseeko(f, 0, SEEK_END) < 0)\r
+		Die("fseek in GetFileSize");\r
+\r
+	const off_t End = ftello(f);\r
+	if (End < 0)\r
+		Die("ftello in GetFileSize");\r
+\r
+	SetStdioFilePos(f, Saved);\r
+	return End;\r
+	}\r
+\r
+// Deletes FileName with remove(); failure is fatal.\r
+void DeleteStdioFile(const string &FileName)\r
+	{\r
+	const int Status = remove(FileName.c_str());\r
+	if (Status == 0)\r
+		return;\r
+	Die("remove(%s) failed, errno=%d %s", FileName.c_str(), errno, strerror(errno));\r
+	}\r
+\r
+// Formats ArgList according to Format and assigns the result to Str.\r
+// Output longer than the MAX_FORMATTED_STRING_LENGTH buffer is truncated.\r
+// The work buffer is a function-local static shared by all calls.\r
+void myvstrprintf(string &Str, const char *Format, va_list ArgList)\r
+ {\r
+ static char szStr[MAX_FORMATTED_STRING_LENGTH];\r
+ vsnprintf(szStr, MAX_FORMATTED_STRING_LENGTH-1, Format, ArgList);\r
+ // Terminate explicitly in case the formatter did not.\r
+ szStr[MAX_FORMATTED_STRING_LENGTH - 1] = '\0';\r
+ Str.assign(szStr);\r
+ }\r
+\r
+// Variadic convenience overload: collects the arguments into a va_list and\r
+// forwards to the va_list overload.\r
+// NOTE(review): both overloads share one name, so on platforms where va_list\r
+// is a plain pointer type a call with a single pointer argument could select\r
+// the va_list overload instead -- confirm for the supported compilers.\r
+void myvstrprintf(string &Str, const char *Format, ...)\r
+ {\r
+ va_list ArgList;\r
+ va_start(ArgList, Format);\r
+ myvstrprintf(Str, Format, ArgList);\r
+ va_end(ArgList);\r
+ }\r
+\r
+FILE *g_fLog = 0;\r
+\r
+// Closes any open log file, then creates FileName as the new log file.\r
+// An empty FileName leaves logging switched off.\r
+void SetLogFileName(const string &FileName)\r
+	{\r
+	if (g_fLog != 0)\r
+		CloseStdioFile(g_fLog);\r
+\r
+	// Cleared before creating the new file, so a failure in\r
+	// CreateStdioFile() is not logged to the stream that was just closed.\r
+	g_fLog = 0;\r
+	if (!FileName.empty())\r
+		g_fLog = CreateStdioFile(FileName);\r
+	}\r
+\r
+// printf-style write to the log file followed by a flush. Does nothing when\r
+// no log file is open or when called re-entrantly from inside Log().\r
+void Log(const char *Format, ...)\r
+	{\r
+	static bool InLog = false;\r
+	if (g_fLog == 0 || InLog)\r
+		return;\r
+\r
+	InLog = true;\r
+\r
+	va_list Args;\r
+	va_start(Args, Format);\r
+	vfprintf(g_fLog, Format, Args);\r
+	va_end(Args);\r
+	fflush(g_fLog);\r
+\r
+	InLog = false;\r
+	}\r
+\r
+// Reports a fatal error and terminates the process with exit status 1.\r
+// The formatted message, the command line saved in g_Argv and the elapsed\r
+// time are written to stderr and/or the log file. This function never\r
+// returns.\r
+void Die(const char *Format, ...)\r
+ {\r
+ // Re-entrancy guard: if Die() is reached again while already dying (for\r
+ // example from a helper called below), exit immediately.\r
+ static bool InDie = false;\r
+ if (InDie)\r
+ exit(1);\r
+ InDie = true;\r
+ string Msg;\r
+\r
+ // Switch the log stream to unbuffered mode.\r
+ if (g_fLog != 0)\r
+ setbuf(g_fLog, 0);\r
+ va_list ArgList;\r
+ va_start(ArgList, Format);\r
+ myvstrprintf(Msg, Format, ArgList);\r
+ va_end(ArgList);\r
+\r
+ fprintf(stderr, "\n\n");\r
+ Log("\n");\r
+ time_t t = time(0);\r
+ Log("%s", asctime(localtime(&t)));\r
+ // Echo the command line, space-separated, to both stderr and the log.\r
+ for (unsigned i = 0; i < g_Argv.size(); i++)\r
+ {\r
+ fprintf(stderr, (i == 0) ? "%s" : " %s", g_Argv[i].c_str());\r
+ Log((i == 0) ? "%s" : " %s", g_Argv[i].c_str());\r
+ }\r
+ fprintf(stderr, "\n");\r
+ Log("\n");\r
+\r
+ time_t CurrentTime = time(0);\r
+ unsigned ElapsedSeconds = unsigned(CurrentTime - g_StartTime);\r
+ const char *sstr = SecsToStr(ElapsedSeconds);\r
+ Log("Elapsed time: %s\n", sstr);\r
+\r
+ const char *szStr = Msg.c_str();\r
+ fprintf(stderr, "\n---Fatal error---\n%s\n", szStr);\r
+ Log("\n---Fatal error---\n%s\n", szStr);\r
+\r
+#ifdef _MSC_VER\r
+ // Under a debugger, break at the point of failure before exiting.\r
+ if (IsDebuggerPresent())\r
+ __debugbreak();\r
+ _CrtSetDbgFlag(0);\r
+#endif\r
+\r
+ exit(1);\r
+ }\r
+\r
+// Formats a warning and writes it to stderr. It is also written to the log\r
+// unless the log stream is stdout.\r
+void Warning(const char *Format, ...)\r
+	{\r
+	string Text;\r
+\r
+	va_list Args;\r
+	va_start(Args, Format);\r
+	myvstrprintf(Text, Format, Args);\r
+	va_end(Args);\r
+\r
+	const char *Message = Text.c_str();\r
+	fprintf(stderr, "\nWARNING: %s\n", Message);\r
+\r
+	const bool LogIsStdout = (g_fLog == stdout);\r
+	if (!LogIsStdout)\r
+		{\r
+		Log("\nWARNING: %s\n", Message);\r
+		fflush(g_fLog);\r
+		}\r
+	}\r
+\r
+#ifdef _MSC_VER\r
+// Windows: returns the working-set size of the current process in bytes and\r
+// updates g_PeakMemUseBytes. Returns 1000000 if the query fails.\r
+double GetMemUseBytes()\r
+	{\r
+	PROCESS_MEMORY_COUNTERS Counters;\r
+	const BOOL Queried = GetProcessMemoryInfo(GetCurrentProcess(), &Counters, sizeof(Counters));\r
+	if (!Queried)\r
+		return 1000000;\r
+\r
+	const double Bytes = (double) Counters.WorkingSetSize;\r
+	if (g_PeakMemUseBytes < Bytes)\r
+		g_PeakMemUseBytes = Bytes;\r
+	return Bytes;\r
+	}\r
+#elif linux || __linux__\r
+// Linux: estimates memory use from /proc/<pid>/statm and updates\r
+// g_PeakMemUseBytes. Returns 1000000 if the file cannot be opened or read.\r
+double GetMemUseBytes()\r
+ {\r
+ // The statm path and the page size are computed once and cached in statics.\r
+ static char statm[64];\r
+ static int PageSize = 1;\r
+ if (0 == statm[0])\r
+ {\r
+ PageSize = sysconf(_SC_PAGESIZE);\r
+ pid_t pid = getpid();\r
+ sprintf(statm, "/proc/%d/statm", (int) pid);\r
+ }\r
+\r
+ int fd = open(statm, O_RDONLY);\r
+ if (-1 == fd)\r
+ return 1000000;\r
+ char Buffer[64];\r
+ int n = read(fd, Buffer, sizeof(Buffer) - 1);\r
+ close(fd);\r
+ fd = -1;\r
+\r
+ if (n <= 0)\r
+ return 1000000;\r
+\r
+ Buffer[n] = 0;\r
+ // atof() parses only the first number in the file.\r
+ // NOTE(review): per proc(5) the first statm field is the total program\r
+ // size in pages, not the resident set used by the other platform variants\r
+ // of this function -- confirm this is intended.\r
+ double Pages = atof(Buffer);\r
+\r
+ double Bytes = Pages*PageSize;\r
+ if (Bytes > g_PeakMemUseBytes)\r
+ g_PeakMemUseBytes = Bytes;\r
+ return Bytes;\r
+ }\r
+#elif defined(__MACH__)\r
+#include <memory.h>\r
+#include <stdlib.h>\r
+#include <stdio.h>\r
+#include <unistd.h>\r
+#include <sys/types.h>\r
+#include <sys/sysctl.h>\r
+#include <sys/socket.h>\r
+#include <sys/gmon.h>\r
+#include <mach/vm_param.h>\r
+#include <netinet/in.h>\r
+#include <netinet/icmp6.h>\r
+#include <sys/vmmeter.h>\r
+#include <sys/proc.h>\r
+#include <mach/task_info.h>\r
+#include <mach/task.h>\r
+#include <mach/mach_init.h>\r
+#include <mach/vm_statistics.h>\r
+\r
+#define DEFAULT_MEM_USE 100000000.0\r
+\r
+// __MACH__ build: returns the resident size of the current task in bytes, as
+// reported by task_info(TASK_BASIC_INFO), and raises g_PeakMemUseBytes when
+// the new figure exceeds it. Returns DEFAULT_MEM_USE if task_info fails.
+double GetMemUseBytes()
+	{
+	struct task_basic_info Info;
+	memset((void *) &Info, 0, sizeof(Info));
+	mach_msg_type_number_t InfoCount = TASK_BASIC_INFO_COUNT;
+	const task_t Self = mach_task_self();
+	const kern_return_t Status =
+	  task_info(Self, TASK_BASIC_INFO, (task_info_t) &Info, &InfoCount);
+
+	// Every non-success status, KERN_INVALID_ARGUMENT included, gets the same fallback.
+	if (Status != KERN_SUCCESS)
+		return DEFAULT_MEM_USE;
+
+	const double Resident = (double ) Info.resident_size;
+	if (Resident > g_PeakMemUseBytes)
+		g_PeakMemUseBytes = Resident;
+	return Resident;
+	}
+#else\r
+// Fallback for platforms with no memory query implemented here: always
+// reports zero and never updates g_PeakMemUseBytes.
+double GetMemUseBytes()
+	{
+	const double Unknown = 0;
+	return Unknown;
+	}
+#endif\r
+\r
+// Returns g_PeakMemUseBytes, the largest value recorded so far by
+// GetMemUseBytes. This function does not take a new measurement itself.
+double GetPeakMemUseBytes()
+	{
+	return g_PeakMemUseBytes;
+	}
+\r
+// Formats a duration given in whole seconds as "MM:SS", or as "HH:MM:SS"
+// when the hour count is non-zero. Returns a pointer to a static buffer that
+// is overwritten by the next call.
+const char *SecsToHHMMSS(int Secs)
+	{
+	static char Str[16];
+
+	const int Hours = Secs/3600;
+	const int Minutes = (Secs - Hours*3600)/60;
+	const int Seconds = Secs%60;
+
+	if (Hours != 0)
+		sprintf(Str, "%02d:%02d:%02d", Hours, Minutes, Seconds);
+	else
+		sprintf(Str, "%02d:%02d", Minutes, Seconds);
+	return Str;
+	}
+\r
+// Formats a duration in seconds for display. Ten seconds or more is handed to
+// SecsToHHMMSS; shorter durations are printed as "<n>s" or "<n>ms" with a
+// precision chosen by magnitude. The short forms use a static buffer that is
+// overwritten by the next call.
+const char *SecsToStr(double Secs)
+	{
+	static char Str[16];
+
+	if (Secs >= 10.0)
+		return SecsToHHMMSS((int) Secs);
+
+	if (Secs < 1e-6)
+		{
+		sprintf(Str, "%.2gs", Secs);
+		return Str;
+		}
+	if (Secs < 1e-3)
+		{
+		sprintf(Str, "%.2fms", Secs*1e3);
+		return Str;
+		}
+	sprintf(Str, "%.3fs", Secs);
+	return Str;
+	}
+\r
+// Formats a byte count with a kb/Mb/Gb suffix, scaling by powers of 1000 and
+// picking the precision by magnitude; values of 100e9 and above are printed
+// with "%.3g" and a plain "b" suffix. Returns a pointer to a static buffer
+// that is overwritten by the next call.
+const char *MemBytesToStr(double Bytes)
+	{
+	static char Str[32];
+
+	// Default covers everything not matched below (100e9 and above).
+	const char *Fmt = "%.3gb";
+	double Shown = Bytes;
+
+	if (Bytes < 1e6)
+		{
+		Fmt = "%.1fkb";
+		Shown = Bytes/1e3;
+		}
+	else if (Bytes < 10e6)
+		{
+		Fmt = "%.1fMb";
+		Shown = Bytes/1e6;
+		}
+	else if (Bytes < 1e9)
+		{
+		Fmt = "%.0fMb";
+		Shown = Bytes/1e6;
+		}
+	else if (Bytes < 10e9)
+		{
+		Fmt = "%.1fGb";
+		Shown = Bytes/1e9;
+		}
+	else if (Bytes < 100e9)
+		{
+		Fmt = "%.0fGb";
+		Shown = Bytes/1e9;
+		}
+
+	sprintf(Str, Fmt, Shown);
+	return Str;
+	}
+\r
+// Formats an unsigned count compactly: values below 10000 are printed in
+// full, larger ones are scaled by powers of 1000 with a k/M/G suffix.
+// Returns a pointer to a static buffer that is overwritten by the next call.
+const char *IntToStr(unsigned i)
+	{
+	static char Str[32];
+
+	if (i < 10000)
+		{
+		sprintf(Str, "%u", i);
+		return Str;
+		}
+
+	const double d = (double) i;
+	const char *Fmt = "%.3g";
+	double Shown = d;
+	if (d < 1e6)
+		{
+		Fmt = "%.1fk";
+		Shown = d/1e3;
+		}
+	else if (d < 10e6)
+		{
+		Fmt = "%.1fM";
+		Shown = d/1e6;
+		}
+	else if (d < 1e9)
+		{
+		Fmt = "%.0fM";
+		Shown = d/1e6;
+		}
+	else if (d < 10e9)
+		{
+		Fmt = "%.1fG";
+		Shown = d/1e9;
+		}
+	else if (d < 100e9)
+		{
+		Fmt = "%.0fG";
+		Shown = d/1e9;
+		}
+
+	sprintf(Str, Fmt, Shown);
+	return Str;
+	}
+\r
+// Formats a floating-point value compactly, choosing precision and a k/M/G
+// suffix from its magnitude. The sign of d is kept in every range; the two
+// sub-1 ranges used to print fabs(d), which dropped the minus sign.
+// Returns a pointer to a static buffer that is overwritten by the next call.
+const char *FloatToStr(double d)
+	{
+	static char Str[32];
+
+	// Range selection uses the magnitude; the value printed is always d itself.
+	const double a = fabs(d);
+	if (a < 0.01)
+		sprintf(Str, "%.3g", d);
+	else if (a < 1)
+		sprintf(Str, "%.3f", d);
+	else if (a <= 10)
+		{
+		// Drop the decimal when the fractional part is small.
+		double intpart;
+		if (modf(a, &intpart) < 0.05)
+			sprintf(Str, "%.0f", d);
+		else
+			sprintf(Str, "%.1f", d);
+		}
+	else if (a < 10000)
+		sprintf(Str, "%.0f", d);
+	else if (a < 1e6)
+		sprintf(Str, "%.1fk", d/1e3);
+	else if (a < 10e6)
+		sprintf(Str, "%.1fM", d/1e6);
+	else if (a < 1e9)
+		sprintf(Str, "%.0fM", d/1e6);
+	else if (a < 10e9)
+		sprintf(Str, "%.1fG", d/1e9);
+	else if (a < 100e9)
+		sprintf(Str, "%.0fG", d/1e9);
+	else
+		// Also reached for NaN, for which every comparison above is false.
+		sprintf(Str, "%.3g", d);
+	return Str;
+	}
+\r
+// Values of the built-in command-line options. MyCmdLine registers each one
+// with DefineFlagOpt/DefineStrOpt, passing its address as the value pointer.
+bool opt_quiet = false;
+bool opt_version = false;
+bool opt_logopts = false;
+bool opt_compilerinfo = false;
+bool opt_help = false;
+string opt_log = "";
+
+// Companion flags for the options above, passed to the Define*Opt calls as
+// the OptSet pointer; set to true when the option appears on the command line.
+bool optset_quiet = false;
+bool optset_version = false;
+bool optset_logopts = false;
+bool optset_compilerinfo = false;
+bool optset_help = false;
+bool optset_log = false;
+
+// Progress-reporting state.
+static string g_CurrentProgressLine;
+static string g_ProgressDesc;	// description appended by GetProgressLevelStr
+static unsigned g_ProgressIndex;	// index of the most recent ProgressStep that produced output
+static unsigned g_ProgressCount;	// N from the ProgressStep call that had i == 0
+
+static unsigned g_CurrProgressLineLength;	// characters written to the current stderr line by Progress
+static unsigned g_LastProgressLineLength;	// length of the line ended by the last '\r'; used to blank leftovers
+static unsigned g_CountsInterval;	// ProgressStep looks at the clock every this many calls
+static unsigned g_StepCalls;	// non-final ProgressStep calls since the last i == 0
+static time_t g_TimeLastOutputStep;	// time(0) at the last non-final step that produced output
+\r
+// Builds the prefix shown at the start of each progress line: the elapsed
+// time, then (when GetMemUseBytes reports a positive value) the memory use
+// padded/truncated to five characters, each followed by one space.
+// The result is stored in s, which is also returned.
+static string &GetProgressPrefixStr(string &s)
+	{
+	const double MemBytes = GetMemUseBytes();
+	const unsigned Elapsed = GetElapsedSecs();
+
+	s = string(SecsToHHMMSS(Elapsed));
+	if (MemBytes > 0)
+		{
+		char Field[32];
+		sprintf(Field, "%5.5s", MemBytesToStr(MemBytes));
+		s.push_back(' ');
+		s += string(Field);
+		}
+	s.push_back(' ');
+	return s;
+	}
+\r
+// Formats a printf-style message once and sends the same text to both Log
+// and Progress.
+void ProgressLog(const char *Format, ...)
+	{
+	string Text;
+	va_list Args;
+	va_start(Args, Format);
+	myvstrprintf(Text, Format, Args);
+	va_end(Args);
+
+	// Pass the text as a "%s" argument so it is not re-interpreted as a format.
+	const char *const Body = Text.c_str();
+	Log("%s", Body);
+	Progress("%s", Body);
+	}
+\r
+// Writes a printf-style progress message to stderr, unless opt_quiet is set.
+// Each output line begins with the prefix from GetProgressPrefixStr. When a
+// line ends with '\r' or '\n' and is shorter than the line previously ended
+// by '\r', it is padded with spaces first so no stale characters remain.
+void Progress(const char *Format, ...)
+	{
+	if (opt_quiet)
+		return;
+
+	string Text;
+	va_list Args;
+	va_start(Args, Format);
+	myvstrprintf(Text, Format, Args);
+	va_end(Args);
+
+	for (string::const_iterator p = Text.begin(); p != Text.end(); ++p)
+		{
+		// At the start of a line, emit the time/memory prefix.
+		if (g_CurrProgressLineLength == 0)
+			{
+			string Prefix;
+			GetProgressPrefixStr(Prefix);
+			for (string::const_iterator q = Prefix.begin(); q != Prefix.end(); ++q)
+				fputc(*q, stderr);
+			g_CurrProgressLineLength += (unsigned) Prefix.size();
+			}
+
+		const char c = *p;
+		const bool EndsLine = (c == '\n' || c == '\r');
+		if (!EndsLine)
+			{
+			fputc(c, stderr);
+			++g_CurrProgressLineLength;
+			continue;
+			}
+
+		// Blank out whatever the previous, longer line left behind.
+		for (unsigned Col = g_CurrProgressLineLength; Col < g_LastProgressLineLength; ++Col)
+			fputc(' ', stderr);
+
+		// After '\r' the same screen line is reused, so remember its length;
+		// after '\n' there is nothing to overwrite.
+		g_LastProgressLineLength = (c == '\n' ? 0 : g_CurrProgressLineLength);
+		g_CurrProgressLineLength = 0;
+		fputc(c, stderr);
+		}
+	}
+\r
+// Writes the end-of-run summary to the log: finish time, elapsed time and
+// g_PeakMemUseBytes. In WIN32 DEBUG builds the process is then terminated
+// with ExitProcess.
+void ProgressExit()
+	{
+	time_t Now = time(0);
+	const char *FinishedAt = asctime(localtime(&Now));
+	const unsigned Elapsed = GetElapsedSecs();
+
+	Log("\n");
+	Log("Finished %s", FinishedAt); // asctime output already ends with a newline
+	Log("Elapsed time %s\n", SecsToHHMMSS((int) Elapsed));
+	Log("Max memory %s\n", MemBytesToStr(g_PeakMemUseBytes));
+#if WIN32 && DEBUG
+// Bypasses exit(), which can be very slow in a DEBUG build.
+// Dangerous: global destructors are not run.
+	ExitProcess(0);
+#endif
+	}
+\r
+// Formats x as a percentage of y using "%5.1f%%". A zero denominator gives
+// "100%" when x is also zero and "inf%" otherwise. The formatted case uses a
+// static buffer that is overwritten by the next call.
+const char *PctStr(double x, double y)
+	{
+	static char Str[16];
+
+	if (y != 0)
+		{
+		const double Percent = x*100.0/y;
+		sprintf(Str, "%5.1f%%", Percent);
+		return Str;
+		}
+	return x == 0 ? "100%" : "inf%";
+	}
+\r
+// Builds the progress text shown by ProgressStep: a percentage of
+// g_ProgressCount, or, when the count is UINT_MAX, the raw index ("100%" when
+// the index is UINT_MAX too), followed by a space and g_ProgressDesc.
+// The result is stored in s, which is also returned.
+string &GetProgressLevelStr(string &s)
+	{
+	const unsigned Index = g_ProgressIndex;
+	const unsigned Count = g_ProgressCount;
+
+	if (Count != UINT_MAX)
+		s = string(PctStr(Index+1, Count));
+	else if (Index == UINT_MAX)
+		s = "100%";
+	else
+		{
+		char Digits[16];
+		sprintf(Digits, "%u", Index);
+		s = Digits;
+		}
+
+	s += string(" ") + g_ProgressDesc;
+	return s;
+	}
+\r
+// Reports progress for step i of N on stderr, unless opt_quiet is set.
+//
+// i == 0 starts a new progress series: the formatted description, N and the
+// throttling state are reset. i == UINT_MAX or i + 1 == N marks the last
+// step, which is always displayed and ends the line. Any other i >= N is a
+// fatal error. Intermediate steps are throttled so that output is produced
+// at most when time(0) has changed since the previous output.
+void ProgressStep(unsigned i, unsigned N, const char *Format, ...)
+	{
+	if (opt_quiet)
+		return;
+
+	if (i == 0)
+		{
+		string Str;
+		va_list ArgList;
+		va_start(ArgList, Format);
+		myvstrprintf(Str, Format, ArgList);
+		va_end(ArgList);
+		g_ProgressDesc = Str;
+		g_ProgressIndex = 0;
+		g_ProgressCount = N;
+		g_CountsInterval = 1;
+		g_StepCalls = 0;
+		g_TimeLastOutputStep = 0;
+		// Finish any partially written progress line first.
+		if (g_CurrProgressLineLength > 0)
+			Progress("\n");
+		}
+
+	if (i >= N && i != UINT_MAX)
+		Die("ProgressStep(%u,%u)", i, N);
+	bool IsLastStep = (i == UINT_MAX || i + 1 == N);
+	if (!IsLastStep)
+		{
+		// Only look at the clock every g_CountsInterval calls.
+		++g_StepCalls;
+		if (g_StepCalls%g_CountsInterval != 0)
+			return;
+
+		time_t Now = time(0);
+		if (Now == g_TimeLastOutputStep)
+			{
+			// Still within the same second: check the clock less often.
+			// Round up, otherwise (1*3)/2 == 1 and the interval never grows.
+			if (g_CountsInterval < 128)
+				g_CountsInterval = (g_CountsInterval*3 + 1)/2;
+			else
+				g_CountsInterval += 64;
+			return;
+			}
+		else
+			{
+			// More than a second passed between outputs: check more often.
+			time_t Secs = Now - g_TimeLastOutputStep;
+			if (Secs > 1)
+				g_CountsInterval = unsigned(g_CountsInterval/(Secs*8));
+			}
+
+		if (g_CountsInterval < 1)
+			g_CountsInterval = 1;
+
+		g_TimeLastOutputStep = Now;
+		}
+
+	g_ProgressIndex = i;
+
+	if (i > 0)
+		{
+		// Refresh the description only for steps that are actually displayed.
+		va_list ArgList;
+		va_start(ArgList, Format);
+		myvstrprintf(g_ProgressDesc, Format, ArgList);
+		va_end(ArgList);	// was missing; every va_start needs a matching va_end
+		}
+
+	string LevelStr;
+	GetProgressLevelStr(LevelStr);
+	Progress(" %s\r", LevelStr.c_str());
+
+	if (IsLastStep)
+		{
+		g_CountsInterval = 1;
+		fputc('\n', stderr);
+		}
+	}
+\r
+// Kind of a command-line option; selects how its value is stored through
+// OptInfo::Value and how it is printed by Help and LogOpts.
+enum OptType
+	{
+	OT_Flag,	// bool, set to true when the option is present
+	OT_Tog,	// bool, --name sets true and --noname sets false
+	OT_Int,	// int
+	OT_Uns,	// unsigned
+	OT_Str,	// string
+	OT_Float,	// double
+	OT_Enum	// int, looked up by name in OptInfo::EnumValues
+	};
+\r
+// Description of one command-line option. Instances are stored in a set
+// ordered by LongName. Only the fields relevant to Type are filled in by the
+// Define*Opt functions; the constructor zero-initialises the rest so that
+// copying an OptInfo never reads an indeterminate value.
+struct OptInfo
+	{
+	void *Value;	// address of the variable holding the option's value; actual type depends on Type
+	bool *OptSet;	// flag set to true when the option is given; may be null
+	string LongName;	// name without leading dashes
+	OptType Type;
+	int iMin;	// bounds recorded for OT_Int
+	int iMax;
+	unsigned uMin;	// bounds recorded for OT_Uns
+	unsigned uMax;
+	double dMin;	// bounds recorded for OT_Float
+	double dMax;
+	map<string, unsigned> EnumValues;	// name -> value, for OT_Enum
+
+	bool bDefault;
+	int iDefault;
+	unsigned uDefault;
+	double dDefault;
+	string strDefault;
+
+	string Help;	// text printed by Help()
+
+	OptInfo() :
+	  Value(0), OptSet(0), Type(OT_Flag),
+	  iMin(0), iMax(0), uMin(0), uMax(0), dMin(0), dMax(0),
+	  bDefault(false), iDefault(0), uDefault(0), dDefault(0)
+		{
+		}
+
+	// Orders options by name; this is the key used by set<OptInfo>.
+	bool operator<(const OptInfo &rhs) const
+		{
+		return LongName < rhs.LongName;
+		}
+	};
+\r
+// All options registered through AddOpt, ordered by LongName (OptInfo::operator<).
+static set<OptInfo> g_Opts;
+\r
+// Prints the usage text followed by one entry per registered option (name,
+// value placeholder and help text) to stdout, then exits with status 0.
+void Help()
+	{
+	printf("\n");
+
+	void Usage();
+	Usage();
+
+	for (set<OptInfo>::const_iterator It = g_Opts.begin(); It != g_Opts.end(); ++It)
+		{
+		printf("\n");
+
+		// Toggle options are shown as --[no]name.
+		string ShownName = It->LongName.c_str();
+		if (It->Type == OT_Tog)
+			ShownName = string("[no]") + ShownName;
+		printf(" --%s ", ShownName.c_str());
+
+		// Placeholder for the value; flags and toggles take none.
+		const char *Placeholder = 0;
+		switch (It->Type)
+			{
+		case OT_Flag:
+		case OT_Tog:
+			break;
+		case OT_Int:
+			Placeholder = "<int>";
+			break;
+		case OT_Uns:
+			Placeholder = "<uint>";
+			break;
+		case OT_Str:
+			Placeholder = "<str>";
+			break;
+		case OT_Float:
+			Placeholder = "<float>";
+			break;
+		case OT_Enum:
+			Placeholder = "<enum>";
+			break;
+		default:
+			Placeholder = "??type";
+			break;
+			}
+		if (Placeholder != 0)
+			printf("%s", Placeholder);
+
+		printf(" ");
+
+		// Echo the help text, indenting each continuation line.
+		const string &Text = It->Help;
+		for (unsigned k = 0; k < Text.size(); ++k)
+			{
+			if (Text[k] == '\n')
+				printf("\n ");
+			else
+				printf("%c", Text[k]);
+			}
+		printf("\n");
+		}
+	printf("\n");
+	exit(0);
+	}
+\r
+void CmdLineErr(const char *Format, ...)\r
+ {\r
+ va_list ArgList;\r
+ va_start(ArgList, Format);\r
+ string Str;\r
+ myvstrprintf(Str, Format, ArgList);\r
+ va_end(ArgList);\r
+ fprintf(stderr, "\n");\r
+ fprintf(stderr, "Invalid command line\n");\r
+ fprintf(stderr, "%s\n", Str.c_str());\r
+ fprintf(stderr, "For list of command-line options use --help.\n");\r
+ fprintf(stderr, "\n");\r
+ exit(1);\r
+ }\r
+\r
+// Looks up a registered option by name. A toggle option also matches its
+// name prefixed with "no". Returns g_Opts.end() when nothing matches; if
+// ErrIfNotFound is set, CmdLineErr is called first in that case.
+static set<OptInfo>::iterator GetOptInfo(const string &LongName,
+  bool ErrIfNotFound)
+	{
+	set<OptInfo>::iterator It = g_Opts.begin();
+	while (It != g_Opts.end())
+		{
+		if (It->LongName == LongName)
+			return It;
+		const bool IsNegatedToggle =
+		  (It->Type == OT_Tog && "no" + It->LongName == LongName);
+		if (IsNegatedToggle)
+			return It;
+		++It;
+		}
+
+	if (ErrIfNotFound)
+		CmdLineErr("Option --%s is invalid", LongName.c_str());
+	return g_Opts.end();
+	}
+\r
+// Registers an option in g_Opts; calls Die if an option that GetOptInfo
+// considers the same name is already registered.
+static void AddOpt(const OptInfo &Opt)
+	{
+	const bool AlreadyDefined = (GetOptInfo(Opt.LongName, false) != g_Opts.end());
+	if (AlreadyDefined)
+		Die("Option --%s defined twice", Opt.LongName.c_str());
+	g_Opts.insert(Opt);
+	}
+\r
+#ifdef _MSC_VER\r
+#pragma warning(disable: 4505) // unreferenced local function\r
+#endif\r
+\r
+// Registers a flag option. Value must point to a bool, which is reset to
+// false here; OptSet is the flag recorded for "option was given".
+static void DefineFlagOpt(const string &LongName, const string &Help,
+  void *Value, bool *OptSet)
+	{
+	*static_cast<bool *>(Value) = false;
+
+	OptInfo Info;
+	Info.Type = OT_Flag;
+	Info.LongName = LongName;
+	Info.Help = Help;
+	Info.Value = Value;
+	Info.OptSet = OptSet;
+	Info.bDefault = false;
+	AddOpt(Info);
+	}
+\r
+// Registers a toggle option (--name / --noname). Value must point to a bool,
+// which is initialised to Default here.
+static void DefineTogOpt(const string &LongName, bool Default, const string &Help,
+  void *Value, bool *OptSet)
+	{
+	*static_cast<bool *>(Value) = Default;
+
+	OptInfo Info;
+	Info.Type = OT_Tog;
+	Info.LongName = LongName;
+	Info.Help = Help;
+	Info.Value = Value;
+	Info.OptSet = OptSet;
+	Info.bDefault = Default;
+	AddOpt(Info);
+	}
+\r
+// Registers an int option. Value must point to an int, which is initialised
+// to Default here; Min and Max are recorded in the option description.
+static void DefineIntOpt(const string &LongName, int Default, int Min, int Max,
+  const string &Help, void *Value, bool *OptSet)
+	{
+	*static_cast<int *>(Value) = Default;
+
+	OptInfo Info;
+	Info.Type = OT_Int;
+	Info.LongName = LongName;
+	Info.Help = Help;
+	Info.Value = Value;
+	Info.OptSet = OptSet;
+	Info.iDefault = Default;
+	Info.iMin = Min;
+	Info.iMax = Max;
+	AddOpt(Info);
+	}
+\r
+// Registers an unsigned option. Value must point to an unsigned, which is
+// initialised to Default here; Min and Max are recorded in the option
+// description.
+static void DefineUnsOpt(const string &LongName, unsigned Default, unsigned Min,
+  unsigned Max, const string &Help, void *Value, bool *OptSet)
+	{
+	*static_cast<unsigned *>(Value) = Default;
+
+	OptInfo Info;
+	Info.Type = OT_Uns;
+	Info.LongName = LongName;
+	Info.Help = Help;
+	Info.Value = Value;
+	Info.OptSet = OptSet;
+	Info.uDefault = Default;
+	Info.uMin = Min;
+	Info.uMax = Max;
+	AddOpt(Info);
+	}
+\r
+// Registers a floating-point option. Value must point to a double, which is
+// initialised to Default here; Min and Max are recorded in the option
+// description.
+static void DefineFloatOpt(const string &LongName, double Default, double Min,
+  double Max, const string &Help, void *Value, bool *OptSet)
+	{
+	*static_cast<double *>(Value) = Default;
+
+	OptInfo Info;
+	Info.Type = OT_Float;
+	Info.LongName = LongName;
+	Info.Help = Help;
+	Info.Value = Value;
+	Info.OptSet = OptSet;
+	Info.dDefault = Default;
+	Info.dMin = Min;
+	Info.dMax = Max;
+	AddOpt(Info);
+	}
+\r
+// Registers a string option. Value must point to a string, which is
+// initialised to Default here; a null Default is treated as the empty string.
+static void DefineStrOpt(const string &LongName, const char *Default,
+  const string &Help, void *Value, bool *OptSet)
+	{
+	const string Initial = (Default != 0 ? string(Default) : string(""));
+	*static_cast<string *>(Value) = Initial;
+
+	OptInfo Info;
+	Info.Type = OT_Str;
+	Info.LongName = LongName;
+	Info.Help = Help;
+	Info.Value = Value;
+	Info.OptSet = OptSet;
+	Info.strDefault = Initial;
+	AddOpt(Info);
+	}
+\r
+// Parses an enum specification of the form "name=value|name=value|..." into
+// EnumValues (replacing its previous contents). Whitespace is ignored, and
+// values are converted with atoi. Calls Die on a duplicate name or on an
+// entry whose name or value is empty.
+static void ParseEnumValues(const string &Values, map<string, unsigned> &EnumValues)
+	{
+	EnumValues.clear();
+
+	string Name;
+	string Value;
+	bool Eq = false;	// true once '=' has been seen in the current entry
+	for (string::const_iterator p = Values.begin(); ; ++p)
+		{
+		// The end of the string is treated as a final '|' so that the last
+		// entry is committed like the others.
+		char c = (p == Values.end() ? '|' : *p);
+		// Cast first: passing a negative plain char to isspace is undefined.
+		if (isspace((unsigned char) c))
+			;
+		else if (c == '|')
+			{
+			if (EnumValues.find(Name) != EnumValues.end())
+				Die("Invalid enum values, '%s' defined twice: '%s'",
+				  Name.c_str(), Values.c_str());
+			if (Name.empty() || Value.empty())
+				Die("Invalid enum values, empty name or value: '%s'",
+				  Values.c_str());
+
+			EnumValues[Name] = atoi(Value.c_str());
+			Name.clear();
+			Value.clear();
+			Eq = false;
+			}
+		else if (c == '=')
+			Eq = true;
+		else if (Eq)
+			Value.push_back(c);
+		else
+			Name.push_back(c);
+		if (p == Values.end())
+			return;
+		}
+	}
+\r
+// Registers an enum option. Value must point to an int, which is initialised
+// to Default here; Values is an enum specification parsed by ParseEnumValues.
+// ShortName is accepted but not used. This function takes no OptSet
+// pointer, so the option's OptSet is explicitly null rather than left
+// indeterminate.
+static void DefineEnumOpt(const string &LongName, const string &ShortName,
+  int Default, const string &Values, const string &Help, void *Value)
+	{
+	*(int *) Value = Default;
+
+	OptInfo Opt;
+	Opt.Value = Value;
+	Opt.OptSet = 0;
+	Opt.LongName = LongName;
+	Opt.iDefault = Default;
+	Opt.Help = Help;
+	Opt.Type = OT_Enum;
+	ParseEnumValues(Values, Opt.EnumValues);
+	AddOpt(Opt);
+	}
+// First expansion of myopts.h: each *_OPT macro is defined to emit the
+// file-scope variable opt_<name> holding the option's value and the flag
+// optset_<name>. Presumably myopts.h is a list of *_OPT(...) invocations —
+// verify against that header. The remaining macro arguments are unused here.
+#undef FLAG_OPT
+#undef TOG_OPT
+#undef INT_OPT
+#undef UNS_OPT
+#undef FLT_OPT
+#undef STR_OPT
+#undef ENUM_OPT
+#define FLAG_OPT(LongName) bool opt_##LongName; bool optset_##LongName;
+#define TOG_OPT(LongName, Default) bool opt_##LongName; bool optset_##LongName;
+#define INT_OPT(LongName, Default, Min, Max) int opt_##LongName; bool optset_##LongName;
+#define UNS_OPT(LongName, Default, Min, Max) unsigned opt_##LongName; bool optset_##LongName;
+#define FLT_OPT(LongName, Default, Min, Max) double opt_##LongName; bool optset_##LongName;
+#define STR_OPT(LongName, Default) string opt_##LongName; bool optset_##LongName;
+#define ENUM_OPT(LongName, Values, Default) int opt_##LongName; bool optset_##LongName;
+#include "myopts.h"
+\r
+// Converts an enum option's textual value to its integer value using
+// Opt.EnumValues. An unknown name is reported through CmdLineErr together
+// with the list of valid names.
+static int EnumStrToInt(const OptInfo &Opt, const string &Value)
+	{
+	typedef map<string, unsigned>::const_iterator NameIter;
+	const map<string, unsigned> &Names = Opt.EnumValues;
+
+	const NameIter Hit = Names.find(Value);
+	if (Hit != Names.end())
+		return Hit->second;
+
+	// No match: list every valid name in the error message.
+	string Valid;
+	for (NameIter It = Names.begin(); It != Names.end(); ++It)
+		Valid += " " + It->first;
+	CmdLineErr("--%s %s not recognized, valid are: %s",
+	  Opt.LongName.c_str(), Value.c_str(), Valid.c_str());
+	ureturn(-1);
+	}
+\r
+// Stores the textual Value into the variable behind Opt.Value, converting
+// it according to Opt.Type, and marks the option as set. Text that is not a
+// complete number for OT_Int, OT_Uns or OT_Float is reported through
+// CmdLineErr instead of being silently read as 0 or as a numeric prefix.
+// Flag and toggle options are not handled here.
+static void SetOpt(OptInfo &Opt, const string &Value)
+	{
+	// DefineEnumOpt supplies no OptSet pointer, so tolerate a null one.
+	if (Opt.OptSet != 0)
+		*Opt.OptSet = true;
+
+	const char *Text = Value.c_str();
+	char *End = 0;
+	switch (Opt.Type)
+		{
+	case OT_Int:
+		{
+		// Require the whole string to be consumed and the result to fit in an int.
+		long iValue = strtol(Text, &End, 10);
+		if (End == Text || *End != 0 || iValue < INT_MIN || iValue > INT_MAX)
+			CmdLineErr("Invalid value '%s' for --%s",
+			  Value.c_str(), Opt.LongName.c_str());
+		*(int *) Opt.Value = (int) iValue;
+		break;
+		}
+	case OT_Uns:
+		{
+		unsigned uValue = 0;
+		int n = sscanf(Text, "%u", &uValue);
+		if (n != 1)
+			CmdLineErr("Invalid value '%s' for --%s",
+			  Value.c_str(), Opt.LongName.c_str());
+		*(unsigned *) Opt.Value = uValue;
+		break;
+		}
+	case OT_Float:
+		{
+		double dValue = strtod(Text, &End);
+		if (End == Text || *End != 0)
+			CmdLineErr("Invalid value '%s' for --%s",
+			  Value.c_str(), Opt.LongName.c_str());
+		*(double *) Opt.Value = dValue;
+		break;
+		}
+	case OT_Str:
+		{
+		*(string *) Opt.Value = Value;
+		break;
+		}
+	case OT_Enum:
+		{
+		*(int *) Opt.Value = EnumStrToInt(Opt, Value);
+		break;
+		}
+	default:
+		asserta(false);
+		}
+	}
+\r
+// Writes every registered option to the log as "name = value", one per line.
+// A floating-point value equal to FLT_MAX is shown as "*".
+void LogOpts()
+	{
+	set<OptInfo>::const_iterator It = g_Opts.begin();
+	for (; It != g_Opts.end(); ++It)
+		{
+		const void *Stored = It->Value;
+		Log("%s = ", It->LongName.c_str());
+		switch (It->Type)
+			{
+		case OT_Flag:
+			Log("%s", *(const bool *) Stored ? "yes" : "no");
+			break;
+		case OT_Tog:
+			Log("%s", *(const bool *) Stored ? "on" : "off");
+			break;
+		case OT_Int:
+		case OT_Enum:
+			Log("%d", *(const int *) Stored);
+			break;
+		case OT_Uns:
+			Log("%u", *(const unsigned *) Stored);
+			break;
+		case OT_Float:
+			{
+			const double Number = *(const double *) Stored;
+			if (Number != FLT_MAX)
+				Log("%g", Number);
+			else
+				Log("*");
+			break;
+			}
+		case OT_Str:
+			Log("%s", ((const string *) Stored)->c_str());
+			break;
+		default:
+			asserta(false);
+			}
+		Log("\n");
+		}
+	}
+\r
+static void CompilerInfo()\r
+ {\r
+#ifdef _FILE_OFFSET_BITS\r
+ printf("_FILE_OFFSET_BITS=%d\n", _FILE_OFFSET_BITS);\r
+#else\r
+ printf("_FILE_OFFSET_BITS not defined\n");\r
+#endif\r
+\r
+#define x(t) printf("sizeof(" #t ") = %d\n", (int) sizeof(t));\r
+ x(int)\r
+ x(long)\r
+ x(float)\r
+ x(double)\r
+ x(void *)\r
+ x(off_t)\r
+#undef x\r
+ exit(0);\r
+ }\r
+\r
+// Splits Str into Fields (replacing its previous contents).
+//
+// With Sep == 0 the string is split on whitespace (a NUL character also acts
+// as a separator) and empty fields are dropped. With any other Sep, every
+// occurrence of Sep ends a field, so adjacent separators produce empty
+// fields; text after the last separator is added only when it is non-empty.
+void Split(const string &Str, vector<string> &Fields, char Sep)
+	{
+	Fields.clear();
+	const unsigned Length = (unsigned) Str.size();
+	string s;
+	for (unsigned i = 0; i < Length; ++i)
+		{
+		char c = Str[i];
+		// Cast first: passing a negative plain char to isspace is undefined.
+		if ((Sep == 0 && isspace((unsigned char) c)) || c == Sep)
+			{
+			if (!s.empty() || Sep != 0)
+				Fields.push_back(s);
+			s.clear();
+			}
+		else
+			s.push_back(c);
+		}
+	if (!s.empty())
+		Fields.push_back(s);
+	}
+\r
+// Reads command-line arguments from a text file into Args (replacing its
+// previous contents). On each line everything from the first '#' onwards is
+// discarded and the remainder is split into fields with Split.
+static void GetArgsFromFile(const string &FileName, vector<string> &Args)
+	{
+	Args.clear();
+
+	FILE *File = OpenStdioFile(FileName);
+	string Line;
+	while (ReadLineStdioFile(File, Line))
+		{
+		// Strip a trailing comment, if any.
+		const size_t CommentPos = Line.find('#');
+		if (CommentPos != string::npos)
+			Line = Line.substr(0, CommentPos);
+
+		vector<string> Words;
+		Split(Line, Words);
+		for (vector<string>::const_iterator w = Words.begin(); w != Words.end(); ++w)
+			Args.push_back(*w);
+		}
+	CloseStdioFile(File);
+	}
+\r
+// Parses the command line into the opt_* / optset_* variables.
+//
+// All options are registered first (the built-in ones, then those expanded
+// from myopts.h). Arguments are then processed left to right:
+//   - "file: <name>" appends the arguments read from that file;
+//   - "-name" or "--name" sets a flag/toggle, or consumes the following
+//     argument as the option's value;
+//   - empty arguments are skipped; anything else is a command-line error.
+// On the outermost call, --help and --compilerinfo are acted on, the log
+// file is opened, and the command line and options are logged if requested.
+void MyCmdLine(int argc, char **argv)
+	{
+	static unsigned RecurseDepth = 0;
+	++RecurseDepth;
+
+	DefineFlagOpt("compilerinfo", "Write info about compiler types and #defines to stdout.",
+	  (void *) &opt_compilerinfo, &optset_compilerinfo);
+	DefineFlagOpt("quiet", "Turn off progress messages.", (void *) &opt_quiet, &optset_quiet);
+	DefineFlagOpt("version", "Show version and exit.", (void *) &opt_version, &optset_version);
+	DefineFlagOpt("logopts", "Log options.", (void *) &opt_logopts, &optset_logopts);
+	DefineFlagOpt("help", "Display command-line options.", (void *) &opt_help, &optset_help);
+	DefineStrOpt("log", "", "Log file name.", (void *) &opt_log, &optset_log);
+
+// Second expansion of myopts.h: each *_OPT macro now registers its option.
+// NOTE(review): the ENUM_OPT expansion does not match the parameter list of
+// DefineEnumOpt and would not compile if myopts.h used ENUM_OPT — confirm.
+#undef FLAG_OPT
+#undef TOG_OPT
+#undef INT_OPT
+#undef UNS_OPT
+#undef FLT_OPT
+#undef STR_OPT
+#undef ENUM_OPT
+#define FLAG_OPT(LongName) DefineFlagOpt(#LongName, "help", (void *) &opt_##LongName, &optset_##LongName);
+#define TOG_OPT(LongName, Default) DefineTogOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName);
+#define INT_OPT(LongName, Default, Min, Max) DefineIntOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);
+#define UNS_OPT(LongName, Default, Min, Max) DefineUnsOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);
+#define FLT_OPT(LongName, Default, Min, Max) DefineFloatOpt(#LongName, Default, Min, Max, "help", (void *) &opt_##LongName, &optset_##LongName);
+#define STR_OPT(LongName, Default) DefineStrOpt(#LongName, Default, "help", (void *) &opt_##LongName, &optset_##LongName);
+#define ENUM_OPT(LongName, Values, Default) DefineEnumOpt(#LongName, Values, Default, "help", (void *) &opt_##LongName, &optset_##LongName);
+#include "myopts.h"
+
+	// RecurseDepth was incremented on entry, so the outermost call sees 1.
+	// (The previous test against 0 could never succeed.)
+	if (RecurseDepth == 1)
+		g_Argv.clear();
+
+	for (int i = 0; i < argc; ++i)
+		g_Argv.push_back(string(argv[i]));
+
+	int i = 1;
+	while (i < argc)
+		{
+		// Copy rather than reference: g_Argv may grow below.
+		const string Arg = g_Argv[i];
+
+		if (Arg.empty())
+			{
+			// Skip it, but advance; without this the loop never terminated.
+			++i;
+			continue;
+			}
+
+		if (Arg == "file:" && i + 1 < argc)
+			{
+			// Append the file's arguments so later iterations parse them.
+			const string FileName = g_Argv[i+1];
+			vector<string> Args;
+			GetArgsFromFile(FileName, Args);
+			for (vector<string>::const_iterator p = Args.begin();
+			  p != Args.end(); ++p)
+				{
+				g_Argv.push_back(*p);
+				++argc;
+				}
+			i += 2;
+			continue;
+			}
+
+		if (Arg.size() <= 1 || Arg[0] != '-')
+			CmdLineErr("Expected -option_name or --option_name, got '%s'", Arg.c_str());
+
+		// Accept both -name and --name.
+		string LongName = (Arg.size() > 2 && Arg[1] == '-' ? Arg.substr(2) : Arg.substr(1));
+		OptInfo Opt = *GetOptInfo(LongName, true);
+		// DefineEnumOpt supplies no OptSet pointer, so tolerate a null one.
+		if (Opt.OptSet != 0)
+			*Opt.OptSet = true;
+
+		// Flags and toggles take no value. The value is written through
+		// Opt.Value, so the entry stored in g_Opts needs no update.
+		if (Opt.Type == OT_Flag)
+			{
+			*(bool *) Opt.Value = true;
+			++i;
+			continue;
+			}
+		if (Opt.Type == OT_Tog)
+			{
+			if (string("no") + Opt.LongName == LongName)
+				*(bool *) Opt.Value = false;
+			else
+				{
+				asserta(Opt.LongName == LongName);
+				*(bool *) Opt.Value = true;
+				}
+			++i;
+			continue;
+			}
+
+		// Every other option type consumes the next argument as its value.
+		++i;
+		if (i >= argc)
+			CmdLineErr("Missing value for option --%s", LongName.c_str());
+
+		string Value = g_Argv[i];
+		SetOpt(Opt, Value);
+		++i;
+		}
+
+	--RecurseDepth;
+	if (RecurseDepth > 0)
+		return;
+
+	if (opt_help)
+		Help();
+
+	if (opt_compilerinfo)
+		CompilerInfo();
+
+	SetLogFileName(opt_log);
+
+	if (opt_log != "")
+		{
+		for (int i = 0; i < argc; ++i)
+			Log("%s%s", i == 0 ? "" : " ", g_Argv[i].c_str());
+		Log("\n");
+		time_t Now = time(0);
+		struct tm *t = localtime(&Now);
+		const char *s = asctime(t);
+		Log("Started %s", s); // there is a newline in s
+		Log("Version " MY_VERSION ".%s\n", SVN_VERSION);
+		Log("\n");
+		}
+
+	if (opt_logopts)
+		LogOpts();
+	}
+\r
+// Returns x as a percentage of y, or 0 when y is zero.
+double Pct(double x, double y)
+	{
+	const bool NoDenominator = (y == 0.0f);
+	return NoDenominator ? 0.0f : (x*100.0f)/y;
+	}
+\r
+// Rebuilds the command line in s by joining the entries of g_Argv with
+// single spaces.
+void GetCmdLine(string &s)
+	{
+	s.clear();
+	const unsigned ArgCount = SIZE(g_Argv);
+	for (unsigned k = 0; k != ArgCount; ++k)
+		{
+		if (k != 0)
+			s += " ";
+		s += g_Argv[k];
+		}
+	}
+\r
+// Returns a copy of the NUL-terminated string s in memory obtained with
+// myalloc.
+char *mystrsave(const char *s)
+	{
+	const unsigned Length = unsigned(strlen(s));
+	const unsigned BytesWithNul = Length + 1;
+	char *Copy = myalloc(char, BytesWithNul);
+	memcpy(Copy, s, BytesWithNul);
+	return Copy;
+	}
+\r
+// Logs prefixspaces spaces followed by u right-aligned in a field of width w.
+// UINT_MAX is shown as "*" (with width and precision both w).
+void Logu(unsigned u, unsigned w, unsigned prefixspaces)
+	{
+	for (unsigned i = 0; i < prefixspaces; ++i)
+		Log(" ");
+	// A '*' width or precision consumes an int argument, so convert explicitly.
+	const int Width = (int) w;
+	if (u == UINT_MAX)
+		Log("%*.*s", Width, Width, "*");
+	else
+		Log("%*u", Width, u);
+	}
+\r
+// Logs prefixspaces spaces followed by x with two decimals, right-aligned in
+// a field of width w. FLT_MAX is shown as "*" (with width and precision
+// both w).
+void Logf(float x, unsigned w, unsigned prefixspaces)
+	{
+	for (unsigned i = 0; i < prefixspaces; ++i)
+		Log(" ");
+	// A '*' width or precision consumes an int argument, so convert explicitly.
+	const int Width = (int) w;
+	if (x == FLT_MAX)
+		Log("%*.*s", Width, Width, "*");
+	else
+		Log("%*.2f", Width, x);
+	}
+\r
+static uint32 g_SLCG_state = 1;\r
+\r
+// Numerical values used by Microsoft C, according to wikipedia:\r
+// http://en.wikipedia.org/wiki/Linear_congruential_generator\r
+static uint32 g_SLCG_a = 214013;\r
+static uint32 g_SLCG_c = 2531011;\r
+\r
+// Simple linear congruential generator: advances g_SLCG_state by one step
+// (state*a + c in uint32 arithmetic) and returns the new state.
+// Bad statistical properties; used only to seed the better generator.
+static uint32 SLCG_rand()
+	{
+	const uint32 Next = g_SLCG_a*g_SLCG_state + g_SLCG_c;
+	g_SLCG_state = Next;
+	return Next;
+	}
+\r
+// Seeds the simple generator and discards its first ten outputs.
+static void SLCG_srand(uint32 Seed)
+	{
+	g_SLCG_state = Seed;
+	int Remaining = 10;
+	while (Remaining-- > 0)
+		SLCG_rand();
+	}
+\r
+/***\r
+A multiply-with-carry random number generator, see:\r
+http://en.wikipedia.org/wiki/Multiply-with-carry\r
+\r
+The particular multipliers used here were found on\r
+the web where they are attributed to George Marsaglia.\r
+***/\r
+\r
+static bool g_InitRandDone = false;\r
+static uint32 g_X[5];\r
+\r
+// Returns the next 32-bit value from the multiply-with-carry generator.
+// g_X[0..3] hold the four most recent outputs and g_X[4] the carry. The
+// generator is seeded on first use through InitRand.
+uint32 RandInt32()
+	{
+	InitRand();
+
+	// 64-bit weighted sum of the history plus the carry.
+	uint64 Sum = g_X[4];
+	Sum += 5115*(uint64) g_X[0];
+	Sum += 1776*(uint64) g_X[1];
+	Sum += 1492*(uint64) g_X[2];
+	Sum += 2111111111*(uint64) g_X[3];
+
+	// Shift the history; the low word is the new output, the high word the new carry.
+	g_X[3] = g_X[2];
+	g_X[2] = g_X[1];
+	g_X[1] = g_X[0];
+	g_X[0] = (uint32) Sum;
+	g_X[4] = (uint32) (Sum >> 32);
+	return g_X[0];
+	}
+\r
+// Returns the next value from RandInt32 as an unsigned.
+unsigned randu32()
+	{
+	return (unsigned) RandInt32();
+	}
+\r
+// Seeds the random number generator once; later calls return immediately.
+// The seed is opt_randseed when that option was given, otherwise it is
+// derived from the current time and the process id. The seed is logged.
+void InitRand()
+	{
+	if (g_InitRandDone)
+		return;
+// Set the flag before anything else: RandInt32, called below, calls InitRand.
+	g_InitRandDone = true;
+
+	unsigned Seed;
+	if (optset_randseed)
+		Seed = opt_randseed;
+	else
+		Seed = (unsigned) (time(0)*getpid());
+	Log("RandSeed=%u\n", Seed);
+	SLCG_srand(Seed);
+
+	// Fill the generator state from the simple generator.
+	for (unsigned k = 0; k != 5; ++k)
+		g_X[k] = SLCG_rand();
+
+	// Discard the first 100 outputs.
+	for (unsigned k = 0; k != 100; ++k)
+		RandInt32();
+	}
+\r
+// MUST COME AT END BECAUSE OF #undef\r
+#if RCE_MALLOC\r
+#undef mymalloc\r
+#undef myfree\r
+#undef myfree2\r
+// RCE_MALLOC build: function form of mymalloc (the macro of the same name is
+// #undef'd just above); forwards to rce_malloc with the caller's location.
+void *mymalloc(unsigned bytes, const char *FileName, int Line)
+	{
+	void *rce_malloc(unsigned bytes, const char *FileName, int Line);
+	return rce_malloc(bytes, FileName, Line);
+	}
+\r
+// RCE_MALLOC build: function form of myfree; forwards to rce_free with the
+// caller's location.
+void myfree(void *p, const char *FileName, int Line)
+	{
+	void rce_free(void *p, const char *FileName, int Line);
+	rce_free(p, FileName, Line);
+	}
+\r
+// RCE_MALLOC build: function form of myfree2; forwards to rce_free with the
+// caller's location. The bytes argument is not used.
+void myfree2(void *p, unsigned bytes, const char *FileName, int Line)
+	{
+	void rce_free(void *p, const char *FileName, int Line);
+	rce_free(p, FileName, Line);
+	}
+\r
+#else // RCE_MALLOC\r
+// Allocates bytes bytes with malloc and updates the allocation statistics
+// (call count, total and net bytes, peak net bytes). Never returns null: on
+// failure the problem is reported, the allocations are logged and Die is
+// called. A zero-byte request is served with a one-byte block, because
+// malloc(0) may return null and would otherwise be reported as out of memory.
+void *mymalloc(unsigned bytes)
+	{
+	++g_NewCalls;
+	if (g_InitialMemUseBytes == 0)
+		g_InitialMemUseBytes = GetMemUseBytes();
+
+	g_TotalAllocBytes += bytes;
+	g_NetBytes += bytes;
+	if (g_NetBytes > g_MaxNetBytes)
+		{
+		// Re-measure process memory only when the peak rose substantially.
+		if (g_NetBytes > g_MaxNetBytes + 10000000)
+			GetMemUseBytes();//to force update of peak
+		g_MaxNetBytes = g_NetBytes;
+		}
+	void *p = malloc(bytes == 0 ? 1 : bytes);
+	if (0 == p)
+		{
+		double b = GetMemUseBytes();
+		fprintf(stderr, "\nOut of memory mymalloc(%u), curr %.3g bytes",
+		  (unsigned) bytes, b);
+		void LogAllocs();
+		LogAllocs();
+#if DEBUG && defined(_MSC_VER)
+		asserta(_CrtCheckMemory());
+#endif
+		Die("Out of memory, mymalloc(%u), curr %.3g bytes\n",
+		  (unsigned) bytes, b);
+		}
+	return p;
+	}
+\r
+// Releases memory obtained from mymalloc. A null pointer is accepted and
+// ignored. No statistics are updated here.
+void myfree(void *p)
+	{
+	if (p != 0)
+		free(p);
+	}
+\r
+// Releases memory obtained from mymalloc and records the release of bytes
+// bytes in the allocation statistics. The statistics are updated even when
+// p is null.
+void myfree2(void *p, unsigned bytes)
+	{
+	++g_FreeCalls;
+	g_NetBytes -= bytes;
+	g_TotalFreeBytes += bytes;
+
+	if (p != 0)
+		free(p);
+	}
+#endif\r
--- /dev/null
+#ifndef myutils_h\r
+#define myutils_h\r
+\r
+#define RCE_MALLOC 0\r
+
+#include <stdio.h>\r
+#include <sys/types.h>\r
+#include <string>\r
+#include <string.h>\r
+#include <memory.h>\r
+#include <vector>\r
+#include <math.h>\r
+#include <stdarg.h>\r
+#include <cstdlib>\r
+#include <climits>\r
+\r
+#ifndef _MSC_VER\r
+#include <inttypes.h>\r
+#endif\r
+\r
+using namespace std;\r
+\r
+#ifdef _MSC_VER\r
+#include <crtdbg.h>\r
+#pragma warning(disable: 4996) // deprecated functions\r
+#define _CRT_SECURE_NO_DEPRECATE 1\r
+#endif\r
+\r
+#if defined(_DEBUG) && !defined(DEBUG)\r
+#define DEBUG 1\r
+#endif\r
+\r
+#if defined(DEBUG) && !defined(_DEBUG)\r
+#define _DEBUG 1\r
+#endif\r
+\r
+#ifndef NDEBUG\r
+#define DEBUG 1\r
+#define _DEBUG 1\r
+#endif\r
+\r
+// Short aliases for basic arithmetic types.
+// NOTE(review): the names assume short is 16 bits and int/unsigned are 32
+// bits — confirm for every target platform.
+typedef unsigned char byte;
+typedef unsigned short uint16;
+typedef unsigned uint32;
+typedef int int32;
+typedef double float32;	// NOTE(review): aliased to double, not float, despite the name — confirm intent
+typedef signed char int8;
+typedef unsigned char uint8;
+\r
+#ifdef _MSC_VER\r
+\r
+typedef __int64 int64;\r
+typedef unsigned __int64 uint64;\r
+\r
+#define INT64_PRINTF "lld"\r
+#define UINT64_PRINTF "llu"\r
+\r
+#define SIZE_T_PRINTF "u"\r
+#define OFF64_T_PRINTF "lld"\r
+\r
+#define INT64_PRINTFX "llx"\r
+#define UINT64_PRINTFX "llx"\r
+\r
+#define SIZE_T_PRINTFX "x"\r
+#define OFF64_T_PRINTFX "llx"\r
+\r
+#elif defined(__x86_64__)\r
+\r
+typedef long int64;\r
+typedef unsigned long uint64;\r
+\r
+#define INT64_PRINTF "ld"\r
+#define UINT64_PRINTF "lu"\r
+\r
+#define SIZE_T_PRINTF "lu"\r
+#define OFF64_T_PRINTF "ld"\r
+\r
+#define INT64_PRINTFX "lx"\r
+#define UINT64_PRINTFX "lx"\r
+\r
+#define SIZE_T_PRINTFX "lx"\r
+#define OFF64_T_PRINTFX "lx"\r
+\r
+#else\r
+\r
+typedef long long int64;\r
+typedef unsigned long long uint64;\r
+\r
+#define INT64_PRINTF "lld"\r
+#define UINT64_PRINTF "llu"\r
+\r
+#define SIZE_T_PRINTF "u"\r
+#define OFF64_T_PRINTF "lld"\r
+\r
+#define INT64_PRINTFX "llx"\r
+#define UINT64_PRINTFX "llx"\r
+\r
+#define SIZE_T_PRINTFX "x"\r
+#define OFF64_T_PRINTFX "llx"\r
+#endif\r
+\r
+#define d64 INT64_PRINTF\r
+#define u64 UINT64_PRINTF\r
+#define x64 UINT64_PRINTFX\r
+\r
+// const uint64 UINT64_MAX = (~((uint64) 0));\r
+\r
+// Assertion macros. Any assert defined earlier is replaced by a version that
+// reports failures through myassertfail(expression text, file, line).
+//   assert / myassert  compiled out (never evaluate exp) when NDEBUG is defined
+//   asserta            always active, regardless of NDEBUG
+void myassertfail(const char *Exp, const char *File, unsigned Line);
+#undef assert
+#ifdef NDEBUG
+#define assert(exp) ((void)0)
+#define myassert(exp) ((void)0)
+#else
+#define assert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )
+#define myassert(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )
+#endif
+#define asserta(exp) (void)( (exp) || (myassertfail(#exp, __FILE__, __LINE__), 0) )
+\r
+#define ureturn(x) return (x)\r
+\r
+#define NotUsed(v) ((void *) &v)\r
+\r
+// One-character renderings of a bool for compact output:
+// pom=plus or minus ('+'/'-'), tof=true or false ('T'/'F'), yon=yes or no ('Y'/'N').
+static inline char pom(bool Plus) { return Plus ? '+' : '-'; }
+static inline char tof(bool x) { return x ? 'T' : 'F'; }
+static inline char yon(bool x) { return x ? 'Y' : 'N'; }
+unsigned GetElapsedSecs();\r
+\r
+#if RCE_MALLOC\r
+\r
+void *rce_malloc(unsigned bytes, const char *FileName, int Line);\r
+void rce_free(void *p, const char *FileName, int LineNr);\r
+void rce_chkmem();\r
+\r
+void rce_dumpmem_(const char *FileName, int LineNr);\r
+#define rce_dumpmem() rce_dumpmem_(__FILE__, __LINE__)\r
+\r
+void rce_assertvalidptr_(void *p, const char *FileName, int LineNr);\r
+#define rce_assertvalidptr(p) rce_assertvalidptr_(p, __FILE__, __LINE__)\r
+\r
+void rce_dumpptr_(void *p, const char *FileName, int LineNr);\r
+#define rce_dumpptr(p) rce_dumpptr_(p, __FILE__, __LINE__)\r
+\r
+#define mymalloc(n) rce_malloc((n), __FILE__, __LINE__)\r
+#define myfree(p) rce_free(p, __FILE__, __LINE__)\r
+#define myfree2(p,n) rce_free(p, __FILE__, __LINE__)\r
+#define myalloc(t, n) (t *) rce_malloc((n)*sizeof(t), __FILE__, __LINE__)\r
+\r
+#else // RCE_MALLOC\r
+void *mymalloc(unsigned bytes);\r
+void myfree2(void *p, unsigned Bytes);\r
+void myfree(void *p);\r
+#define rce_chkmem() /* empty */\r
+#define myalloc(t, n) (t *) mymalloc((n)*sizeof(t))\r
+#endif // RCE_MALLOC\r
+\r
+#define SIZE(c) unsigned((c).size())\r
+\r
+bool myisatty(int fd);\r
+\r
+#ifdef _MSC_VER\r
+#define off_t __int64\r
+#endif\r
+\r
+FILE *OpenStdioFile(const string &FileName);\r
+FILE *CreateStdioFile(const string &FileName);\r
+bool CanSetStdioFilePos(FILE *f);\r
+void CloseStdioFile(FILE *f);\r
+void SetStdioFilePos(FILE *f, off_t Pos);\r
+void ReadStdioFile(FILE *f, off_t Pos, void *Buffer, unsigned Bytes);\r
+void ReadStdioFile(FILE *f, void *Buffer, unsigned Bytes);\r
+void WriteStdioFile(FILE *f, off_t Pos, const void *Buffer, unsigned Bytes);\r
+void WriteStdioFile(FILE *f, const void *Buffer, unsigned Bytes);\r
+bool ReadLineStdioFile(FILE *f, char *Line, unsigned Bytes);\r
+bool ReadLineStdioFile(FILE *f, string &Line);\r
+byte *ReadAllStdioFile(FILE *f, off_t &FileSize);\r
+byte *ReadAllStdioFile(const string &FileName, off_t &FileSize);\r
+void AppendStdioFileToFile(FILE *fFrom, FILE *fTo);\r
+void FlushStdioFile(FILE *f);\r
+bool StdioFileExists(const string &FileName);\r
+off_t GetStdioFilePos(FILE *f);\r
+off_t GetStdioFileSize(FILE *f);\r
+void LogStdioFileState(FILE *f);\r
+void RenameStdioFile(const string &FileNameFrom, const string &FileNameTo);\r
+void DeleteStdioFile(const string &FileName);\r
+\r
+void myvstrprintf(string &Str, const char *szFormat, va_list ArgList);\r
+void myvstrprintf(string &Str, const char *szFormat, ...);\r
+\r
+void SetLogFileName(const string &FileName);\r
+void Log(const char *szFormat, ...);\r
+\r
+void Die(const char *szFormat, ...);\r
+void Warning(const char *szFormat, ...);\r
+\r
+void ProgressStep(unsigned i, unsigned N, const char *Format, ...);\r
+void Progress(const char *szFormat, ...);\r
+void Progress(const string &Str);\r
+void ProgressLog(const char *szFormat, ...);\r
+void ProgressExit();\r
+\r
+char *mystrsave(const char *s);\r
+\r
+double GetPeakMemUseBytes();\r
+\r
+// Are two floats equal to within epsilon?\r
+const double epsilon = 0.01;\r
+// Tolerance comparison with a caller-supplied epsilon (the parameter hides\r
+// the file-level constant of the same name). When |x| exceeds 10000 the\r
+// supplied epsilon is replaced by |x|/10000.\r
+inline bool feq(double x, double y, double epsilon)\r
+	{\r
+	const double AbsX = fabs(x);\r
+	const double Tolerance = (AbsX > 10000) ? AbsX/10000 : epsilon;\r
+	// Expressed as a negated '>' so NaN inputs give the same answer as before.\r
+	return !(fabs(x - y) > Tolerance);\r
+	}\r
+\r
+// Tolerance comparison using the file-level default epsilon. Two values\r
+// that are both below -1e6 always compare equal. When |x| exceeds 10000\r
+// the tolerance becomes |x|/10000.\r
+inline bool feq(double x, double y)\r
+	{\r
+	const bool BothVeryNegative = (x < -1e6 && y < -1e6);\r
+	if (BothVeryNegative)\r
+		return true;\r
+\r
+	const double AbsX = fabs(x);\r
+	const double Tolerance = (AbsX > 10000) ? AbsX/10000 : epsilon;\r
+	// Expressed as a negated '>' so NaN inputs give the same answer as before.\r
+	return !(fabs(x - y) > Tolerance);\r
+	}\r
+\r
+#define asserteq(x, y) assert(feq(x, y))\r
+#define assertaeq(x, y) asserta(feq(x, y))\r
+\r
+// Zero the first n elements of array/pointer a. Both arguments are\r
+// parenthesized so that expressions such as zero(p, n + 1) size the\r
+// memset correctly.\r
+#define zero(a, n) memset((a), 0, (n)*sizeof((a)[0]))\r
+\r
+void InitRand();\r
+unsigned randu32();\r
+void Split(const string &Str, vector<string> &Fields, char Sep = 0);\r
+double Pct(double x, double y);\r
+double GetMemUseBytes();\r
+const char *MemBytesToStr(double Bytes);\r
+const char *IntToStr(unsigned i);\r
+const char *FloatToStr(double d);\r
+const char *SecsToStr(double Secs);\r
+void Logu(unsigned u, unsigned w, unsigned prefixspaces = 2);\r
+void Logf(float x, unsigned w, unsigned prefixspaces = 2);\r
+const char *SecsToHHMMSS(int Secs);\r
+\r
+void MyCmdLine(int argc, char **argv);\r
+void CmdLineErr(const char *Format, ...);\r
+void Help();\r
+void GetCmdLine(string &s);\r
+\r
+#define FLAG_OPT(LongName) extern bool opt_##LongName; extern bool optset_##LongName;\r
+#define TOG_OPT(LongName, Default) extern bool opt_##LongName; extern bool optset_##LongName;\r
+#define INT_OPT(LongName, Default, Min, Max) extern int opt_##LongName; extern bool optset_##LongName;\r
+#define UNS_OPT(LongName, Default, Min, Max) extern unsigned opt_##LongName; extern bool optset_##LongName;\r
+#define FLT_OPT(LongName, Default, Min, Max) extern double opt_##LongName; extern bool optset_##LongName;\r
+#define STR_OPT(LongName, Default) extern string opt_##LongName; extern bool optset_##LongName;\r
+#define ENUM_OPT(LongName, Default, Values) extern int opt_##LongName; extern bool optset_##LongName;\r
+#include "myopts.h"\r
+#undef FLAG_OPT\r
+#undef TOG_OPT\r
+#undef INT_OPT\r
+#undef UNS_OPT\r
+#undef FLT_OPT\r
+#undef STR_OPT\r
+#undef ENUM_OPT\r
+\r
+extern const char *SVN_VERSION;\r
+extern const char *SVN_MODS;\r
+extern bool opt_quiet;
+extern bool opt_version;
+extern FILE *g_fLog;
+\r
+#endif // myutils_h\r
--- /dev/null
+#ifndef orf_h\r
+#define orf_h\r
+\r
+#include "alpha.h"\r
+\r
+// Record describing one open reading frame (ORF), plus coordinate\r
+// conversion helpers whose definitions are not in this header.\r
+// NOTE(review): the field descriptions below are inferred from the names\r
+// only -- confirm against the implementation before relying on them.\r
+struct ORFData\r
+ {\r
+ const byte *NucSeq; // presumably the nucleotide sequence\r
+ const byte *AminoSeq; // presumably the translated amino acid sequence\r
+ int Frame; // presumably the reading frame; sign convention TODO confirm\r
+ unsigned NucL; // presumably length of NucSeq\r
+ unsigned AminoL; // presumably length of AminoSeq\r
+ unsigned NucLo; // presumably low nucleotide coordinate of the ORF\r
+ unsigned NucHi; // presumably high nucleotide coordinate of the ORF\r
+ ORFData *Next; // link to another ORFData; list ownership not visible here\r
+\r
+ // Conversions between nucleotide and amino acid coordinates.\r
+ unsigned GetNucPosFirstBase() const;\r
+ unsigned GetAAPos(unsigned NucPos) const;\r
+ unsigned GetCodex(unsigned NucPos) const;\r
+ unsigned GetNucLo(unsigned AALo, unsigned AAHi) const;\r
+ unsigned GetNucHi(unsigned AALo, unsigned AAHi) const;\r
+ unsigned GetAALo(unsigned NucLo, unsigned NucHi) const;\r
+ unsigned GetAAHi(unsigned NucLo, unsigned NucHi) const;\r
+ unsigned GetNucPosFirstBaseInCodon(unsigned AAPos) const;\r
+ unsigned GetNucPosLastBaseInCodon(unsigned AAPos) const;\r
+ unsigned RoundToCodonLo(unsigned NucPos) const;\r
+ unsigned RoundToCodonHi(unsigned NucPos) const;\r
+ // Logging helpers.\r
+ void LogMe() const;\r
+ void LogMe2() const;\r
+ };\r
+\r
+const byte ORFEND = '.';\r
+\r
+void GetORFs(const byte *NucSeq, unsigned NucL, vector<ORFData> &ORFs,\r
+ unsigned ORFStyle, int FindFrame, int Sign);\r
+\r
+#endif // orf_h\r
--- /dev/null
+#ifndef out_h\r
+#define out_h\r
+\r
+#include "seq.h"\r
+#include "hsp.h"\r
+#include "orf.h"\r
+#include "path.h"\r
+#include <float.h>\r
+\r
+// One alignment between two sequences: the two SeqData records, the HSP,\r
+// the path string and the score fields passed to the output routines\r
+// declared below (BlastOut, Blast6Out, FastaPairOut, UserOut).\r
+struct AlnData\r
+ {\r
+/***\r
+SA.Seq and SB.Seq align.\r
+Reverse strand stuff for nucleotides is handled like this:\r
+ SA.RevComp must be false.\r
+ If SB.RevComp is true, then SA.Seq is r.c.'d relative to the sequence in\r
+ the input file (query or db). If so, coordinates in HSP refer to SB.Seq\r
+ so are also r.c.'d relative to the original sequence.\r
+***/\r
+// NOTE(review): the sentence above says "SA.Seq is r.c.'d" when SB.RevComp\r
+// is true; this may be intended to read SB.Seq -- confirm.\r
+ SeqData SA;\r
+ SeqData SB;\r
+ HSPData HSP;\r
+ const char *Path; // not owned or freed by this struct\r
+ char IdDesc[256];\r
+\r
+ float FractId;\r
+ float RawScore;\r
+ float BitScore;\r
+ float Evalue;\r
+\r
+ // Writes "AD: ", the HSP, and the two labels to the log.\r
+ void LogMe() const\r
+ {\r
+ Log("AD: ");\r
+ HSP.LogMe();\r
+ Log(" %s,%s\n", SA.Label, SB.Label);\r
+ }\r
+ };\r
+\r
+bool OnDerepHit(const SeqData &SA, const SeqData &SB);\r
+\r
+bool OnLocalUngappedHit(const SeqData &SA, const SeqData &SB,\r
+ const HSPData &HSP, float &Evalue, float &FractId);\r
+\r
+bool OnLocalGappedHit(const SeqData &SA, const SeqData &SB,\r
+ const HSPData &HSP, const PathData &PD, float &Evalue, float &FractId);\r
+\r
+bool OnGlobalHit(const SeqData &SA, const SeqData &SB, const PathData &PD,\r
+ float &FractId);\r
+\r
+void OnReject(const SeqData &SA, const SeqData &SB, double FractId,\r
+ const char *Path);\r
+\r
+void OnNotMatched(const char *Label, unsigned L);\r
+void OnNewCluster(unsigned ClusterIndex, const char *Label, unsigned L);\r
+void OnNewLibCluster(unsigned ClusterIndex, const char *Label, unsigned L);\r
+void OnLibCluster(unsigned ClusterIndex, unsigned Size, double AvgId,\r
+ const char *Label);\r
+void OnNewCluster(unsigned ClusterIndex, unsigned Size, double AvgId,\r
+ const char *Label);\r
+void OnChainCov(const SeqData &NucleoSD, const SeqData &TargetSD,\r
+ float Score, float ChainCov);\r
+\r
+void SetUserFieldIndexes(const string &s);\r
+\r
+void BlastOut(FILE *f, const AlnData &AD);\r
+void Blast6Out(FILE *f, const AlnData &AD);\r
+void FastaPairOut(FILE *f, const AlnData &AD);\r
+void UserOut(FILE *f, const AlnData &AD);\r
+\r
+void BlastOutORF(FILE *f, const AlnData &AD);\r
+\r
+void OpenOutputFiles();\r
+void CloseOutputFiles();\r
+void SetLibSeedCount(unsigned DBSeqCount);\r
+const char *UserFieldIndexToStr(unsigned i);\r
+\r
+extern float **g_SubstMx;\r
+\r
+static char g_IdChar = '|';\r
+static char g_DiffChar = ' ';\r
+\r
+// Annotation character for one aligned pair of nucleotide letters:\r
+// g_IdChar when the letters are equal ignoring case, g_DiffChar otherwise.\r
+static inline char GetSymN(byte Letter1, byte Letter2)\r
+	{\r
+	const byte Upper1 = (byte) toupper(Letter1);\r
+	const byte Upper2 = (byte) toupper(Letter2);\r
+	return (Upper1 == Upper2) ? g_IdChar : g_DiffChar;\r
+	}\r
+\r
+// Annotation character for one aligned pair of amino acid letters:\r
+// '|' for letters equal ignoring case, otherwise graded by the g_SubstMx\r
+// score of the upper-cased pair: ':' for >= 2, '.' for > 0, ' ' for the rest.\r
+static inline char GetSymA(byte Letter1, byte Letter2)\r
+	{\r
+	const byte Upper1 = (byte) toupper(Letter1);\r
+	const byte Upper2 = (byte) toupper(Letter2);\r
+	if (Upper1 == Upper2)\r
+		return '|';\r
+\r
+	const float PairScore = g_SubstMx[Upper1][Upper2];\r
+	if (PairScore >= 2.0f)\r
+		return ':';\r
+	return (PairScore > 0.0f) ? '.' : ' ';\r
+	}\r
+\r
+// Dispatches to the nucleotide or amino acid annotation routine.\r
+static inline char GetSym(byte Letter1, byte Letter2, bool Nucleo)\r
+	{\r
+	return Nucleo ? GetSymN(Letter1, Letter2) : GetSymA(Letter1, Letter2);\r
+	}\r
+\r
+// Number of decimal digits in n for values below 1,000,000. Any larger\r
+// value reports 10 rather than its exact digit count.\r
+static unsigned GetNDig(unsigned n)\r
+	{\r
+	unsigned Limit = 10;\r
+	for (unsigned Digits = 1; Digits <= 6; ++Digits)\r
+		{\r
+		if (n < Limit)\r
+			return Digits;\r
+		Limit *= 10;\r
+		}\r
+	return 10;\r
+	}\r
+\r
+extern unsigned *g_UserFieldIndexes;\r
+extern unsigned g_UserFieldCount;\r
+\r
+#endif // out_h\r
--- /dev/null
+#include "myutils.h"\r
+#include "path.h"\r
+#include "timing.h"\r
+\r
+#define TRACE 0\r
+\r
+const unsigned PathMagic = 0x9A783A16;\r
+\r
+// One slot in the pool of reusable path buffers managed by AllocBuffer()\r
+// and FreeBuffer().\r
+struct PathBuffer\r
+ {\r
+ unsigned Magic; // set to PathMagic on creation; checked by AllocBuffer()\r
+ char *Buffer; // character storage, or 0 if none allocated for this slot yet\r
+ unsigned Size; // size of Buffer in chars\r
+ bool InUse; // true while the buffer is handed out to a caller\r
+ };\r
+\r
+static PathBuffer **g_PathBuffers;\r
+static unsigned g_PathBufferSize;\r
+\r
+// Hands out a character buffer of at least Size bytes from the pool in\r
+// g_PathBuffers. Returns 0 when Size is 0. The slot is marked InUse until\r
+// the same pointer is passed to FreeBuffer().\r
+static char *AllocBuffer(unsigned Size)\r
+ {\r
+ if (Size == 0)\r
+ return 0;\r
+\r
+// Is a free buffer that is big enough?\r
+ for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+ {\r
+ PathBuffer *PB = g_PathBuffers[i];\r
+ asserta(PB->Magic == PathMagic);\r
+ if (!PB->InUse)\r
+ {\r
+ // Reuse an idle slot whose buffer is already large enough.\r
+ if (PB->Size >= Size)\r
+ {\r
+ PB->InUse = true;\r
+ return PB->Buffer;\r
+ }\r
+ // Idle slot with no buffer yet: allocate one with 1024 bytes of slack.\r
+ // Idle slots holding a too-small buffer are skipped, not regrown.\r
+ if (PB->Buffer == 0)\r
+ {\r
+ unsigned Size2 = Size + 1024;\r
+ PB->Buffer = MYALLOC(char, Size2, Path);\r
+ PB->Size = Size2;\r
+ PB->InUse = true;\r
+ return PB->Buffer;\r
+ }\r
+ }\r
+ }\r
+\r
+// No available buffer, must expand g_PathBuffers[]\r
+ unsigned NewPathBufferSize = g_PathBufferSize + 1024;\r
+ PathBuffer **NewPathBuffers = MYALLOC(PathBuffer *, NewPathBufferSize, Path);\r
+ \r
+ // Existing slot records are carried over by pointer.\r
+ for (unsigned i = 0; i < g_PathBufferSize; ++i)\r
+ NewPathBuffers[i] = g_PathBuffers[i];\r
+\r
+ // Each added slot gets a fresh, empty PathBuffer record.\r
+ for (unsigned i = g_PathBufferSize; i < NewPathBufferSize; ++i)\r
+ {\r
+ PathBuffer *PB = MYALLOC(PathBuffer, 1, Path);\r
+ PB->Magic = PathMagic;\r
+ PB->Buffer = 0;\r
+ PB->Size = 0;\r
+ PB->InUse = false;\r
+ NewPathBuffers[i] = PB;\r
+ }\r
+\r
+ // First newly added slot; taken before g_PathBufferSize is updated.\r
+ PathBuffer *PB = NewPathBuffers[g_PathBufferSize];\r
+\r
+ MYFREE(g_PathBuffers, g_PathBufferSize, Path);\r
+ g_PathBuffers = NewPathBuffers;\r
+ g_PathBufferSize = NewPathBufferSize;\r
+\r
+ asserta(!PB->InUse && PB->Buffer == 0);\r
+\r
+ unsigned Size2 = Size + 1024;\r
+ PB->Buffer = MYALLOC(char, Size2, Path);\r
+ PB->Size = Size2;\r
+ PB->InUse = true;\r
+ return PB->Buffer;\r
+ }\r
+\r
+// Marks the pool slot that owns Buffer as idle again. A null pointer is\r
+// ignored; a pointer that belongs to no slot is a fatal error. The storage\r
+// itself stays in the pool for reuse.\r
+static void FreeBuffer(char *Buffer)\r
+	{\r
+	if (Buffer == 0)\r
+		return;\r
+\r
+	// Linear search for the owning slot.\r
+	unsigned Slot = 0;\r
+	while (Slot < g_PathBufferSize && g_PathBuffers[Slot]->Buffer != Buffer)\r
+		++Slot;\r
+\r
+	if (Slot < g_PathBufferSize)\r
+		{\r
+		PathBuffer *Owner = g_PathBuffers[Slot];\r
+		asserta(Owner->InUse);\r
+		Owner->InUse = false;\r
+		return;\r
+		}\r
+\r
+	Die("FreeBuffer, not found");\r
+	}\r
+\r
+// Makes sure this object can hold a path of MaxLen characters. Nothing\r
+// happens when MaxLen is already below the recorded size; otherwise the\r
+// current buffer goes back to the pool and a new one of MaxLen + 1 bytes\r
+// is taken, which also resets Start.\r
+void PathData::Alloc(unsigned MaxLen)\r
+	{\r
+	const bool AlreadyBigEnough = (MaxLen < Bytes);\r
+	if (AlreadyBigEnough)\r
+		return;\r
+\r
+	StartTimer(PathAlloc);\r
+	if (Bytes != 0)\r
+		FreeBuffer(Front);\r
+\r
+	const unsigned NewBytes = MaxLen + 1;\r
+	char *NewFront = AllocBuffer(NewBytes);\r
+\r
+	Bytes = NewBytes;\r
+	Front = NewFront;\r
+	Back = NewFront + NewBytes - 1;\r
+	Start = 0;\r
+	EndTimer(PathAlloc);\r
+	}\r
+\r
+// Returns this object's buffer to the shared pool and resets the object\r
+// to the "no buffer" state.\r
+void PathData::Free()\r
+	{\r
+	FreeBuffer(Front);\r
+	Front = 0;\r
+	Start = 0;\r
+	Back = 0;\r
+	// The recorded size must be cleared as well: Alloc() returns early when\r
+	// MaxLen < Bytes, so a stale size would leave Front null after a later\r
+	// Alloc() and the following strcpy() would write through a null pointer.\r
+	Bytes = 0;\r
+	}\r
+\r
+// Makes this object a copy of rhs: the buffer contents are copied and\r
+// Start is placed at the same offset from Front as in rhs.\r
+// An empty rhs (no buffer, or Start == 0) leaves this object empty.\r
+void PathData::Copy(const PathData &rhs)\r
+	{\r
+	// Copying onto itself would pass overlapping buffers to strcpy().\r
+	if (&rhs == this)\r
+		return;\r
+\r
+	// Without a source buffer or Start there is nothing to copy, and the\r
+	// offset arithmetic below would be performed on a null pointer.\r
+	if (rhs.Front == 0 || rhs.Start == 0)\r
+		{\r
+		Start = 0;\r
+		return;\r
+		}\r
+\r
+	Alloc(rhs.Bytes);\r
+	strcpy(Front, rhs.Front);\r
+	Start = Front + (rhs.Start - rhs.Front);\r
+	}\r
+\r
+// Loads a NUL-terminated path string into this object; afterwards Start\r
+// points at the beginning of the buffer. PathStr must not be null.\r
+void PathData::FromStr(const char *PathStr)\r
+	{\r
+	asserta(PathStr != 0);\r
+\r
+	// Character count including the terminating NUL.\r
+	const unsigned BytesWithNul = (unsigned) strlen(PathStr) + 1;\r
+	Alloc(BytesWithNul);\r
+	strcpy(Front, PathStr);\r
+	Start = Front;\r
+	}\r
+\r
+// Logs the number of pool slots and the summed size of their buffers.\r
+void LogPathStats()\r
+	{\r
+	Log("\n");\r
+\r
+	unsigned TotalBytes = 0;\r
+	unsigned Slot = 0;\r
+	while (Slot < g_PathBufferSize)\r
+		{\r
+		TotalBytes += g_PathBuffers[Slot]->Size;\r
+		++Slot;\r
+		}\r
+\r
+	Log("%u paths allocated, total memory %u bytes\n", g_PathBufferSize, TotalBytes);\r
+	}\r
--- /dev/null
+#ifndef path_h\r
+#define path_h\r
+\r
+// Holder for an alignment path stored as a NUL-terminated character\r
+// string (see Reverse(), which uses strlen on Start). Alloc(), Free(),\r
+// Copy() and FromStr() are defined outside this header.\r
+struct PathData\r
+ {\r
+private:\r
+ // Declared private and not defined here, so objects cannot be copied\r
+ // by construction or assignment; use Copy() instead.\r
+ PathData(PathData &);\r
+ PathData &operator=(PathData &);\r
+\r
+public:\r
+ char *Start; // first character of the current path; 0 means empty\r
+ char *Front; // beginning of the buffer; 0 when no buffer is held\r
+ char *Back; // presumably the last byte of the buffer -- TODO confirm\r
+ unsigned Bytes; // recorded buffer size; 0 after construction\r
+\r
+public:\r
+ PathData()\r
+ {\r
+ Clear(true);\r
+ }\r
+ ~PathData()\r
+ {\r
+ Free();\r
+ }\r
+ void Free();\r
+ void Alloc(unsigned MaxLen);\r
+ // With ctor == true the members are only zeroed, because they are not\r
+ // yet initialized; otherwise the buffer is released through Free().\r
+ void Clear(bool ctor = false)\r
+ {\r
+ Start = 0;\r
+ if (ctor)\r
+ {\r
+ Front = 0;\r
+ Back = 0;\r
+ Bytes = 0;\r
+ }\r
+ else\r
+ Free();\r
+ }\r
+ void Copy(const PathData &rhs);\r
+ void FromStr(const char *PathStr);\r
+ // Reverses the path string in place; the path must not be empty.\r
+ void Reverse()\r
+ {\r
+ asserta(Start != 0);\r
+ unsigned L = (unsigned) strlen(Start);\r
+ for (unsigned k = 0; k < L/2; ++k)\r
+ {\r
+ char c = Start[k];\r
+ Start[k] = Start[L-k-1];\r
+ Start[L-k-1] = c;\r
+ }\r
+ }\r
+ // Marks the path as empty without touching the buffer.\r
+ void SetEmpty()\r
+ {\r
+ Start = 0;\r
+ }\r
+\r
+ bool IsEmpty() const\r
+ {\r
+ return Start == 0;\r
+ }\r
+ };\r
+\r
+#endif // path_h\r
--- /dev/null
+#include "myutils.h"\r
+#include "ultra.h"\r
+#include "chime.h"\r
+#include "uc.h"\r
+#include "dp.h"\r
+#include <set>\r
+#include <algorithm>\r
+\r
+#define TRACE 0\r
+\r
+extern FILE *g_fUChime;\r
+\r
+void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,\r
+ vector<unsigned> &Parents);\r
+\r
+void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,\r
+ const string &PathQA, const string &PathQB, ChimeHit2 &Hit);\r
+\r
+double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo);\r
+\r
+// Fills IdVec with one entry per query position: the number of identical\r
+// (case-insensitive) aligned letters in a window ending at that position.\r
+// For positions >= d the window covers positions QPos-d .. QPos; for\r
+// earlier positions it is the running count from position 0.\r
+// If the query length is <= d, IdVec is all zeros.\r
+// Path columns: 'M' consumes a letter of both sequences, 'D' a query\r
+// letter only, 'I' a parent letter only. The path must consume exactly\r
+// QSD.L query letters (asserted).\r
+static void GetSmoothedIdVec(const SeqData &QSD, const SeqData &PSD, const string &Path,\r
+ vector<unsigned> &IdVec, unsigned d)\r
+ {\r
+ IdVec.clear();\r
+ const unsigned ColCount = SIZE(Path);\r
+\r
+ const byte *Q = QSD.Seq;\r
+ const byte *P = PSD.Seq;\r
+\r
+ const unsigned QL = QSD.L;\r
+ const unsigned PL = PSD.L; // not used below\r
+\r
+ if (QL <= d)\r
+ {\r
+ IdVec.resize(QSD.L, 0);\r
+ return;\r
+ }\r
+\r
+ unsigned QPos = 0;\r
+ unsigned PPos = 0;\r
+\r
+ // SameVec[q] is true when query position q is aligned ('M') to an\r
+ // identical parent letter; 'D' columns are recorded as false.\r
+ vector<bool> SameVec;\r
+ SameVec.reserve(QL);\r
+ for (unsigned Col = 0; Col < ColCount; ++Col)\r
+ {\r
+ char c = Path[Col];\r
+\r
+ bool Same = false;\r
+ if (c == 'M')\r
+ {\r
+ byte q = Q[QPos];\r
+ byte p = P[PPos];\r
+ Same = (toupper(q) == toupper(p));\r
+ }\r
+\r
+ if (c == 'M' || c == 'D')\r
+ {\r
+ ++QPos;\r
+ SameVec.push_back(Same);\r
+ }\r
+\r
+ if (c == 'M' || c == 'I')\r
+ ++PPos;\r
+ }\r
+\r
+ asserta(SIZE(SameVec) == QL);\r
+\r
+ // Warm-up: running count of identities over the first d positions.\r
+ unsigned n = 0;\r
+ for (unsigned QPos = 0; QPos < d; ++QPos)\r
+ {\r
+ if (SameVec[QPos])\r
+ ++n;\r
+ IdVec.push_back(n);\r
+ }\r
+\r
+ // Sliding window: add the current position, record the count, then\r
+ // drop the position that leaves the window for the next iteration.\r
+ for (unsigned QPos = d; QPos < QL; ++QPos)\r
+ {\r
+ if (SameVec[QPos])\r
+ ++n;\r
+ IdVec.push_back(n);\r
+ if (SameVec[QPos-d])\r
+ --n;\r
+ }\r
+ asserta(SIZE(IdVec) == QL);\r
+\r
+#if TRACE\r
+ {\r
+ Log("\n");\r
+ Log("GetSmoothedIdVec\n");\r
+ unsigned QPos = 0;\r
+ unsigned PPos = 0;\r
+ Log("Q P Same Id\n");\r
+ Log("- - ---- -------\n");\r
+ for (unsigned Col = 0; Col < ColCount; ++Col)\r
+ {\r
+ char c = Path[Col];\r
+\r
+ bool Same = false;\r
+ if (c == 'M')\r
+ {\r
+ byte q = Q[QPos];\r
+ byte p = P[PPos];\r
+ Same = (toupper(q) == toupper(p));\r
+ Log("%c %c %4c %7d\n", q, p, tof(Same), IdVec[QPos]);\r
+ }\r
+\r
+ if (c == 'M' || c == 'D')\r
+ ++QPos;\r
+ if (c == 'M' || c == 'I')\r
+ ++PPos;\r
+ }\r
+ }\r
+#endif\r
+ }\r
+\r
+// Looks for a two-parent chimeric model of the query QSD.\r
+// Steps: (1) get candidate parents, (2) globally align the query to each\r
+// and record the best smoothed identity per query position, (3) greedily\r
+// pick up to opt_maxp parents that cover the most positions at that best\r
+// identity, (4) run AlignChime on every pair of picked parents.\r
+// Hit receives the highest-scoring pair; the return value is true when at\r
+// least one pair satisfies ChimeHit2::Accept().\r
+bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
+ const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
+ float MinFractId, ChimeHit2 &Hit)\r
+ {\r
+ Hit.Clear();\r
+ Hit.QLabel = QSD.Label;\r
+\r
+ if (opt_verbose)\r
+ {\r
+ Log("\n");\r
+ Log("SearchChime()\n");\r
+ Log("Query>%s\n", QSD.Label);\r
+ }\r
+\r
+ vector<unsigned> Parents;\r
+ GetCandidateParents(U, QSD, QAb, Parents);\r
+\r
+ // A chimera needs two parents, so fewer than two candidates ends the search.\r
+ unsigned ParentCount = SIZE(Parents);\r
+ if (ParentCount <= 1)\r
+ {\r
+ if (opt_verbose)\r
+ Log("%u candidate parents, done.\n", ParentCount);\r
+ return false;\r
+ }\r
+\r
+ if (opt_fastalign)\r
+ HF.SetA(QSD);\r
+ HSPFinder *ptrHF = (opt_fastalign ? &HF : 0);\r
+\r
+ // NOTE(review): the chunk variables and the two ChunkIndexTo... vectors\r
+ // below are set up but not read again in this function.\r
+ unsigned ChunkLength;\r
+ vector<unsigned> ChunkLos;\r
+ GetChunkInfo(QSD.L, ChunkLength, ChunkLos);\r
+ const unsigned ChunkCount = SIZE(ChunkLos);\r
+\r
+ vector<unsigned> ChunkIndexToBestId(ChunkCount, 0);\r
+ vector<unsigned> ChunkIndexToBestParentIndex(ChunkCount, UINT_MAX);\r
+\r
+ // PSDs and Paths are indexed by ParentIndex; an empty path marks a\r
+ // parent that was not aligned or was excluded.\r
+ vector<SeqData> PSDs;\r
+ vector<string> Paths;\r
+ double TopPctId = 0.0;\r
+ unsigned TopParentIndex = UINT_MAX; // assigned below but not read afterwards\r
+ unsigned QL = QSD.L;\r
+ // Per query position: highest smoothed identity seen over all parents.\r
+ vector<unsigned> MaxIdVec(QL, 0);\r
+ for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
+ {\r
+ unsigned ParentSeqIndex = Parents[ParentIndex];\r
+\r
+ SeqData PSD;\r
+ //PSD.Label = U.GetSeedLabel(ParentSeqIndex);\r
+ //PSD.Seq = U.GetSeedSeq(ParentSeqIndex);\r
+ //PSD.L = U.GetSeedLength(ParentSeqIndex);\r
+ //PSD.Index = ParentSeqIndex;\r
+ U.GetSeqData(ParentSeqIndex, PSD);\r
+ PSDs.push_back(PSD);\r
+\r
+ if (opt_fastalign)\r
+ HF.SetB(PSD);\r
+\r
+ PathData PD;\r
+\r
+ float HSPId;\r
+ // NOTE(review): ptrHF is 0 when opt_fastalign is off, so *ptrHF\r
+ // dereferences a null pointer here -- confirm how GlobalAlign takes\r
+ // this argument.\r
+ bool Found = GlobalAlign(QSD, PSD, AP, AH, *ptrHF, MinFractId, HSPId, PD);\r
+ if (!Found)\r
+ {\r
+ Paths.push_back(""); \r
+ continue;\r
+ }\r
+\r
+ double PctId = 100.0*GetFractIdGivenPath(QSD.Seq, PSD.Seq, PD.Start, true);\r
+ // With opt_selfid set, a parent that is 100% identical is skipped.\r
+ if (opt_selfid && PctId == 100.0)\r
+ {\r
+ Paths.push_back(""); \r
+ continue;\r
+ }\r
+\r
+ if (PctId > TopPctId)\r
+ {\r
+ TopParentIndex = ParentIndex;\r
+ TopPctId = PctId;\r
+ if (TopPctId >= 100.0 - opt_mindiv)\r
+ {\r
+ // NOTE(review): the early return sits inside the opt_verbose\r
+ // block, so the function only stops here when verbose logging\r
+ // is on -- confirm whether that is intended.\r
+ if (opt_verbose)\r
+ {\r
+ Log(" %.1f%% >%s\n", TopPctId, PSD.Label);\r
+ Log(" Top hit exceeds ctl threshold, done.\n");\r
+ return false;\r
+ }\r
+ }\r
+ }\r
+\r
+ string Path = PD.Start;\r
+ Paths.push_back(Path);\r
+\r
+ vector<unsigned> IdVec;\r
+ GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+\r
+ for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+ if (IdVec[QPos] > MaxIdVec[QPos])\r
+ MaxIdVec[QPos] = IdVec[QPos];\r
+ }\r
+\r
+ // Greedy selection: each round picks the parent whose smoothed identity\r
+ // equals the remaining per-position maximum at the most positions.\r
+ vector<unsigned> BestParents;\r
+ for (unsigned k = 0; k < opt_maxp; ++k)\r
+ {\r
+ unsigned BestParent = UINT_MAX;\r
+ unsigned BestCov = 0;\r
+ for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)\r
+ {\r
+ const SeqData &PSD = PSDs[ParentIndex];\r
+ const string &Path = Paths[ParentIndex];\r
+ if (Path == "")\r
+ continue;\r
+\r
+ vector<unsigned> IdVec;\r
+ GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+\r
+ unsigned Cov = 0;\r
+ for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+ if (IdVec[QPos] == MaxIdVec[QPos])\r
+ ++Cov;\r
+\r
+ if (Cov > BestCov)\r
+ {\r
+ BestParent = ParentIndex;\r
+ BestCov = Cov;\r
+ }\r
+ }\r
+\r
+ if (BestParent == UINT_MAX)\r
+ break;\r
+\r
+ BestParents.push_back(BestParent);\r
+ vector<unsigned> IdVec;\r
+\r
+ const SeqData &PSD = PSDs[BestParent];\r
+ const string &Path = Paths[BestParent];\r
+ GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);\r
+ // Positions covered by the chosen parent are set to UINT_MAX,\r
+ // presumably so they no longer count in later rounds.\r
+ for (unsigned QPos = 0; QPos < QL; ++QPos)\r
+ if (IdVec[QPos] == MaxIdVec[QPos])\r
+ MaxIdVec[QPos] = UINT_MAX;\r
+ }\r
+\r
+ unsigned BestParentCount = SIZE(BestParents);\r
+\r
+ if (opt_verbose)\r
+ {\r
+ Log("%u/%u best parents\n", BestParentCount, ParentCount);\r
+ for (unsigned k = 0; k < BestParentCount; ++k)\r
+ {\r
+ unsigned i = BestParents[k];\r
+ Log(" %s\n", PSDs[i].Label);\r
+ }\r
+ }\r
+\r
+ // Evaluate every unordered pair of selected parents.\r
+ bool Found = false;\r
+ for (unsigned k1 = 0; k1 < BestParentCount; ++k1)\r
+ {\r
+ unsigned i1 = BestParents[k1];\r
+ asserta(i1 < ParentCount);\r
+\r
+ const SeqData &PSD1 = PSDs[i1];\r
+ const string &Path1 = Paths[i1];\r
+\r
+ for (unsigned k2 = k1 + 1; k2 < BestParentCount; ++k2)\r
+ {\r
+ unsigned i2 = BestParents[k2];\r
+ asserta(i2 < ParentCount);\r
+ asserta(i2 != i1);\r
+\r
+ const SeqData &PSD2 = PSDs[i2];\r
+ const string &Path2 = Paths[i2];\r
+\r
+ ChimeHit2 Hit2;\r
+ AlignChime(QSD, PSD1, PSD2, Path1, Path2, Hit2);\r
+ Hit2.PctIdQT = TopPctId;\r
+\r
+ if (Hit2.Accept())\r
+ Found = true;\r
+\r
+ // Keep the best-scoring pair whether or not it was accepted.\r
+ if (Hit2.Score > Hit.Score)\r
+ Hit = Hit2;\r
+\r
+ if (opt_verbose)\r
+ Hit2.LogMe();\r
+ }\r
+ }\r
+\r
+ return Found;\r
+ }\r
--- /dev/null
+#ifndef seq_h\r
+#define seq_h\r
+\r
+struct ORFData;\r
+\r
+// Lightweight view of one sequence: pointers to its label and letters\r
+// plus bookkeeping flags. The struct neither allocates nor frees the\r
+// pointed-to data.\r
+struct SeqData\r
+ {\r
+ const char *Label;\r
+ const byte *Seq;\r
+ unsigned L; // number of letters in Seq\r
+ unsigned Index; // UINT_MAX until assigned\r
+\r
+// RevComp means that SeqData.Seq is reverse-complemented relative\r
+// to the sequence in the input file (query or db). Coordinates in\r
+// a hit (e.g., AlnData) will be relative to SeqData.Seq, so both\r
+// the sequence and the coordinates should be r.c.'d for output.\r
+ bool RevComp;\r
+ bool Nucleo;\r
+ const ORFData *ORFParent; // 0 when there is no parent ORF\r
+\r
+ SeqData()\r
+ {\r
+ Clear();\r
+ }\r
+\r
+ // Resets every member to its "unset" value.\r
+ void Clear()\r
+ {\r
+ Label = 0;\r
+ Seq = 0;\r
+ L = 0;\r
+ Index = UINT_MAX;\r
+ RevComp = false;\r
+ Nucleo = false;\r
+ ORFParent = 0;\r
+ }\r
+ };\r
+\r
+#endif // seq_h\r
--- /dev/null
+#include "myutils.h"\r
+#include "seqdb.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+#include "sfasta.h"\r
+#include "seq.h"\r
+\r
+// Writes one sequence in FASTA format to f: an optional ">Label" line\r
+// (omitted when Label is null) followed by the letters in rows of at\r
+// most 80 characters.\r
+void SeqToFasta(FILE *f, const char *Label, const byte *Seq, unsigned L)\r
+	{\r
+	const unsigned ROWLEN = 80;\r
+	if (Label != 0)\r
+		fprintf(f, ">%s\n", Label);\r
+\r
+	const unsigned RowCount = (L + ROWLEN - 1)/ROWLEN;\r
+	unsigned RowStart = 0;\r
+	for (unsigned Row = 0; Row < RowCount; ++Row, RowStart += ROWLEN)\r
+		{\r
+		unsigned RowEnd = RowStart + ROWLEN;\r
+		if (RowEnd >= L)\r
+			RowEnd = L;\r
+\r
+		const byte *Letter = Seq + RowStart;\r
+		const byte *Stop = Seq + RowEnd;\r
+		while (Letter < Stop)\r
+			fputc(*Letter++, f);\r
+		fputc('\n', f);\r
+		}\r
+	}\r
+\r
+// Releases all labels, sequences and index arrays via Clear().\r
+SeqDB::~SeqDB()\r
+ {\r
+ Clear();\r
+ }\r
+\r
+// Starts with an empty database. Clear(true) only initializes members;\r
+// it skips the free step because nothing has been allocated yet.\r
+SeqDB::SeqDB()\r
+ {\r
+ Clear(true);\r
+ }\r
+\r
+// Empties the database. Unless called from the constructor (ctor == true),\r
+// every stored label and sequence and the three index arrays are freed\r
+// first. All members are then reset to their initial values.\r
+void SeqDB::Clear(bool ctor)\r
+	{\r
+	if (!ctor)\r
+		{\r
+		for (unsigned i = 0; i < m_SeqCount; ++i)\r
+			{\r
+			// AddSeq() allocates strlen(Label) + 1 bytes per label, so the\r
+			// same count (including the NUL) is reported when freeing.\r
+			unsigned n = (unsigned) strlen(m_Labels[i]) + 1;\r
+			MYFREE(m_Labels[i], n, SeqDB);\r
+			MYFREE(m_Seqs[i], m_SeqLengths[i], SeqDB);\r
+			}\r
+		MYFREE(m_Labels, m_Size, SeqDB);\r
+		MYFREE(m_Seqs, m_Size, SeqDB);\r
+		MYFREE(m_SeqLengths, m_Size, SeqDB);\r
+		}\r
+\r
+	m_FileName.clear();\r
+	m_SeqCount = 0;\r
+	m_Size = 0;\r
+\r
+	m_Labels = 0;\r
+	m_Seqs = 0;\r
+	m_SeqLengths = 0;\r
+\r
+	m_Aligned = false;\r
+	m_IsNucleo = false;\r
+	m_IsNucleoSet = false;\r
+	}\r
+\r
+// Resets the database to empty and records the alphabet type explicitly,\r
+// so IsNucleo() can be called without running SetIsNucleo().\r
+void SeqDB::InitEmpty(bool Nucleo)\r
+ {\r
+ Clear();\r
+ m_IsNucleo = Nucleo;\r
+ m_IsNucleoSet = true;\r
+ }\r
+\r
+// Replaces the current contents with the sequences read from a FASTA\r
+// file, reporting progress while reading, then determines the alphabet\r
+// type with SetIsNucleo().\r
+void SeqDB::FromFasta(const string &FileName, bool AllowGaps)\r
+ {\r
+ Clear();\r
+ m_FileName = FileName;\r
+ SFasta SF;\r
+\r
+ SF.Open(FileName);\r
+ // Set after Open(); presumably Open() would otherwise reset it -- TODO confirm.\r
+ SF.m_AllowGaps = AllowGaps;\r
+\r
+ ProgressStep(0, 1000, "Reading %s", FileName.c_str());\r
+ for (;;)\r
+ {\r
+ unsigned QueryPctDoneX10 = SF.GetPctDoneX10();\r
+ ProgressStep(QueryPctDoneX10, 1000, "Reading %s", FileName.c_str());\r
+ // A null sequence pointer signals that there are no more sequences.\r
+ const byte *Seq = SF.GetNextSeq();\r
+ if (Seq == 0)\r
+ break;\r
+\r
+ const char *Label = SF.GetLabel();\r
+ unsigned L = SF.GetSeqLength();\r
+ // AddSeq() copies both the label and the letters.\r
+ AddSeq(Label, Seq, L);\r
+ }\r
+ ProgressStep(999, 1000, "Reading %s", FileName.c_str());\r
+\r
+ SetIsNucleo();\r
+\r
+ Progress("%s sequences\n", IntToStr(GetSeqCount()));\r
+ }\r
+\r
+// Writes every sequence in the database to a newly created FASTA file.\r
+void SeqDB::ToFasta(const string &FileName) const\r
+	{\r
+	FILE *Out = CreateStdioFile(FileName);\r
+\r
+	unsigned Next = 0;\r
+	while (Next < GetSeqCount())\r
+		{\r
+		ToFasta(Out, Next);\r
+		++Next;\r
+		}\r
+\r
+	CloseStdioFile(Out);\r
+	}\r
+\r
+// Writes sequence SeqIndex to f in rows of at most 80 letters, preceded\r
+// by a ">label" line when WithLabel is true. The formatting itself is\r
+// done by the free function ::SeqToFasta defined earlier in this file,\r
+// which omits the label line when given a null label.\r
+void SeqDB::SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel) const\r
+	{\r
+	const char *LabelOrNull = 0;\r
+	if (WithLabel)\r
+		LabelOrNull = GetLabel(SeqIndex);\r
+\r
+	const unsigned SeqLength = GetSeqLength(SeqIndex);\r
+	const byte *Letters = GetSeq(SeqIndex);\r
+	::SeqToFasta(f, LabelOrNull, Letters, SeqLength);\r
+	}\r
+\r
+// Writes one labelled FASTA record for sequence SeqIndex to f.\r
+void SeqDB::ToFasta(FILE *f, unsigned SeqIndex) const\r
+	{\r
+	asserta(SeqIndex < m_SeqCount);\r
+	// With WithLabel set, SeqToFasta() emits the ">label" line itself.\r
+	SeqToFasta(f, SeqIndex, true);\r
+	}\r
+\r
+// Length of the longest label in the database; 0 when it is empty.\r
+unsigned SeqDB::GetMaxLabelLength() const\r
+	{\r
+	unsigned Longest = 0;\r
+	for (unsigned i = 0, n = GetSeqCount(); i != n; ++i)\r
+		{\r
+		const unsigned Len = (unsigned) strlen(m_Labels[i]);\r
+		Longest = (Len > Longest) ? Len : Longest;\r
+		}\r
+	return Longest;\r
+	}\r
+\r
+// Length of the longest sequence in the database; 0 when it is empty.\r
+unsigned SeqDB::GetMaxSeqLength() const\r
+	{\r
+	unsigned Longest = 0;\r
+	for (unsigned i = 0, n = GetSeqCount(); i != n; ++i)\r
+		{\r
+		const unsigned Len = m_SeqLengths[i];\r
+		Longest = (Len > Longest) ? Len : Longest;\r
+		}\r
+	return Longest;\r
+	}\r
+\r
+// Writes a summary line to the log and, if the database is not empty,\r
+// a table with index, label (cut to 16 characters), length and letters\r
+// of every sequence.\r
+void SeqDB::LogMe() const\r
+	{\r
+	Log("\n");\r
+	const unsigned Count = GetSeqCount();\r
+	Log("SeqDB %u seqs, aligned=%c\n", Count, tof(m_Aligned));\r
+\r
+	if (Count != 0)\r
+		{\r
+		Log("Index Label Length Seq\n");\r
+		Log("----- ---------------- ------ ---\n");\r
+\r
+		unsigned Row = 0;\r
+		while (Row < Count)\r
+			{\r
+			const unsigned Len = m_SeqLengths[Row];\r
+			Log("%5u", Row);\r
+			Log(" %16.16s", m_Labels[Row]);\r
+			Log(" %6u", Len);\r
+			// Sequences carry no NUL terminator, hence the explicit precision.\r
+			Log(" %*.*s", Len, Len, m_Seqs[Row]);\r
+			Log("\n");\r
+			++Row;\r
+			}\r
+		}\r
+	}\r
+\r
+// Fills Buffer with a view of sequence Id. The pointers refer to storage\r
+// owned by this database; nothing is copied.\r
+void SeqDB::GetSeqData(unsigned Id, SeqData &Buffer) const\r
+	{\r
+	asserta(Id < m_SeqCount);\r
+\r
+	// Identity and contents.\r
+	Buffer.Index = Id;\r
+	Buffer.Label = m_Labels[Id];\r
+	Buffer.Seq = m_Seqs[Id];\r
+	Buffer.L = m_SeqLengths[Id];\r
+\r
+	// Database sequences are stored as read: forward strand, no ORF parent.\r
+	Buffer.RevComp = false;\r
+	Buffer.ORFParent = 0;\r
+	Buffer.Nucleo = IsNucleo();\r
+	}\r
+\r
+// Guesses whether the database holds nucleotide sequences by sampling\r
+// 100 random letters; it is nucleotide when more than 80 of them are\r
+// flagged in g_IsNucleoChar. An empty database is classified as\r
+// non-nucleotide.\r
+void SeqDB::SetIsNucleo()\r
+	{\r
+	const unsigned SeqCount = GetSeqCount();\r
+	unsigned N = 0;\r
+\r
+	// rand()%0 is undefined, so sampling is skipped for an empty database.\r
+	if (SeqCount > 0)\r
+		{\r
+		for (unsigned i = 0; i < 100; ++i)\r
+			{\r
+			unsigned SeqIndex = unsigned(rand()%SeqCount);\r
+			unsigned L = GetSeqLength(SeqIndex);\r
+			// A zero-length sequence has no letter to sample (and would\r
+			// again divide by zero); it counts as a non-nucleotide draw.\r
+			if (L == 0)\r
+				continue;\r
+\r
+			const byte *Seq = GetSeq(SeqIndex);\r
+			const unsigned Pos = unsigned(rand()%L);\r
+			byte c = Seq[Pos];\r
+\r
+			if (g_IsNucleoChar[c])\r
+				++N;\r
+			}\r
+		}\r
+\r
+	m_IsNucleo = (N > 80);\r
+	m_IsNucleoSet = true;\r
+	}\r
+\r
+// Sum of the lengths of all sequences in the database.\r
+unsigned SeqDB::GetTotalLength() const\r
+	{\r
+	unsigned Sum = 0;\r
+	unsigned Id = 0;\r
+	const unsigned Count = GetSeqCount();\r
+	while (Id < Count)\r
+		{\r
+		Sum += GetSeqLength(Id);\r
+		++Id;\r
+		}\r
+	return Sum;\r
+	}\r
+\r
+// Appends a copy of the given label and sequence and returns the index\r
+// of the new entry. The index arrays grow when they are full.\r
+// Label must be NUL-terminated; Seq is copied as L raw bytes.\r
+unsigned SeqDB::AddSeq(const char *Label, const byte *Seq, unsigned L)\r
+ {\r
+ StartTimer(AddSeq);\r
+ if (m_SeqCount >= m_Size)\r
+ {\r
+ // Grow by 50% plus 1024 entries and carry the existing pointers over.\r
+ unsigned NewSize = unsigned(m_Size*1.5) + 1024;\r
+ char **NewLabels = MYALLOC(char *, NewSize, SeqDB);\r
+ byte **NewSeqs = MYALLOC(byte *, NewSize, SeqDB);\r
+ unsigned *NewSeqLengths = MYALLOC(unsigned, NewSize, SeqDB);\r
+\r
+ for (unsigned i = 0; i < m_SeqCount; ++i)\r
+ {\r
+ NewLabels[i] = m_Labels[i];\r
+ NewSeqs[i] = m_Seqs[i];\r
+ NewSeqLengths[i] = m_SeqLengths[i];\r
+ }\r
+\r
+ // NOTE(review): the old arrays were allocated with m_Size entries but\r
+ // are released with m_SeqCount as the count -- confirm the two are\r
+ // always equal at this point.\r
+ MYFREE(m_Labels, m_SeqCount, SeqDB);\r
+ MYFREE(m_Seqs, m_SeqCount, SeqDB);\r
+ MYFREE(m_SeqLengths, m_SeqCount, SeqDB);\r
+\r
+ m_Labels = NewLabels;\r
+ m_Seqs = NewSeqs;\r
+ m_SeqLengths = NewSeqLengths;\r
+ m_Size = NewSize;\r
+ }\r
+\r
+ unsigned Index = m_SeqCount++;\r
+ // The sequence is stored without a terminating NUL.\r
+ m_Seqs[Index] = MYALLOC(byte, L, SeqDB);\r
+ memcpy(m_Seqs[Index], Seq, L);\r
+\r
+ // The label copy includes its terminating NUL.\r
+ unsigned n = strlen(Label) + 1;\r
+ m_Labels[Index] = MYALLOC(char, n, SeqDB);\r
+ memcpy(m_Labels[Index], Label, n);\r
+\r
+ // The database counts as aligned while every sequence has the same\r
+ // length as the first one.\r
+ if (Index == 0)\r
+ m_Aligned = true;\r
+ else\r
+ m_Aligned = (m_Aligned && L == m_SeqLengths[0]);\r
+\r
+ m_SeqLengths[Index] = L;\r
+\r
+ EndTimer(AddSeq);\r
+ return Index;\r
+ }\r
+\r
+// Index of the first sequence whose label equals Label exactly.\r
+// A label that is not present is a fatal error.\r
+unsigned SeqDB::GetIndex(const char *Label) const\r
+	{\r
+	unsigned i = 0;\r
+	while (i < m_SeqCount)\r
+		{\r
+		const bool Match = (strcmp(Label, m_Labels[i]) == 0);\r
+		if (Match)\r
+			return i;\r
+		++i;\r
+		}\r
+\r
+	Die("SeqDB::GetIndex(%s), not found", Label);\r
+	return UINT_MAX;\r
+	}\r
+\r
+// Rebuilds LabelToIndex so that each label maps to its sequence index.\r
+// Two sequences with the same label are a fatal error.\r
+void SeqDB::MakeLabelToIndex(map<string, unsigned> &LabelToIndex)\r
+	{\r
+	LabelToIndex.clear();\r
+\r
+	unsigned SeqIndex = 0;\r
+	while (SeqIndex < m_SeqCount)\r
+		{\r
+		const string Key(GetLabel(SeqIndex));\r
+		const bool AlreadySeen = (LabelToIndex.find(Key) != LabelToIndex.end());\r
+		if (AlreadySeen)\r
+			Die("Duplicate label: %s", Key.c_str());\r
+		LabelToIndex[Key] = SeqIndex;\r
+		++SeqIndex;\r
+		}\r
+	}\r
--- /dev/null
+#ifndef seqdb_h\r
+#define seqdb_h\r
+\r
+#include <vector>\r
+#include <map>\r
+#include "myutils.h"\r
+\r
+struct SeqData;\r
+\r
+using namespace std;\r
+\r
+// In-memory collection of labelled sequences. The database owns copies\r
+// of every label and sequence added through AddSeq().\r
+struct SeqDB\r
+ {\r
+private:\r
+ // Declared but not defined here: copying a SeqDB is not supported.\r
+ SeqDB(const SeqDB &rhs);\r
+ SeqDB &operator=(const SeqDB &rhs);\r
+\r
+public:\r
+ string m_FileName; // set by FromFasta(); empty otherwise\r
+ char **m_Labels; // m_Labels[i]: NUL-terminated label of sequence i\r
+ byte **m_Seqs; // m_Seqs[i]: letters of sequence i, no terminator\r
+ unsigned *m_SeqLengths; // m_SeqLengths[i]: number of letters in sequence i\r
+ unsigned m_SeqCount; // number of sequences stored\r
+ unsigned m_Size; // allocated capacity of the three arrays above\r
+\r
+ bool m_Aligned; // true while all sequences have the same length\r
+ bool m_IsNucleo; // valid only when m_IsNucleoSet is true\r
+ bool m_IsNucleoSet;\r
+\r
+public:\r
+ SeqDB();\r
+ ~SeqDB();\r
+ void Clear(bool ctor = false);\r
+ void InitEmpty(bool Nucleo);\r
+\r
+ unsigned AddSeq(const char *Label, const byte *Seq, unsigned L);\r
+\r
+ // Accessors below assert that SeqIndex is in range.\r
+ byte *GetSeq(unsigned SeqIndex) const\r
+ {\r
+ asserta(SeqIndex < m_SeqCount);\r
+ return m_Seqs[SeqIndex];\r
+ }\r
+\r
+ const char *GetLabel(unsigned SeqIndex) const\r
+ {\r
+ asserta(SeqIndex < m_SeqCount);\r
+ return m_Labels[SeqIndex];\r
+ }\r
+\r
+ unsigned GetSeqLength(unsigned SeqIndex) const\r
+ {\r
+ asserta(SeqIndex < m_SeqCount);\r
+ return m_SeqLengths[SeqIndex];\r
+ }\r
+\r
+ unsigned GetSeqCount() const\r
+ {\r
+ return m_SeqCount;\r
+ }\r
+\r
+ // Number of unordered pairs of distinct sequences, n*(n-1)/2.\r
+ unsigned GetPairCount() const\r
+ {\r
+ unsigned SeqCount = GetSeqCount();\r
+ return (SeqCount*(SeqCount - 1))/2;\r
+ }\r
+\r
+ // Position of an unordered pair in a triangular layout: with hi the\r
+ // larger and lo the smaller index, the result is hi*(hi-1)/2 + lo.\r
+ unsigned GetPairIndex(unsigned SeqIndex1, unsigned SeqIndex2) const\r
+ {\r
+ if (SeqIndex1 > SeqIndex2)\r
+ return (SeqIndex1*(SeqIndex1 - 1))/2 + SeqIndex2;\r
+ return (SeqIndex2*(SeqIndex2 - 1))/2 + SeqIndex1;\r
+ }\r
+\r
+ // Number of alignment columns; fatal error unless the database is\r
+ // aligned and not empty.\r
+ unsigned GetColCount() const\r
+ {\r
+ if (!m_Aligned)\r
+ Die("SeqDB::GetColCount, not aligned");\r
+ if (m_SeqCount == 0)\r
+ Die("SeqDB::GetColCount, empty");\r
+ return m_SeqLengths[0];\r
+ }\r
+\r
+ // Asserts that the alphabet type has been set (InitEmpty or SetIsNucleo).\r
+ bool IsNucleo() const\r
+ {\r
+ asserta(m_IsNucleoSet);\r
+ return m_IsNucleo;\r
+ }\r
+\r
+ void GetSeqData(unsigned Id, SeqData &Buffer) const;\r
+\r
+ unsigned GetMaxLabelLength() const;\r
+ unsigned GetMaxSeqLength() const;\r
+ void SetIsNucleo();\r
+ unsigned GetIndex(const char *Label) const;\r
+ void MakeLabelToIndex(map<string, unsigned> &LabelToIndex);\r
+\r
+ void LogMe() const;\r
+ void FromFasta(const string &FileName, bool AllowGaps = false);\r
+\r
+ void ToFasta(const string &FileName) const;\r
+ void ToFasta(FILE *f, unsigned SeqIndex) const;\r
+ void SeqToFasta(FILE *f, unsigned SeqIndex, bool WithLabel = false) const;\r
+\r
+ unsigned GetTotalLength() const;\r
+ };\r
+\r
+bool isgap(byte c);\r
+\r
+#endif\r
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
--- /dev/null
+#include "myutils.h"
+#include "mx.h"
+
+Mx<float> g_SubstMxf;
+float **g_SubstMx;
+
+static const char Alphabet[] = "ACGTU";
+
+// Builds the global nucleotide substitution matrix g_SubstMx, indexed by\r
+// letter codes: Match for identical letters of "ACGTU", Mismatch for\r
+// different ones, in every combination of upper and lower case. Scores\r
+// between N/n and those letters are set to 0.\r
+// Only the first call has any effect. Match must be > 0 and Mismatch < 0.\r
+void SetNucSubstMx(double Match, double Mismatch)\r
+ {\r
+ // One-time initialization; the flag is set before the arguments are checked.\r
+ static bool Done = false;\r
+ if (Done)\r
+ return;\r
+ Done = true;\r
+\r
+ if (Match <= 0.0)\r
+ Die("Match score should be +ve");\r
+ if (Mismatch >= 0.0)\r
+ Die("Mismatch score should be -ve");\r
+\r
+ unsigned N = unsigned(strlen(Alphabet));\r
+\r
+ // 256 x 256 so that any byte value can be used as an index.\r
+ g_SubstMxf.Alloc("NUCMX", 256, 256);\r
+ strcpy(g_SubstMxf.m_Alpha, "ACGT");\r
+ g_SubstMxf.Init(0); // presumably sets every cell to 0 -- TODO confirm\r
+ g_SubstMx = g_SubstMxf.GetData();\r
+ for (unsigned i = 0; i < N; ++i)\r
+ {\r
+ for (unsigned j = 0; j < N; ++j)\r
+ {\r
+ // T and U are different letters here, so T vs U scores Mismatch.\r
+ float v = float(i == j ? Match : Mismatch);\r
+\r
+ byte ui = (byte) toupper(Alphabet[i]);\r
+ byte uj = (byte) toupper(Alphabet[j]);\r
+ byte li = (byte) tolower(ui);\r
+ byte lj = (byte) tolower(uj);\r
+ ui = (byte) toupper(ui);\r
+ uj = (byte) toupper(uj);\r
+\r
+ // Fill all upper/lower case combinations, in both orders.\r
+ g_SubstMx[ui][uj] = v;\r
+ g_SubstMx[uj][ui] = v;\r
+\r
+ g_SubstMx[ui][lj] = v;\r
+ g_SubstMx[uj][li] = v;\r
+\r
+ g_SubstMx[li][uj] = v;\r
+ g_SubstMx[lj][ui] = v;\r
+\r
+ g_SubstMx[li][lj] = v;\r
+ g_SubstMx[lj][li] = v;\r
+ }\r
+ }\r
+\r
+ // Wildcard N/n against every alphabet letter scores 0.\r
+ for (unsigned j = 0; j < N; ++j)\r
+ {\r
+ float v = 0.0f;\r
+\r
+ byte ui = (byte) 'N';\r
+ byte uj = (byte) toupper(Alphabet[j]);\r
+ byte li = (byte) 'n';\r
+ byte lj = (byte) tolower(uj);\r
+ ui = (byte) toupper(ui);\r
+ uj = (byte) toupper(uj);\r
+\r
+ g_SubstMx[ui][uj] = v;\r
+ g_SubstMx[uj][ui] = v;\r
+\r
+ g_SubstMx[ui][lj] = v;\r
+ g_SubstMx[uj][li] = v;\r
+\r
+ g_SubstMx[li][uj] = v;\r
+ g_SubstMx[lj][ui] = v;\r
+\r
+ g_SubstMx[li][lj] = v;\r
+ g_SubstMx[lj][li] = v;\r
+ }\r
+ }\r
--- /dev/null
+#include "sfasta.h"\r
+#include "orf.h"\r
+#include "alpha.h"\r
+#include "timing.h"\r
+\r
+// True when c is one of the two gap symbols, '-' or '.'.\r
+static inline bool isgap(byte c)\r
+    {\r
+    switch (c)\r
+        {\r
+    case '-':\r
+    case '.':\r
+        return true;\r
+    default:\r
+        return false;\r
+        }\r
+    }\r
+\r
+const unsigned BufferSize = 16*1024*1024;\r
+\r
+// Returns the length of the longest run of identical consecutive bytes\r
+// (homopolymer) in Seq[0..L-1], or 0 when the sequence is empty.\r
+//\r
+// The earlier version measured a run only when it ended and used\r
+// "i - Start" for the final position as well, so a run that reached the\r
+// last byte was under-counted by one (e.g. "AAAA" gave 3, "ABB" gave 1),\r
+// and Seq[0] was read even when L was zero.\r
+static unsigned GetMaxPoly(const byte *Seq, unsigned L)\r
+    {\r
+    if (L == 0)\r
+        return 0;\r
+\r
+    unsigned MaxLen = 1;\r
+    unsigned RunLen = 1;\r
+    for (unsigned i = 1; i < L; ++i)\r
+        {\r
+        if (Seq[i] == Seq[i-1])\r
+            {\r
+            ++RunLen;\r
+            if (RunLen > MaxLen)\r
+                MaxLen = RunLen;\r
+            }\r
+        else\r
+            RunLen = 1;\r
+        }\r
+    return MaxLen;\r
+    }\r
+\r
+// Constructs a reader with no file attached.\r
+// Every data member is given the same "nothing open" value that Clear()\r
+// assigns; m_SeqIndex, m_AllowGaps and m_TooPolyCount were previously left\r
+// uninitialized here.\r
+SFasta::SFasta()\r
+    {\r
+    m_FileName = "";\r
+    m_File = 0;\r
+    m_Buffer = 0;\r
+    m_BufferSize = 0;\r
+    m_BufferOffset = 0;\r
+    m_BufferBytes = 0;\r
+    m_FilePos = 0;\r
+    m_FileSize = 0;\r
+    m_Label = 0;\r
+    m_SeqLength = 0;\r
+    m_SeqIndex = UINT_MAX;\r
+    m_AllowGaps = false;\r
+    m_TooShortCount = 0;\r
+    m_TooLongCount = 0;\r
+    m_ShortestLength = 0;\r
+    m_LongestLength = 0;\r
+    m_TooPolyCount = 0;\r
+    m_IsNucleo = false;\r
+    m_IsNucleoSet = false;\r
+    }\r
+\r
+// Destructor: frees the cache buffer and closes the file by calling Clear().\r
+SFasta::~SFasta()\r
+ {\r
+ Clear();\r
+ }\r
+\r
+// Frees the cache buffer, closes the file if one is open and returns every\r
+// member to its "nothing open" value.\r
+void SFasta::Clear()\r
+    {\r
+// Release resources before the bookkeeping below is wiped; the MYFREE\r
+// macro receives m_BufferSize, so it has to run while the size is intact.\r
+    MYFREE(m_Buffer, m_BufferSize, SFasta);\r
+    if (m_File != 0)\r
+        CloseStdioFile(m_File);\r
+\r
+// File.\r
+    m_File = 0;\r
+    m_FileName = "";\r
+    m_FileSize = 0;\r
+    m_FilePos = 0;\r
+\r
+// Cache.\r
+    m_Buffer = 0;\r
+    m_BufferSize = 0;\r
+    m_BufferBytes = 0;\r
+    m_BufferOffset = 0;\r
+\r
+// Current record.\r
+    m_Label = 0;\r
+    m_SeqLength = 0;\r
+    m_SeqIndex = UINT_MAX;\r
+\r
+// Options and alphabet detection.\r
+    m_AllowGaps = false;\r
+    m_IsNucleoSet = false;\r
+    m_IsNucleo = false;\r
+\r
+// Discard statistics.\r
+    m_ShortestLength = 0;\r
+    m_LongestLength = 0;\r
+    m_TooShortCount = 0;\r
+    m_TooLongCount = 0;\r
+    m_TooPolyCount = 0;\r
+    }\r
+\r
+// Writes the reader's file, cache and current-record state to the log.\r
+// File offsets are narrowed to unsigned for printing.\r
+void SFasta::LogMe() const\r
+    {\r
+    const unsigned FileSize = (unsigned) m_FileSize;\r
+    const unsigned FilePos = (unsigned) m_FilePos;\r
+\r
+    Log("\n");\r
+    Log("SFasta::LogMe()\n");\r
+    Log("FileName=%s\n", m_FileName.c_str());\r
+    Log("FileSize=%u\n", FileSize);\r
+    Log("FilePos=%u\n", FilePos);\r
+    Log("BufferSize=%u\n", m_BufferSize);\r
+    Log("BufferPos=%u\n", m_BufferOffset);\r
+    Log("BufferBytes=%u\n", m_BufferBytes);\r
+    if (m_Label != 0)\r
+        Log("Label=%s\n", m_Label);\r
+    else\r
+        Log("Label=NULL\n");\r
+    Log("SeqLength=%u\n", m_SeqLength);\r
+    }\r
+\r
+// Returns the next sequence whose length passes the opt_minlen / opt_maxlen\r
+// filters, or zero at end-of-file. Rejected sequences are counted and the\r
+// shortest / longest rejected lengths are remembered; a summary warning for\r
+// each non-zero counter is issued when end-of-file is reached.\r
+// An opt_maxlen of zero disables the upper limit.\r
+const byte *SFasta::GetNextSeq()\r
+    {\r
+    const byte *Seq = GetNextSeqLo();\r
+    while (Seq != 0)\r
+        {\r
+        const bool TooShort = (m_SeqLength < opt_minlen);\r
+        const bool TooLong = (opt_maxlen != 0 && m_SeqLength > opt_maxlen);\r
+\r
+        if (TooShort)\r
+            {\r
+            ++m_TooShortCount;\r
+            if (m_ShortestLength == 0 || m_SeqLength < m_ShortestLength)\r
+                m_ShortestLength = m_SeqLength;\r
+            }\r
+        else if (TooLong)\r
+            {\r
+            if (m_LongestLength == 0 || m_SeqLength > m_LongestLength)\r
+                m_LongestLength = m_SeqLength;\r
+            ++m_TooLongCount;\r
+            }\r
+        else\r
+            return Seq;\r
+\r
+        Seq = GetNextSeqLo();\r
+        }\r
+\r
+// End of file: report what was thrown away.\r
+    if (m_TooShortCount > 0)\r
+        Warning("%u short sequences (--minlen %u, shortest %u) discarded from %s",\r
+          m_TooShortCount, opt_minlen, m_ShortestLength, m_FileName.c_str());\r
+    if (m_TooLongCount > 0)\r
+        Warning("%u long sequences (--maxlen %u, longest %u) discarded from %s",\r
+          m_TooLongCount, opt_maxlen, m_LongestLength, m_FileName.c_str());\r
+    if (m_TooPolyCount > 0)\r
+        Warning("%u sequences with long homopolymers discarded (--maxpoly %u)",\r
+          m_TooPolyCount, opt_maxpoly);\r
+    return 0;\r
+    }\r
+\r
+// Parses the next FASTA record from the cache, refilling the cache first\r
+// when it is exhausted. Returns a pointer to the sequence letters, or zero\r
+// at end-of-file.\r
+//\r
+// On return:\r
+//   m_Label        points at the NUL-terminated label inside m_Buffer,\r
+//   m_SeqLength    is the number of sequence bytes kept,\r
+//   m_SeqIndex     is the zero-based index of this record,\r
+//   m_BufferOffset is at the '>' of the next record or equals m_BufferBytes.\r
+// Sequence data is compacted in place (line ends and rejected characters are\r
+// dropped) and is NOT NUL-terminated; use m_SeqLength.\r
+const byte *SFasta::GetNextSeqLo()\r
+    {\r
+// End of cache?\r
+    if (m_BufferOffset == m_BufferBytes)\r
+        {\r
+    // End of file?\r
+        if (m_FilePos == m_FileSize)\r
+            return 0;\r
+        FillCache();\r
+        }\r
+\r
+    StartTimer(SF_GetNextSeq);\r
+    asserta(m_Buffer[m_BufferOffset] == '>');\r
+    m_Label = (char *) (m_Buffer + m_BufferOffset + 1);\r
+\r
+// Scan to end-of-line of the label. Only the first m_BufferBytes bytes come\r
+// from the current fill; bytes beyond that (up to m_BufferSize) are stale or\r
+// uninitialized, so the scan must stop at m_BufferBytes.\r
+    byte *ptr = 0;\r
+    for (unsigned i = m_BufferOffset; i < m_BufferBytes; ++i)\r
+        {\r
+        const byte c = m_Buffer[i];\r
+        if (c == '\n' || c == '\r')\r
+            {\r
+            ptr = m_Buffer + i;\r
+            break;\r
+            }\r
+        }\r
+    asserta(ptr != 0);\r
+\r
+    if (opt_trunclabels)\r
+        {\r
+    // Cut the label at the first white-space character (the line end at the\r
+    // latest). The cast keeps isspace() defined for bytes >= 0x80.\r
+        for (char *p = m_Label; *p; ++p)\r
+            if (isspace((unsigned char) *p))\r
+                {\r
+                *p = 0;\r
+                break;\r
+                }\r
+        }\r
+    else\r
+        {\r
+    // Keep the whole line: tabs become blanks, the line end becomes NUL.\r
+        for (char *p = m_Label; *p; ++p)\r
+            {\r
+            if (*p == '\t')\r
+                *p = ' ';\r
+            else if (*p == '\r' || *p == '\n')\r
+                {\r
+                *p = 0;\r
+                break;\r
+                }\r
+            }\r
+        }\r
+\r
+// ptr points to end-of-line.\r
+// Move to start of sequence data.\r
+    byte *Seq = ++ptr;\r
+\r
+// Delete white space in-place\r
+    byte *To = ptr;\r
+    m_BufferOffset = (unsigned) (ptr - m_Buffer);\r
+    while (m_BufferOffset < m_BufferBytes)\r
+        {\r
+        byte c = m_Buffer[m_BufferOffset];\r
+        if (c == '>')\r
+            {\r
+        // A '>' starts a new record only at the beginning of a line.\r
+            char prevc = '\n';\r
+            if (m_BufferOffset > 0)\r
+                prevc = m_Buffer[m_BufferOffset-1];\r
+            if (prevc == '\n' || prevc == '\r')\r
+                break;\r
+            }\r
+        ++m_BufferOffset;\r
+        if (isalpha(c) || (isgap(c) && m_AllowGaps))\r
+            *To++ = c;\r
+        else if (c == '\n' || c == '\r')\r
+            continue;\r
+        else\r
+            {\r
+        // Anything else is dropped; warn once per process.\r
+            const char *Label = (m_Label == 0 ? "" : m_Label);\r
+            static bool WarningDone = false;\r
+            if (!WarningDone)\r
+                {\r
+                if (isgap(c))\r
+                    Warning("Ignoring gaps in FASTA file '%s'",\r
+                      m_FileName.c_str());\r
+                else if (isprint(c))\r
+                    Warning("Invalid FASTA file '%s', non-letter '%c' in sequence >%s",\r
+                      m_FileName.c_str(), c, Label);\r
+                else\r
+                    Warning("Invalid FASTA file '%s', non-printing byte (hex %02x) in sequence >%s",\r
+                      m_FileName.c_str(), c, Label);\r
+                WarningDone = true;\r
+                }\r
+            continue;\r
+            }\r
+        }\r
+    m_SeqLength = unsigned(To - Seq);\r
+\r
+    if (m_SeqIndex == UINT_MAX)\r
+        m_SeqIndex = 0;\r
+    else\r
+        ++m_SeqIndex;\r
+\r
+    EndTimer(SF_GetNextSeq);\r
+    return Seq;\r
+    }\r
+\r
+// Attaches the reader to FileName: resets all state with Clear(), opens the\r
+// file, allocates a cache of BufferSize bytes and records the file size.\r
+// The cache is left empty here (m_BufferBytes stays 0 after Clear()).\r
+void SFasta::Open(const string &FileName)\r
+ {\r
+ Clear();\r
+ m_FileName = FileName;\r
+ m_File = OpenStdioFile(FileName);\r
+ m_BufferSize = BufferSize;\r
+ //m_Buffer = myalloc<byte>(m_BufferSize);\r
+ m_Buffer = MYALLOC(byte, m_BufferSize, SFasta);\r
+ m_FileSize = GetStdioFileSize(m_File);\r
+ }\r
+\r
+// Repositions the reader at the beginning of the file. The cache is marked\r
+// empty, so the next GetNextSeq() refills it from file offset 0.\r
+//\r
+// The record index and the discard statistics are reset too. Otherwise a\r
+// pass made before the rewind (such as the sampling done by SetIsNucleo())\r
+// would make the first record after the rewind report a non-zero index and\r
+// would count its discarded sequences a second time.\r
+void SFasta::Rewind()\r
+    {\r
+    m_BufferOffset = 0;\r
+    m_BufferBytes = 0;\r
+    m_FilePos = 0;\r
+\r
+    m_SeqIndex = UINT_MAX;\r
+\r
+    m_TooShortCount = 0;\r
+    m_TooLongCount = 0;\r
+    m_TooPolyCount = 0;\r
+    m_ShortestLength = 0;\r
+    m_LongestLength = 0;\r
+    }\r
+\r
+// Decides whether the file holds nucleotide sequences by sampling from the\r
+// start of the file until more than 256 letters have been seen (or the file\r
+// ends), then rewinds. The file counts as nucleotide when more than 90% of\r
+// the sampled letters are flagged in g_IsNucleoChar, or when no letters\r
+// were found at all. Must be called at beginning-of-file.\r
+// Stores the verdict in m_IsNucleo and returns it.\r
+bool SFasta::SetIsNucleo()\r
+    {\r
+    if (m_FilePos != 0)\r
+        Die("SFasta::IsNucleo, not at BOF");\r
+\r
+    unsigned Sampled = 0;\r
+    unsigned NucleoSampled = 0;\r
+    const byte *Seq = 0;\r
+    while (Sampled <= 256 && (Seq = GetNextSeq()) != 0)\r
+        {\r
+        const unsigned L = GetSeqLength();\r
+        for (unsigned Pos = 0; Pos < L; ++Pos)\r
+            {\r
+            if (g_IsNucleoChar[Seq[Pos]])\r
+                ++NucleoSampled;\r
+            }\r
+        Sampled += L;\r
+        }\r
+    Rewind();\r
+\r
+    m_IsNucleoSet = true;\r
+    if (Sampled == 0)\r
+        m_IsNucleo = true;\r
+    else\r
+        m_IsNucleo = double(NucleoSampled)/Sampled > 0.9;\r
+    return m_IsNucleo;\r
+    }\r
+\r
+// Loads the next chunk of the file, starting at m_FilePos, into m_Buffer.\r
+// The chunk must begin with '>'. Unless it is the last chunk of the file,\r
+// it is cut back to the last '>' that follows a line end, so that the cache\r
+// never ends with a partial record; the cut-off bytes are re-read on the\r
+// next call because m_FilePos advances only by the bytes kept.\r
+// Dies if the input does not start with '>' or if a single record does not\r
+// fit in the buffer.\r
+void SFasta::FillCache()\r
+ {\r
+ StartTimer(SF_FillCache);\r
+ asserta(m_FilePos < m_FileSize);\r
+\r
+// off_t may be larger type than unsigned, e.g. 64- vs. 32-bit.\r
+ off_t otBytesToRead = m_FileSize - m_FilePos;\r
+\r
+// Read at most one buffer; if more remains this is not the final chunk.\r
+ bool FinalBuffer = true;\r
+ if (otBytesToRead > (off_t) m_BufferSize)\r
+ {\r
+ FinalBuffer = false;\r
+ otBytesToRead = m_BufferSize;\r
+ }\r
+\r
+ unsigned BytesToRead = unsigned(otBytesToRead);\r
+ asserta(BytesToRead > 0);\r
+ asserta(BytesToRead <= m_BufferSize);\r
+\r
+ SetStdioFilePos(m_File, m_FilePos);\r
+ ReadStdioFile(m_File, m_Buffer, BytesToRead);\r
+ if (m_Buffer[0] != '>')\r
+ {\r
+ if (m_FilePos == 0)\r
+ Die("Input is not FASTA file");\r
+ else\r
+ Die("SFasta::FillCache() failed, expected '>'");\r
+ }\r
+\r
+ m_BufferOffset = 0;\r
+\r
+// If last buffer in file, done\r
+ if (FinalBuffer)\r
+ {\r
+ m_BufferBytes = BytesToRead;\r
+ m_FilePos += BytesToRead;\r
+ EndTimer(SF_FillCache);\r
+ return;\r
+ }\r
+\r
+// If not last buffer, truncate any partial sequence\r
+// at end of buffer. Search backwards to find last '>'.\r
+// The loop stops before reaching m_Buffer, so ptr[-1] is always in bounds.\r
+ byte *ptr = m_Buffer + BytesToRead - 1;\r
+ while (ptr > m_Buffer)\r
+ {\r
+ if (ptr[0] == '>' && (ptr[-1] == '\n' || ptr[-1] == '\r'))\r
+ break;\r
+ --ptr;\r
+ }\r
+\r
+// ptr == m_Buffer means no record boundary was found after the first byte.\r
+ if (ptr == m_Buffer)\r
+ {\r
+ LogMe();\r
+ if (*ptr != '>')\r
+ {\r
+ // No '>' found.\r
+ // This might technically be legal FASTA if the entire\r
+ // buffer is white space, but strange if not the last buffer\r
+ // in the file, so quit anyway.\r
+ Die("Failed to find '>' (pos=%u, bytes=%u)",\r
+ (unsigned) m_FilePos, BytesToRead);\r
+ }\r
+ else\r
+ {\r
+ // Entire buffer is one sequence which may be truncated.\r
+ Die("Sequence too long (pos=%u, bytes=%u)",\r
+ (unsigned) m_FilePos, BytesToRead);\r
+ }\r
+ }\r
+\r
+ asserta(*ptr == '>');\r
+\r
+// Keep only the complete records; the next fill starts at the cut point.\r
+ m_BufferBytes = unsigned(ptr - m_Buffer);\r
+ m_FilePos += m_BufferBytes;\r
+\r
+ EndTimer(SF_FillCache);\r
+ }\r
+\r
+// Progress through the file in tenths of a percent, for progress displays.\r
+// Returns 0 only when nothing has been read yet (or the file is empty);\r
+// otherwise the value is kept inside the range 1..998.\r
+unsigned SFasta::GetPctDoneX10() const\r
+    {\r
+    if (m_FileSize == 0 || m_FilePos == 0)\r
+        return 0;\r
+\r
+// m_FilePos is the file offset just past the cached data, so the cache\r
+// starts m_BufferBytes before it.\r
+    assert(m_FilePos >= (off_t) m_BufferBytes);\r
+    const off_t CacheStart = m_FilePos - m_BufferBytes;\r
+    const off_t Pos = CacheStart + m_BufferOffset;\r
+\r
+    const unsigned Tenths = unsigned(10.0*double(Pos)*100.0/double(m_FileSize));\r
+    if (Tenths >= 999)\r
+        return 998;\r
+    return Tenths == 0 ? 1 : Tenths;\r
+    }\r
+\r
+// Progress through the file as a percentage (0..100) of the file size,\r
+// based on the file offset of the current position within the cache.\r
+// Returns 0 when nothing has been read yet or the file is empty.\r
+double SFasta::GetPctDone() const\r
+    {\r
+    if (m_FileSize == 0 || m_FilePos == 0)\r
+        return 0;\r
+\r
+    assert(m_FilePos >= (off_t) m_BufferBytes);\r
+    const off_t CacheStart = m_FilePos - m_BufferBytes;\r
+    const off_t Pos = CacheStart + m_BufferOffset;\r
+\r
+    const double Pct = double(Pos)*100.0/double(m_FileSize);\r
+    return Pct;\r
+    }\r
+\r
+// Reads the next sequence and describes it in SD. Returns false at\r
+// end-of-file, in which case only SD.Seq has been written (set to zero).\r
+// SD.Label and SD.Seq point into the reader's cache, not into copies.\r
+bool SFasta::GetNextSD(SeqData &SD)\r
+    {\r
+    const byte *Seq = GetNextSeq();\r
+    SD.Seq = Seq;\r
+    if (Seq == 0)\r
+        return false;\r
+\r
+    SD.Label = GetLabel();\r
+    SD.L = GetSeqLength();\r
+    SD.Index = GetSeqIndex();\r
+    SD.ORFParent = 0;\r
+    SD.Nucleo = GetIsNucleo();\r
+    SD.RevComp = false;\r
+    return true;\r
+    }\r
+\r
+#if TEST\r
+// Test driver (compiled only when TEST is non-zero): reads every sequence\r
+// of opt_input through SFasta, optionally logs each record when opt_verbose\r
+// is set, and reports the sequence and letter totals.\r
+void TestSFasta()\r
+ {\r
+ SFasta SF;\r
+ SF.Open(opt_input);\r
+\r
+ if (opt_verbose)\r
+ {\r
+ Log(" Index Length Label\n");\r
+ Log("------- ------- -----\n");\r
+ }\r
+\r
+ unsigned Index = 0;\r
+ unsigned SeqCount = 0;\r
+// Accumulated as double, presumably so totals beyond the range of unsigned\r
+// do not wrap.\r
+ double LetterCount = 0.0;\r
+ ProgressStep(0, 1000, "Reading");\r
+ for (;;)\r
+ {\r
+ const byte *Seq = SF.GetNextSeq();\r
+ if (Seq == 0)\r
+ break;\r
+ ProgressStep(SF.GetPctDoneX10(), 1000, "Reading");\r
+ const char *Label = SF.GetLabel();\r
+ unsigned L = SF.GetSeqLength();\r
+ ++SeqCount;\r
+ LetterCount += L;\r
+\r
+ if (opt_verbose)\r
+ {\r
+ Log(">%7u %7u '%s'\n", Index, L, Label);\r
+ // The sequence is not NUL-terminated, hence the explicit %*.*s width.\r
+ Log("+%7.7s %7.7s \"%*.*s\"\n", "", "", L, L, Seq);\r
+ }\r
+\r
+ ++Index;\r
+ }\r
+ ProgressStep(999, 1000, "Reading");\r
+\r
+ Progress("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount));\r
+ Log("%u seqs, %s letters\n", SeqCount, FloatToStr(LetterCount));\r
+ }\r
+#endif // TEST\r
--- /dev/null
+#ifndef sfasta_h\r
+#define sfasta_h\r
+\r
+#include "myutils.h"\r
+#include "seq.h"\r
+\r
+typedef void (*ON_START_XSEQ)(const SeqData &SD);\r
+typedef void (*ON_END_XSEQ)(const SeqData &SD);\r
+\r
+// Sequential reader for FASTA file format.\r
+// Serves sequences in file order to save memory.\r
+// Caches biggish chunks to compromise memory vs. speed.\r
+// Typical use: Open(), optionally SetIsNucleo(), then call GetNextSeq() or\r
+// GetNextSD() until it reports end-of-file. Pointers handed out (sequence,\r
+// label) refer to the internal cache rather than to copies.\r
+class SFasta\r
+ {\r
+public:\r
+// Name of the file given to Open(); used in warnings.\r
+ string m_FileName;\r
+\r
+// Open file, or zero when no file is attached.\r
+ FILE *m_File;\r
+\r
+// When true, gap characters ('-' and '.') are kept in sequences;\r
+// otherwise they are dropped with a warning.\r
+ bool m_AllowGaps;\r
+\r
+// Size of the file, set by Open().\r
+ off_t m_FileSize;\r
+\r
+// Position to start next read\r
+ off_t m_FilePos;\r
+\r
+// Cached data.\r
+ byte *m_Buffer;\r
+\r
+// Bytes allocated to m_Buffer\r
+ unsigned m_BufferSize;\r
+\r
+// Current position in buffer, normally points to '>'\r
+ unsigned m_BufferOffset;\r
+\r
+// File data in buffer <= m_BufferSize\r
+ unsigned m_BufferBytes;\r
+\r
+// Current label\r
+// Points into m_Buffer, not a separate buffer.\r
+ char *m_Label;\r
+\r
+// Current sequence length\r
+ unsigned m_SeqLength;\r
+\r
+// Current seq index\r
+// UINT_MAX until the first sequence has been read.\r
+ unsigned m_SeqIndex;\r
+\r
+// Statistics on sequences rejected by GetNextSeq(): lengths of the shortest\r
+// and longest rejected sequences and the number rejected for each reason.\r
+ unsigned m_ShortestLength;\r
+ unsigned m_LongestLength;\r
+ unsigned m_TooShortCount;\r
+ unsigned m_TooLongCount;\r
+ unsigned m_TooPolyCount;\r
+\r
+private:\r
+// m_IsNucleo is meaningful only once m_IsNucleoSet is true.\r
+ bool m_IsNucleoSet;\r
+ bool m_IsNucleo;\r
+\r
+public:\r
+ SFasta();\r
+ ~SFasta();\r
+\r
+// Free the cache, close the file and reset all members.\r
+ void Clear();\r
+\r
+// Attach to a file; any previously attached file is released first.\r
+ void Open(const string &FileName);\r
+\r
+// Go back to the beginning of the file.\r
+ void Rewind();\r
+\r
+// Sample the start of the file to decide nucleotide vs. not, then rewind.\r
+ bool SetIsNucleo();\r
+\r
+// Result of SetIsNucleo(); asserts if that has not been determined yet.\r
+ bool GetIsNucleo() const { asserta(m_IsNucleoSet); return m_IsNucleo; };\r
+\r
+// Get next sequence.\r
+// Returns zero on end-of-file\r
+ const byte *GetNextSeq();\r
+\r
+// Get next sequence as SeqData object, return false on end-of-file.\r
+ bool GetNextSD(SeqData &SD);\r
+\r
+// Length of most recent sequence returned by GetNextSeq().\r
+ unsigned GetSeqLength() const { return m_SeqLength; }\r
+\r
+// Label of most recent sequence returned by GetNextSeq().\r
+ const char *GetLabel() const { return m_Label; }\r
+\r
+// Index of most recent sequence returned by GetNextSeq().\r
+ unsigned GetSeqIndex() const { return m_SeqIndex; }\r
+\r
+// Progress through the file: tenths of a percent, and percent.\r
+ unsigned GetPctDoneX10() const;\r
+ double GetPctDone() const;\r
+\r
+// Write the reader's state to the log.\r
+ void LogMe() const;\r
+\r
+private:\r
+// Load the next chunk of the file into m_Buffer.\r
+ void FillCache();\r
+\r
+// Get next sequence without applying the length filters.\r
+ const byte *GetNextSeqLo();\r
+ };\r
+\r
+#endif // sfasta_h\r
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
- CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-skulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
+ CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
--- /dev/null
+"Path: .\n"
+"URL: file:///public/svn/usearch\n"
+"Repository Root: file:///public/svn/usearch\n"
+"Repository UUID: 58640331-1837-4c17-bc3e-636dc59aced1\n"
+"Revision: 34\n"
+"Node Kind: directory\n"
+"Schedule: normal\n"
+"Last Changed Author: bob\n"
+"Last Changed Rev: 34\n"
+"Last Changed Date: 2011-05-01 08:29:04 -0700 (Sun, 01 May 2011)\n"
+"\n"
+"? mk\n"
+"! svnmods.h\n"
+"M ungappedblastid.cpp\n"
+"M chaindisjointhits.cpp\n"
--- /dev/null
+T(MxBase_Alloc)\r
+T(MxBase_FreeData)\r
+T(MxBase_AllocData)\r
+T(SortSeqIndexes)\r
+T(Alloc_Vectors)\r
+T(MainLoop_NotNW)\r
+T(WriteOutput)\r
+T(NWB)\r
+T(ReadAllStdioFile)\r
+T(Windex_Init)\r
+T(Windex_SetSeqIndex)\r
+T(SeqToWords)\r
+T(SeqToWordsStep)\r
+T(SeqToShortWords)\r
+T(SeqToShortWordsA)\r
+T(SeqToShortWordsB)\r
+T(GetFractIdB)\r
+T(Windex_UniqueWordsAlloc)\r
+T(Windex_UniqueWords)\r
+T(GetPctId)\r
+T(Windex_Reset)\r
+T(GetSig)\r
+T(NWEditDist)\r
+T(EditDist_Myers)\r
+T(EditDist_BlockTarget)\r
+T(NWBand)\r
+T(WordCounting)\r
+T(NWAff)\r
+T(NWAffBand)\r
+T(NWSimple)\r
+T(NWSimpleB)\r
+T(BandWrap)\r
+T(IncIdCounts)\r
+T(GetBestDiagB)\r
+T(GetBestDiagB1)\r
+T(GetBestDiagB2)\r
+T(ClusterInit)\r
+T(ClusterPrep)\r
+T(HotSort1)\r
+T(HotSort2)\r
+T(SortA)\r
+T(SortB)\r
+T(CountSort)\r
+T(AddWords)\r
+T(ClusterWindex)\r
+T(MainInit)\r
+T(Output)\r
+T(WindexTail)\r
+T(WindexExit)\r
+T(Sort)\r
+T(U_AllocSeqLength)\r
+T(U_AllocSeedCount)\r
+T(U_AddSeed)\r
+T(AddSeq)\r
+T(U_SetWordCounts)\r
+T(U_SetWordCountsHash)\r
+T(U_SetWordScores)\r
+T(U_SetHotHits)\r
+T(U_SetHotHitsHash)\r
+T(U_SetHotHitsScores)\r
+T(U_Search)\r
+T(U_SearchExact)\r
+T(WF_SeqToWords)\r
+T(WF_SeqToWordsA)\r
+T(WF_SeqToWordsB)\r
+T(WF_AllocLA)\r
+T(WF_AllocLB)\r
+T(WF_AllocDiags)\r
+T(WF_SetA)\r
+T(WF_SetA_Nb)\r
+T(WF_SetAZero)\r
+T(WF_SetA2)\r
+T(WF_SetB)\r
+T(WF_GetCommonWordCount)\r
+T(WF_GetBestDiag)\r
+T(GetFractIdGivenPath)\r
+T(WX_GetUniqueWords)\r
+T(CompressPath)\r
+T(GetHSPs1)\r
+T(GetHSPs2)\r
+T(AlignHSPs)\r
+T(WF_ResolveHSPs)\r
+T(WX_SetExcludes)\r
+T(ViterbiFast)\r
+T(ViterbiFastBand)\r
+T(ViterbiFastBand0)\r
+T(ViterbiFastBand1)\r
+T(ViterbiFastBand2)\r
+T(ViterbiFastBand3)\r
+T(ViterbiFastBand4)\r
+T(TraceBackBit)\r
+T(TraceBackBitSW)\r
+T(SF_GetNextSeq)\r
+T(SF_FillCache)\r
+T(OnGlobalAccept)\r
+T(UngappedBlast)\r
+T(UngappedBlastId)\r
+T(UngappedBlast2Hit)\r
+T(LogHSPs)\r
+T(BlastOutput)\r
+T(BlastLeft)\r
+T(BlastRight)\r
+T(Blast1)\r
+T(Blast2)\r
+T(Blast3)\r
+T(Blast4)\r
+T(GetBestSeg)\r
+T(SWLinearDP)\r
+T(SWLinearTB)\r
+T(SWLinearDP2)\r
+T(SWLinearTB2)\r
+T(Chain)\r
+T(XlatSeq)\r
+T(XlatSeqToLetters)\r
+T(XDropFwdSimple)\r
+T(XDropFwdFast)\r
+T(XDropFwdFastTB)\r
+T(XDropBwd)\r
+T(SWSimple)\r
+T(PathAlloc)\r
+T(SubPath)\r
+T(SWUngapped)\r
+T(SWFast)\r
+T(SWFastNTB)\r
+T(SWAT_CacheQuery)\r
+T(SWAT_AlignTarget)\r
+T(SWAT_CacheQueryNW)\r
+T(SWAT_AlignTargetNW)\r
+T(SeqDB_FromFasta)\r
+T(LocalUngappedHitToAD)\r
+T(LocalGappedHitToAD)\r
+T(GlobalHitToAD)\r
+T(ResolveOverlaps)\r
+T(GetORFs)\r
+T(ChainCov_AddHit)\r
+T(ChainCov_EndQuery)\r
+T(ChainCov_DoTarget)\r
+T(BuildNb)\r
+T(MakeIntSubstMx)\r
+T(UngappedExtendLeft)\r
+T(UngappedExtendRight)\r
+T(AlignSP)\r
+T(AlignHSP)\r
+\r
+// Background\r
+T(Bg_SearchLoop)\r
+T(Bg_MainInit)\r
+T(Bg_MainTerm)\r
+T(Bg_Other)\r
+T(Bg_1)\r
+T(Bg_2)\r
+T(Bg_3)\r
+T(Bg_4)\r
+T(Bg_5)\r
+T(Bg_6)\r
+T(Bg_7)\r
+T(Bg_8)\r
+T(Bg_9)\r
+T(Bg_XFrame2)\r
+T(Bg_Usearch1)\r
+T(Bg_Usearch2)\r
+T(Bg_Usearch3)\r
+T(Bg_Usearch4)\r
+T(Bg_Hot)\r
+\r
+// For Timer2\r
+T(Search_2)\r
+T(Search_Loop_2)\r
+T(Search_InnerLoop_2)\r
+T(OnHit_2)\r
+T(UngappedBlast_2)\r
+T(MainInit_2)\r
+T(MainTerm_2)\r
--- /dev/null
+#define TIMING 0
+#ifndef timing_h
+#define timing_h
+
+#define BG_TIMING 0
+
+#if !TIMING
+#undef BG_TIMING
+#define BG_TIMING 0
+#endif
+
+//#if UCHIMES
+#undef TIMING
+#define TIMING 0
+//#endif
+
+#if TIMING
+
+enum TIMER
+ {
+ TIMER_None,
+#define T(x) TIMER_##x,
+#include "timers.h"
+#undef T
+ };
+
+const unsigned TimerCount =
+ 1 // TIMER_None
+#define T(x) +1
+#include "timers.h"
+#undef T
+ ;
+
+enum COUNTER
+ {
+#define C(x) COUNTER_##x,
+#include "counters.h"
+#undef C
+ };
+
+enum ALLOCER
+ {
+#define A(x) ALLOCER_##x,
+#include "allocs.h"
+#undef A
+ };
+
+const unsigned CounterCount =
+#define C(x) +1
+#include "counters.h"
+#undef C
+ ;
+
+const unsigned AllocerCount =
+#define A(x) +1
+#include "allocs.h"
+#undef A
+ ;
+
+#ifdef _MSC_VER
+
+typedef unsigned __int64 TICKS;
+
+#pragma warning(disable:4035)
+// MSVC variant. The two emitted bytes 0x0f 0x31 encode the x86 RDTSC
+// instruction, which leaves the time-stamp counter in EDX:EAX; the function
+// has no return statement and relies on that register pair being taken as
+// the 64-bit return value.
+inline TICKS GetClockTicks()
+ {
+ _asm
+ {
+ _emit 0x0f
+ _emit 0x31
+ }
+ }
+
+#else // ifdef _MSC_VER
+
+typedef uint64_t TICKS;
+// GCC-style variant: executes RDTSC and combines its two 32-bit halves
+// (low half in EAX, high half in EDX) into one 64-bit tick count.
+__inline__ uint64_t GetClockTicks()
+ {
+ uint32_t lo, hi;
+ /* We cannot use "=A", since this would use %rax on x86_64 */
+ __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+ return (uint64_t)hi << 32 | lo;
+ }
+
+#endif // ifdef _MSC_VER
+
+//void AddTicks(const string &Name, TICKS Ticks1, TICKS Ticks2);
+//void AddBytes(const string &Name, double Bytes);
+//#define SubBytes(Name, Bytes) AddBytes(Name, -double(Bytes))
+
+const char *TimerToStr(TIMER t);
+
+extern TICKS g_BeginTicks[TimerCount];
+extern double g_TotalTicks[TimerCount];
+extern double g_TotalCounts[TimerCount];
+extern double g_Counters[CounterCount];
+extern unsigned g_AllocNewCount[AllocerCount];
+extern unsigned g_AllocFreeCount[AllocerCount];
+extern double g_AllocNewBytes[AllocerCount];
+extern double g_AllocFreeBytes[AllocerCount];
+extern double g_AllocNetBytes[AllocerCount];
+extern double g_AllocPeakBytes[AllocerCount];
+extern bool g_Timer2[TimerCount];
+extern TIMER g_CurrTimer;
+#if BG_TIMING
+extern TIMER g_BackgroundTimer;
+#endif
+
+// Allocation wrappers used when TIMING is enabled; they forward to MyAlloc_ /
+// MyFree_, which keep per-allocer statistics (ALLOCER_<Name>).
+// Every macro argument is parenthesized so that an expression argument such
+// as MYFREE(p, n+3, X) yields (n+3)*sizeof(p[0]) rather than n+3*sizeof(p[0]).
+#define MYALLOC(Type, N, Name) ((Type *) MyAlloc_((N)*sizeof(Type), ALLOCER_##Name, __FILE__, __LINE__))
+#define MYFREE(Array, N, Name) MyFree_((Array), (N)*sizeof((Array)[0]), ALLOCER_##Name, __FILE__, __LINE__)
+
+// Allocates Bytes bytes with mymalloc() after charging them to allocer a:
+// bumps the allocation count, the total and net byte counters, and raises
+// the peak when the net exceeds it. FileName and Line are accepted but not
+// used.
+inline void *MyAlloc_(unsigned Bytes, unsigned a, const char *FileName, int Line)
+    {
+    double &Net = g_AllocNetBytes[a];
+    double &Peak = g_AllocPeakBytes[a];
+
+    ++g_AllocNewCount[a];
+    g_AllocNewBytes[a] += Bytes;
+    Net += Bytes;
+    if (Net > Peak)
+        Peak = Net;
+
+    return mymalloc(Bytes);
+    }
+
+// Releases p with myfree2() after crediting Bytes bytes back to allocer a:
+// bumps the free count and freed-bytes total and lowers the net byte count.
+// FileName and Line are accepted but not used.
+inline void MyFree_(void *p, unsigned Bytes, unsigned a, const char *FileName, int Line)
+    {
+    g_AllocFreeCount[a] += 1;
+    g_AllocFreeBytes[a] += Bytes;
+    g_AllocNetBytes[a] -= Bytes;
+
+    myfree2(p, Bytes);
+    }
+
+#if BG_TIMING
+// Makes Timer the background timer. If the outgoing background timer has a
+// non-zero begin tick, the ticks elapsed since then are added to its total
+// and its count is incremented. The new timer starts counting from now.
+inline void SetBackgroundTimer_(TIMER Timer)
+    {
+    const TICKS Now = GetClockTicks();
+    const TIMER Outgoing = g_BackgroundTimer;
+    if (g_BeginTicks[Outgoing] != 0)
+        {
+        ++g_TotalCounts[Outgoing];
+        g_TotalTicks[Outgoing] += double(Now - g_BeginTicks[Outgoing]);
+        }
+    g_BackgroundTimer = Timer;
+    g_BeginTicks[Timer] = Now;
+    }
+#else
+#define SetBackgroundTimer_(Timer) /* empty */
+#endif
+
+// Starts Timer and makes it the current timer. Timers do not nest: dies if
+// another timer is still current. With BG_TIMING enabled, the ticks the
+// background timer has accumulated since its begin tick are charged to it
+// at this point.
+inline void StartTimer_(TIMER Timer)
+ {
+ if (g_CurrTimer != TIMER_None)
+ Die("StartTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+ TICKS Now = GetClockTicks();
+#if BG_TIMING
+ if (g_BeginTicks[g_BackgroundTimer] != 0)
+ {
+ ++g_TotalCounts[g_BackgroundTimer];
+ g_TotalTicks[g_BackgroundTimer] += double(Now - g_BeginTicks[g_BackgroundTimer]);
+ }
+#endif
+ g_BeginTicks[Timer] = Now;
+ g_CurrTimer = Timer;
+ }
+
+// Suspends the current timer, which must be Timer (dies otherwise). The
+// ticks elapsed since it was started are added to its total, but unlike
+// EndTimer_ its count is not incremented. No timer is current afterwards.
+inline void PauseTimer_(TIMER Timer)
+    {
+    if (g_CurrTimer != Timer)
+        Die("PauseTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+    const TICKS Now = GetClockTicks();
+    const TICKS Elapsed = Now - g_BeginTicks[Timer];
+    g_TotalTicks[Timer] += double(Elapsed);
+    g_BeginTicks[Timer] = Now;
+    g_CurrTimer = TIMER_None;
+    }
+
+// Stops the current timer, which must be Timer (dies otherwise): adds the
+// ticks elapsed since it was started to its total, increments its count and
+// leaves no timer current. With BG_TIMING enabled the background timer
+// resumes counting from now.
+inline void EndTimer_(TIMER Timer)
+ {
+ if (Timer != g_CurrTimer)
+ Die("EndTimer(%s), curr=%s", TimerToStr(Timer), TimerToStr(g_CurrTimer));
+
+ TICKS Now = GetClockTicks();
+#if BG_TIMING
+ // NOTE(review): this runs before the elapsed time is computed below, so if
+ // Timer were also the background timer the elapsed time would be zero.
+ g_BeginTicks[g_BackgroundTimer] = Now;
+#endif
+ g_TotalTicks[Timer] += double(Now - g_BeginTicks[Timer]);
+ ++g_TotalCounts[Timer];
+ g_CurrTimer = TIMER_None;
+ }
+
+// Starts a "type 2" timer: flags Timer in g_Timer2 and records its begin
+// tick. Unlike StartTimer_ this neither checks nor changes g_CurrTimer.
+inline void StartTimer2_(TIMER Timer)
+    {
+    g_Timer2[Timer] = true;
+    const TICKS Now = GetClockTicks();
+    g_BeginTicks[Timer] = Now;
+    }
+
+// Stops a "type 2" timer: adds the ticks elapsed since its begin tick to
+// its total and increments its count. g_CurrTimer is not involved.
+inline void EndTimer2_(TIMER Timer)
+    {
+    const TICKS Elapsed = GetClockTicks() - g_BeginTicks[Timer];
+    g_TotalTicks[Timer] += double(Elapsed);
+    ++g_TotalCounts[Timer];
+    }
+
+#define AddCounter(x, N) g_Counters[COUNTER_##x] += N
+#define IncCounter(x) ++(g_Counters[COUNTER_##x])
+#define StartTimer(x) StartTimer_(TIMER_##x)
+#define PauseTimer(x) PauseTimer_(TIMER_##x)
+#define EndTimer(x) EndTimer_(TIMER_##x)
+#define StartTimer2(x) StartTimer2_(TIMER_##x)
+#define EndTimer2(x) EndTimer2_(TIMER_##x)
+
+#if BG_TIMING
+#define SetBackgroundTimer(x) SetBackgroundTimer_(TIMER_##x)
+#else
+#define SetBackgroundTimer(x) /* empty */
+#endif
+
+#else // if TIMING
+
+#define AddCounter(x, N) /* empty */
+#define IncCounter(x) /* empty */
+#define StartTimer(x) /* empty */
+#define PauseTimer(x) /* empty */
+#define EndTimer(x) /* empty */
+#define StartTimer2(x) /* empty */
+#define PauseTimer2(x) /* empty */
+#define EndTimer2(x) /* empty */
+#define SetBackgroundTimer(x) /* empty */
+#define MYALLOC(Type, N, Name) myalloc(Type, N)
+#define MYFREE(Array, N, Name) myfree(Array)
+
+#endif // if TIMING
+
+void LogMemStats();
+void LogTickStats();
+void LogStats();
+void LogAllocs();
+
+#define AddBytes(x, n) /* empty */
+#define SubBytes(x, n) /* empty */
+
+#endif // if timing_h
--- /dev/null
+#include "dp.h"
+
+#define TRACE 0
+
+Mx<byte> g_Mx_TBBit;
+byte **g_TBBit;
+float *g_DPRow1;
+float *g_DPRow2;
+static float *g_DPBuffer1;
+static float *g_DPBuffer2;
+
+static unsigned g_CacheLB;
+
+// Prepares the global DP work areas for sequences of length LA and LB:
+// sizes the trace-back matrix to (LA+1) x (LB+1) and, when LB exceeds the
+// cached capacity, reallocates the two DP row buffers with 128 elements of
+// headroom. g_DPRow1/g_DPRow2 point one element into their buffers so that
+// index [-1] is addressable.
+void AllocBit(unsigned LA, unsigned LB)
+ {
+ g_Mx_TBBit.Alloc("TBBit", LA+1, LB+1);
+ g_TBBit = g_Mx_TBBit.GetData();
+ if (LB > g_CacheLB)
+ {
+ // NOTE(review): the buffers are allocated with g_CacheLB+3 elements below
+ // but freed here with a count of g_CacheLB; this only matters for the
+ // byte accounting done when TIMING is enabled.
+ MYFREE(g_DPBuffer1, g_CacheLB, AllocBit);
+ MYFREE(g_DPBuffer2, g_CacheLB, AllocBit);
+
+ g_CacheLB = LB + 128;
+
+ // Allow use of [-1]
+ //g_DPBuffer1 = myalloc<float>(g_CacheLB+3);
+ //g_DPBuffer2 = myalloc<float>(g_CacheLB+3);
+ g_DPBuffer1 = MYALLOC(float, g_CacheLB+3, AllocBit);
+ g_DPBuffer2 = MYALLOC(float, g_CacheLB+3, AllocBit);
+ g_DPRow1 = g_DPBuffer1 + 1;
+ g_DPRow2 = g_DPBuffer2 + 1;
+ }
+ }
+
+// Reconstructs a global alignment path from the bit-encoded trace-back
+// matrix g_TBBit. Starting at cell (LA, LB) in state State ('M', 'D' or
+// 'I'), it walks back to (0, 0), writing one state letter per step.
+// The path is built backwards from PD.Back (which holds the terminating
+// NUL); on return PD.Start points at its first letter.
+// 'M' consumes one position of both sequences, 'D' one of the first (i) and
+// 'I' one of the second (j). Dies on any other state letter.
+void TraceBackBit(unsigned LA, unsigned LB, char State, PathData &PD)
+ {
+ PD.Alloc(LA+LB);
+
+ StartTimer(TraceBackBit);
+ char *PathPtr = PD.Back;
+ *PathPtr = 0;
+
+ byte **TB = g_TBBit;
+
+#if TRACE
+ Log("\n");
+ Log("TraceBackBit\n");
+#endif
+
+ size_t i = LA;
+ size_t j = LB;
+ for (;;)
+ {
+#if TRACE
+ Log("i=%3d j=%3d state=%c\n", (int) i, (int) j, State);
+#endif
+ if (i == 0 && j == 0)
+ break;
+
+ --PathPtr;
+ *PathPtr = State;
+
+ // The bits of the cell being left select the state of the previous step;
+ // when no transition bit is set the state stays the same.
+ byte t;
+ switch (State)
+ {
+ case 'M':
+ asserta(i > 0 && j > 0);
+ t = TB[i-1][j-1];
+ if (t & TRACEBITS_DM)
+ State = 'D';
+ else if (t & TRACEBITS_IM)
+ State = 'I';
+ else
+ State = 'M';
+ --i;
+ --j;
+ break;
+ case 'D':
+ asserta(i > 0);
+ t = TB[i-1][j];
+ if (t & TRACEBITS_MD)
+ State = 'M';
+ else
+ State = 'D';
+ --i;
+ break;
+
+ case 'I':
+ asserta(j > 0);
+ t = TB[i][j-1];
+ if (t & TRACEBITS_MI)
+ State = 'M';
+ else
+ State = 'I';
+ --j;
+ break;
+
+ default:
+ Die("TraceBackBit, invalid state %c", State);
+ }
+ }
+ PD.Start = PathPtr;
+ EndTimer(TraceBackBit);
+ }
+
+// Reconstructs a local alignment path from the bit-encoded trace-back
+// matrix g_TBBit. The walk starts in state 'M' at cell (Besti, Bestj) and
+// ends at the first 'M' cell whose TRACEBITS_SM bit is set, which is the
+// only exit from the loop.
+// On return Leni and Lenj hold the number of positions of each sequence
+// covered by the path and PD.Start points at the first path letter; the
+// path is built backwards from PD.Back.
+void TraceBackBitSW(unsigned LA, unsigned LB, unsigned Besti, unsigned Bestj,
+ unsigned &Leni, unsigned &Lenj, PathData &PD)
+ {
+ PD.Alloc(LA+LB);
+
+ StartTimer(TraceBackBitSW);
+ char *PathPtr = PD.Back;
+ *PathPtr = 0;
+
+ byte **TB = g_TBBit;
+
+#if TRACE
+ Log("\n");
+ Log("TraceBackBitSW\n");
+#endif
+
+ unsigned i = Besti;
+ unsigned j = Bestj;
+ char State = 'M';
+ for (;;)
+ {
+#if TRACE
+ Log("i=%3d j=%3d state=%c\n", (int) i, (int) j, State);
+#endif
+ --PathPtr;
+ *PathPtr = State;
+
+ byte t;
+ switch (State)
+ {
+ case 'M':
+ asserta(i > 0 && j > 0);
+ t = TB[i-1][j-1];
+ if (t & TRACEBITS_DM)
+ State = 'D';
+ else if (t & TRACEBITS_IM)
+ State = 'I';
+ else if (t & TRACEBITS_SM)
+ {
+ // Start of the local alignment: i and j have not been decremented
+ // for this step yet, hence the +1 in both lengths.
+ Leni = Besti - i + 1;
+ Lenj = Bestj - j + 1;
+ PD.Start = PathPtr;
+ EndTimer(TraceBackBitSW);
+ return;
+ }
+ else
+ State = 'M';
+ --i;
+ --j;
+ break;
+ case 'D':
+ asserta(i > 0);
+ t = TB[i-1][j];
+ if (t & TRACEBITS_MD)
+ State = 'M';
+ else
+ State = 'D';
+ --i;
+ break;
+
+ case 'I':
+ asserta(j > 0);
+ t = TB[i][j-1];
+ if (t & TRACEBITS_MI)
+ State = 'M';
+ else
+ State = 'I';
+ --j;
+ break;
+
+ default:
+ Die("TraceBackBitSW, invalid state %c", State);
+ }
+ }
+ }
--- /dev/null
+#ifndef uc_h\r
+#define uc_h\r
+\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+#include "path.h"\r
+\r
+struct AlnData;\r
+\r
+int uchime_main(int, char**); \r
+\r
+// Reader/writer for ".uc" files. The record fields are held as parallel\r
+// vectors, presumably one entry per record (TODO confirm against the\r
+// implementation, which is not in this header).\r
+class UCFile\r
+ {\r
+public:\r
+ FILE *m_File;\r
+// NOTE(review): presumably the raw file contents that the const char *\r
+// vectors below point into -- confirm.\r
+ byte *m_Data;\r
+ vector<char> m_RecTypes;\r
+ vector<float> m_PctIds;\r
+ vector<const char *> m_Labels;\r
+ vector<const char *> m_SeedLabels;\r
+ vector<unsigned> m_SeedIndexes;\r
+ vector<const char *> m_CompressedPaths;\r
+ vector<unsigned> m_SeqLengths;\r
+ vector<unsigned> m_SortOrder;\r
+ vector<char> m_Strands;\r
+ vector<unsigned> m_Los;\r
+ vector<unsigned> m_SeedLos;\r
+\r
+public:\r
+ /* some function prototypes */\r
+ \r
+ \r
+ UCFile();\r
+// ctor defaults to false; presumably true only when called from the\r
+// constructor -- confirm.\r
+ void Clear(bool ctor = false);\r
+ void Close();\r
+\r
+// Conversion to and from files on disk.\r
+ void FromFile(const string &FileName);\r
+ void FromClstr(const string &FileName);\r
+ void ToFile(const string &FileName);\r
+ unsigned GetRecordCount() const;\r
+ void LogMe() const;\r
+ void ToClstr(const string &FileName);\r
+ void ToFasta(const string &FileName, const SeqDB &Input, bool Reformat);\r
+ void Create(const string &FileName);\r
+ void Sort();\r
+ void Flush() const;\r
+\r
+// Record writers; each takes the fields of one record of the named kind.\r
+ void WriteNotMatched(unsigned L, const char *Label) const;\r
+ void WriteLibSeed(unsigned SeedIndex, unsigned L, const char *Label) const;\r
+ void WriteNewSeed(unsigned SeedIndex, unsigned L, const char *Label) const;\r
+ void WriteHit(const SeqData &SA, const SeqData &SB, double FractId,\r
+ const PathData &PD) const;\r
+ void WriteReject(const SeqData &SA, const SeqData &SB, double FractId,\r
+ const char *Path) const;\r
+ void WriteHit(unsigned SeedIndex, unsigned L, double PctId,\r
+ const char *CompressedPath, char Strand, unsigned Lo, unsigned SeedLo,\r
+ const char *Label, const char *SeedLabel) const;\r
+ void WriteHit(const AlnData &AD);\r
+ void WriteLibCluster(unsigned SeedIndex, unsigned Size, double AvgId,\r
+ const char *Label) const;\r
+ void WriteNewCluster(unsigned SeedIndex, unsigned Size, double AvgId,\r
+ const char *Label) const;\r
+ void WriteSeqX(FILE *f, const byte *Seq, unsigned L, const char *CompressedPath) const;\r
+ };\r
+\r
+#endif // uc_h\r
--- /dev/null
+#include "myutils.h"\r
+#include "chime.h"\r
+#include "seqdb.h"\r
+#include "dp.h"\r
+#include "ultra.h"\r
+#include "hspfinder.h"\r
+#include <algorithm>\r
+#include <set>\r
+#include "mothurout.h"\r
+\r
+bool SearchChime(Ultra &U, const SeqData &QSD, float QAb, \r
+ const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,\r
+ float MinFractId, ChimeHit2 &Hit);\r
+\r
+FILE *g_fUChime;\r
+FILE *g_fUChimeAlns;\r
+const vector<float> *g_SortVecFloat;\r
+bool g_UchimeDeNovo = false;\r
+\r
+// Prints nothing: the stand-alone banner and help text are commented out\r
+// below, so calling this has no effect.\r
+void Usage()\r
+ {\r
+ //printf("\n");\r
+ //printf("UCHIME %s by Robert C. Edgar\n", MY_VERSION);\r
+ //printf("http://www.drive5.com/uchime\n");\r
+ //printf("\n");\r
+ //printf("This software is donated to the public domain\n");\r
+ //printf("\n");\r
+\r
+ //printf(\r
+//#include "help.h"\r
+ //);\r
+ }\r
+\r
+// Stub: not implemented in this build; calls Die() if reached.\r
+void SetBLOSUM62()\r
+ {\r
+ Die("SetBLOSUM62 not implemented");\r
+ }\r
+\r
+// Stub: not implemented in this build; both arguments are ignored and\r
+// Die() is called if reached.\r
+void ReadSubstMx(const string &/*FileName*/, Mx<float> &/*Mxf*/)\r
+ {\r
+ Die("ReadSubstMx not implemented");\r
+ }\r
+\r
+// Intentionally empty definition of the LogAllocs() hook; does nothing.\r
+void LogAllocs()\r
+ {\r
+ /*empty*/\r
+ }\r
+\r
+// Sort predicate over indexes into *g_SortVecFloat: index i goes before\r
+// index j when its value is strictly greater (descending order).\r
+static bool CmpDescVecFloat(unsigned i, unsigned j)\r
+    {\r
+    const vector<float> &Values = *g_SortVecFloat;\r
+    return Values[j] < Values[i];\r
+    }\r
+\r
+// Replaces the contents of v with the N consecutive values 0, 1, ..., N-1.\r
+void Range(vector<unsigned> &v, unsigned N)\r
+    {\r
+    v.clear();\r
+    v.reserve(N);\r
+    unsigned Next = 0;\r
+    while (Next < N)\r
+        {\r
+        v.push_back(Next);\r
+        ++Next;\r
+        }\r
+    }\r
+\r
+// Fills Order with the indexes of Values arranged so that the values they\r
+// refer to are in descending order. Values itself is not modified.\r
+// Goes through the file-scope pointer g_SortVecFloat to hand Values to the\r
+// comparison function.\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order)\r
+    {\r
+    StartTimer(Sort);\r
+    g_SortVecFloat = &Values;\r
+    Range(Order, SIZE(Values));\r
+    sort(Order.begin(), Order.end(), CmpDescVecFloat);\r
+    EndTimer(Sort);\r
+    }\r
+\r
+// Extracts the abundance from a sequence label. The label is split on '/'\r
+// and the first field beginning with "ab=" supplies the value (converted\r
+// with atof). When no such field exists the result is 0, except in de novo\r
+// mode (g_UchimeDeNovo), where a missing abundance is fatal.\r
+float GetAbFromLabel(const string &Label)\r
+    {\r
+    vector<string> Fields;\r
+    Split(Label, Fields, '/');\r
+\r
+    for (vector<string>::const_iterator Field = Fields.begin();\r
+      Field != Fields.end(); ++Field)\r
+        {\r
+        if (Field->compare(0, 3, "ab=") != 0)\r
+            continue;\r
+    // Skip the three characters of "ab=" and convert the rest.\r
+        return (float) atof(Field->c_str() + 3);\r
+        }\r
+\r
+    if (g_UchimeDeNovo)\r
+        Die("Missing abundance /ab=xx/ in label >%s", Label.c_str());\r
+    return 0.0;\r
+    }\r
+\r
+// Entry point of the embedded UCHIME chimera detector.\r
+//   argc, argv  command line in the usual main() form, parsed by MyCmdLine().\r
+// Returns 0 on every path that returns; fatal conditions are reported through\r
+// Die() (defined elsewhere).\r
+//\r
+// Flow: parse options, load the query sequences, then either\r
+//   - de novo mode (no --db): order queries by descending abundance taken from\r
+//     their labels and grow the reference DB with every query that is not\r
+//     reported as a hit, or\r
+//   - reference mode: load the database named by --db.\r
+// Each query is passed to SearchChime() and its result to WriteChimeHit().\r
+int uchime_main(int argc, char *argv[])\r
+ {\r
+ MothurOut* m;\r
+ m = MothurOut::getInstance();\r
+ \r
+ MyCmdLine(argc, argv);\r
+\r
+ if (argc < 2)\r
+ {\r
+ Usage();\r
+ return 0;\r
+ }\r
+\r
+ if (opt_version)\r
+ {\r
+ printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+ return 0;\r
+ }\r
+\r
+ //printf("uchime v" MY_VERSION ".%s\n", SVN_VERSION);\r
+ //printf("by Robert C. Edgar\n");\r
+ //printf("http://drive5.com/uchime\n");\r
+ //printf("This code is donated to the public domain.\n");\r
+ //printf("\n");\r
+ // Default word length when the user did not set one.\r
+ if (!optset_w)\r
+ opt_w = 8;\r
+ \r
+ float MinFractId = 0.95f;\r
+ if (optset_id)\r
+ MinFractId = (float) opt_id;\r
+\r
+ Log("%8.2f minh\n", opt_minh);\r
+ Log("%8.2f xn\n", opt_xn);\r
+ Log("%8.2f dn\n", opt_dn);\r
+ Log("%8.2f xa\n", opt_xa);\r
+ Log("%8.2f mindiv\n", opt_mindiv);\r
+ Log("%8u maxp\n", opt_maxp);\r
+\r
+ // --uchime is accepted as a fallback spelling of --input.\r
+ if (opt_input == "" && opt_uchime != "")\r
+ opt_input = opt_uchime;\r
+\r
+ if (opt_input == "")\r
+ Die("Missing --input");\r
+\r
+ // No reference database given means de novo mode.\r
+ g_UchimeDeNovo = (opt_db == "");\r
+\r
+ if (opt_uchimeout != "")\r
+ g_fUChime = CreateStdioFile(opt_uchimeout);\r
+\r
+ if (opt_uchimealns != "")\r
+ g_fUChimeAlns = CreateStdioFile(opt_uchimealns);\r
+\r
+ SeqDB Input;\r
+ SeqDB DB;\r
+\r
+ Input.FromFasta(opt_input);\r
+ if (!Input.IsNucleo())\r
+ Die("Input contains amino acid sequences");\r
+\r
+ // Default processing order is input order; de novo mode replaces it below.\r
+ const unsigned QuerySeqCount = Input.GetSeqCount();\r
+ vector<unsigned> Order;\r
+ for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+ Order.push_back(i);\r
+\r
+ if (g_UchimeDeNovo)\r
+ {\r
+ // Process the most abundant queries first.\r
+ vector<float> Abs;\r
+ for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+ {\r
+ const char *Label = Input.GetLabel(i);\r
+ float Ab = GetAbFromLabel(Label);\r
+ Abs.push_back(Ab);\r
+ }\r
+ SortDescending(Abs, Order);\r
+ // DB starts empty, so mark it as nucleotide by hand.\r
+ DB.m_IsNucleoSet = true;\r
+ DB.m_IsNucleo = true;\r
+ }\r
+ else\r
+ {\r
+ DB.FromFasta(opt_db);\r
+ if (!DB.IsNucleo())\r
+ Die("Database contains amino acid sequences");\r
+ }\r
+\r
+ vector<ChimeHit2> Hits;\r
+ unsigned HitCount = 0;\r
+ for (unsigned i = 0; i < QuerySeqCount; ++i)\r
+ {\r
+ \r
+ // Stop early when the host application requests cancellation.\r
+ if (m->control_pressed) { break; }\r
+ \r
+ unsigned QuerySeqIndex = Order[i];\r
+\r
+ SeqData QSD;\r
+ Input.GetSeqData(QuerySeqIndex, QSD);\r
+\r
+ float QAb = -1.0;\r
+ if (g_UchimeDeNovo)\r
+ QAb = GetAbFromLabel(QSD.Label);\r
+\r
+ ChimeHit2 Hit;\r
+ // NOTE(review): AP, AH and HF are references bound through a null\r
+ // pointer, which is undefined behavior in C++. This can only work if\r
+ // SearchChime() never touches them -- confirm and pass real objects.\r
+ AlnParams &AP = *(AlnParams *) 0;\r
+ AlnHeuristics &AH = *(AlnHeuristics *) 0;\r
+ HSPFinder &HF = *(HSPFinder *) 0;\r
+ bool Found = SearchChime(DB, QSD, QAb, AP, AH, HF, MinFractId, Hit);\r
+ if (Found)\r
+ ++HitCount;\r
+ else\r
+ {\r
+ // In de novo mode queries that are not hits join the reference set.\r
+ if (g_UchimeDeNovo)\r
+ DB.AddSeq(QSD.Label, QSD.Seq, QSD.L);\r
+ }\r
+\r
+ WriteChimeHit(g_fUChime, Hit);\r
+\r
+ // i+1 queries have been processed at this point; use the same count for\r
+ // the displayed denominator and for the percentage.\r
+ ProgressStep(i, QuerySeqCount, "%u/%u chimeras found (%.1f%%)", HitCount, i+1, Pct(HitCount, i+1));\r
+ \r
+ }\r
+\r
+ Log("\n");\r
+ Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
+ opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r
+\r
+ CloseStdioFile(g_fUChime);\r
+ CloseStdioFile(g_fUChimeAlns);\r
+\r
+ ProgressExit();\r
+ return 0;\r
+ }\r
--- /dev/null
+#ifndef ultra_h
+#define ultra_h
+
+// Compatibility shim: code written against the names "Ultra" and
+// "GetSeedLabel" is redirected by the preprocessor to SeqDB and GetLabel.
+#include "seqdb.h"
+#define Ultra SeqDB
+#define GetSeedLabel GetLabel
+
+#endif // ultra_h
--- /dev/null
+//#if UCHIMES\r
+\r
+#include "myutils.h"\r
+#include "seqdb.h"\r
+#include "seq.h"\r
+#include "alpha.h"\r
+\r
+void SortDescending(const vector<float> &Values, vector<unsigned> &Order);\r
+\r
+static byte *g_QueryHasWord;\r
+static unsigned g_WordCount;\r
+\r
+// Packs the first opt_w characters of Seq into one integer, base 4: each\r
+// character is mapped through g_CharToLetterNucleo and appended as the next\r
+// digit. The caller must guarantee that opt_w characters are readable.\r
+// NOTE(review): presumably every character maps to a letter in 0..3; a larger\r
+// table value would push the result past the 4^opt_w range -- confirm.\r
+unsigned GetWord(const byte *Seq)\r
+    {\r
+    unsigned Word = 0;\r
+    for (unsigned Pos = 0; Pos < opt_w; ++Pos)\r
+        {\r
+        const unsigned Letter = g_CharToLetterNucleo[Seq[Pos]];\r
+        Word = 4*Word + Letter;\r
+        }\r
+    return Word;\r
+    }\r
+\r
+// Rebuilds the word-presence table g_QueryHasWord for a new query.\r
+// On the first call the table is allocated with 4^opt_w entries (the size is\r
+// remembered in g_WordCount and not recomputed afterwards). Every call clears\r
+// the table and then flags each word that starts at a position of Query.\r
+// Queries with Query.L <= opt_w leave the table empty.\r
+static void SetQuery(const SeqData &Query)\r
+    {\r
+    if (g_QueryHasWord == 0)\r
+        {\r
+        unsigned TableSize = 4;\r
+        for (unsigned k = 1; k < opt_w; ++k)\r
+            TableSize *= 4;\r
+        g_WordCount = TableSize;\r
+        g_QueryHasWord = myalloc(byte, g_WordCount);\r
+        }\r
+\r
+    memset(g_QueryHasWord, 0, g_WordCount);\r
+\r
+    if (Query.L > opt_w)\r
+        {\r
+        const unsigned StartCount = Query.L - opt_w + 1;\r
+        for (unsigned Start = 0; Start < StartCount; ++Start)\r
+            g_QueryHasWord[GetWord(Query.Seq + Start)] = 1;\r
+        }\r
+    }\r
+\r
+// Counts the word start positions of Target whose word is flagged in\r
+// g_QueryHasWord (the table prepared by SetQuery). A word that occurs at\r
+// several positions of Target is counted once per position. Targets with\r
+// Target.L <= opt_w yield 0.\r
+static unsigned GetUniqueWordsInCommon(const SeqData &Target)\r
+    {\r
+    unsigned Shared = 0;\r
+    if (Target.L > opt_w)\r
+        {\r
+        const unsigned StartCount = Target.L - opt_w + 1;\r
+        for (unsigned Start = 0; Start < StartCount; ++Start)\r
+            {\r
+            const unsigned Word = GetWord(Target.Seq + Start);\r
+            if (g_QueryHasWord[Word])\r
+                ++Shared;\r
+            }\r
+        }\r
+    return Shared;\r
+    }\r
+\r
+// Ranks the sequences of DB by the number of words they share with Query.\r
+//   Query       sequence whose words are indexed first (see SetQuery).\r
+//   DB          database whose sequences are scored.\r
+//   WordCounts  output; entry k is the shared-word count of DB sequence k.\r
+//   Order       output; DB indexes sorted by descending WordCounts.\r
+// Both output vectors are emptied before being filled.\r
+void USort(const SeqData &Query, const SeqDB &DB, vector<float> &WordCounts, \r
+ vector<unsigned> &Order)\r
+    {\r
+    WordCounts.clear();\r
+    Order.clear();\r
+\r
+    SetQuery(Query);\r
+\r
+    const unsigned TargetCount = DB.GetSeqCount();\r
+    for (unsigned k = 0; k != TargetCount; ++k)\r
+        {\r
+        SeqData Target;\r
+        DB.GetSeqData(k, Target);\r
+        WordCounts.push_back((float) GetUniqueWordsInCommon(Target));\r
+        }\r
+\r
+    SortDescending(WordCounts, Order);\r
+    }\r
+\r
+//#endif // UCHIMES\r
}else if (Estimators[i] == "ochiai") { Calculator* temp = new Ochiai(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
}else if (Estimators[i] == "anderberg") { Calculator* temp = new Anderberg(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
- }else if (Estimators[i] == "skulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
+ }else if (Estimators[i] == "kulczynski") { Calculator* temp = new Kulczynski(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
}else if (Estimators[i] == "kulczynskicody") { Calculator* temp = new KulczynskiCody(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
}else if (Estimators[i] == "lennon") { Calculator* temp = new Lennon(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
}else if (Estimators[i] == "morisitahorn") { Calculator* temp = new MorHorn(); m->mothurOut(temp->getName() + ": "); temp->citation(); delete temp;
--- /dev/null
+#include "dp.h"
+#include "out.h"
+#include "evalue.h"
+
+#define CMP_SIMPLE 0
+\r
+#if SAVE_FAST
+static Mx<float> g_MxDPM;
+static Mx<float> g_MxDPD;
+static Mx<float> g_MxDPI;
+
+static Mx<char> g_MxTBM;
+static Mx<char> g_MxTBD;
+static Mx<char> g_MxTBI;
+
+static float **g_DPM;
+static float **g_DPD;
+static float **g_DPI;
+
+static char **g_TBM;
+static char **g_TBD;
+static char **g_TBI;
+
+#if CMP_SIMPLE
+// Reference DP matrices obtained from GetSimpleDPMxs() in AllocSave(); the
+// fast implementation is cross-checked against them cell by cell.
+static Mx<float> *g_DPMSimpleMx;
+static Mx<float> *g_DPDSimpleMx;
+static Mx<float> *g_DPISimpleMx;
+static float **g_DPMSimple;
+static float **g_DPDSimple;
+static float **g_DPISimple;
+
+// cmpm / cmpd / cmpi: Die() when x differs (per feq) from cell [i][j] of the
+// reference M / D / I matrix. Each message names the matrix that was checked.
+#define cmpm(i, j, x) { if (!feq(x, g_DPMSimple[i][j])) \
+ { \
+ Die("%s:%d %.1f != DPMSimple[%u][%u] = %.1f", \
+ __FILE__, __LINE__, x, i, j, g_DPMSimple[i][j]); \
+ } \
+ }
+
+#define cmpd(i, j, x) { if (!feq(x, g_DPDSimple[i][j])) \
+ { \
+ Die("%s:%d %.1f != DPDSimple[%u][%u] = %.1f", \
+ __FILE__, __LINE__, x, i, j, g_DPDSimple[i][j]); \
+ } \
+ }
+
+#define cmpi(i, j, x) { if (!feq(x, g_DPISimple[i][j])) \
+ { \
+ Die("%s:%d %.1f != DPISimple[%u][%u] = %.1f", \
+ __FILE__, __LINE__, x, i, j, g_DPISimple[i][j]); \
+ } \
+ }
+
+#else
+
+// Cross-checking disabled: the comparison macros expand to nothing.
+#define cmpm(i, j, x) /* empty */
+#define cmpd(i, j, x) /* empty */
+#define cmpi(i, j, x) /* empty */
+
+#endif
+
+// Prepares the debug copies of the DP and traceback matrices (SAVE_FAST
+// builds only). Each of the six matrices is allocated as (LA+1) x (LB+1) and
+// its raw row-pointer array is cached in the matching g_DP* / g_TB* global.
+// With CMP_SIMPLE enabled the reference matrices are fetched first via
+// GetSimpleDPMxs().
+static void AllocSave(unsigned LA, unsigned LB)
+ {
+#if CMP_SIMPLE
+ GetSimpleDPMxs(&g_DPMSimpleMx, &g_DPDSimpleMx, &g_DPISimpleMx);
+ g_DPMSimple = g_DPMSimpleMx->GetData();
+ g_DPDSimple = g_DPDSimpleMx->GetData();
+ g_DPISimple = g_DPISimpleMx->GetData();
+#endif
+ g_MxDPM.Alloc("FastM", LA+1, LB+1);\r
+ g_MxDPD.Alloc("FastD", LA+1, LB+1);\r
+ g_MxDPI.Alloc("FastI", LA+1, LB+1);\r
+\r
+ g_MxTBM.Alloc("FastTBM", LA+1, LB+1);\r
+ g_MxTBD.Alloc("FastTBD", LA+1, LB+1);\r
+ g_MxTBI.Alloc("FastTBI", LA+1, LB+1);\r
+\r
+ g_DPM = g_MxDPM.GetData();\r
+ g_DPD = g_MxDPD.GetData();\r
+ g_DPI = g_MxDPI.GetData();\r
+\r
+ g_TBM = g_MxTBM.GetData();\r
+ g_TBD = g_MxTBD.GetData();\r
+ g_TBI = g_MxTBI.GetData();\r
+ }
+
+// Records score x in cell [i][j] of the saved M matrix. With CMP_SIMPLE
+// enabled, cells with i > 0 and j > 0 are also asserted equal (per feq) to
+// the reference M matrix.
+static void SAVE_DPM(unsigned i, unsigned j, float x)
+ {
+ g_DPM[i][j] = x;
+#if CMP_SIMPLE
+ if (i > 0 && j > 0)
+ asserta(feq(x, g_DPMSimple[i][j]));
+#endif
+ }
+
+// Records score x in cell [i][j] of the saved D matrix. With CMP_SIMPLE
+// enabled, cells with i > 0 and j > 0 are also asserted equal (per feq) to
+// the reference D matrix.
+static void SAVE_DPD(unsigned i, unsigned j, float x)
+ {
+ g_DPD[i][j] = x;
+#if CMP_SIMPLE
+ if (i > 0 && j > 0)
+ asserta(feq(x, g_DPDSimple[i][j]));
+#endif
+ }
+
+// Records score x in cell [i][j] of the saved I matrix. With CMP_SIMPLE
+// enabled, cells with i > 0 and j > 0 are also asserted equal (per feq) to
+// the reference I matrix.
+static void SAVE_DPI(unsigned i, unsigned j, float x)
+ {
+ g_DPI[i][j] = x;
+#if CMP_SIMPLE
+ if (i > 0 && j > 0)
+ asserta(feq(x, g_DPISimple[i][j]));
+#endif
+ }
+
+// Records traceback character x in cell [i][j] of the saved M traceback
+// matrix.
+static void SAVE_TBM(unsigned i, unsigned j, char x)
+ {
+ g_TBM[i][j] = x;
+ }
+
+// Records traceback character x in cell [i][j] of the saved D traceback
+// matrix.
+static void SAVE_TBD(unsigned i, unsigned j, char x)
+ {
+ g_TBD[i][j] = x;
+ }
+
+// Records traceback character x in cell [i][j] of the saved I traceback
+// matrix.
+static void SAVE_TBI(unsigned i, unsigned j, char x)
+ {
+ g_TBI[i][j] = x;
+ }
+
+// Exposes the saved score matrices of the fast implementation (SAVE_FAST
+// builds only). Each output pointer receives the address of the matching
+// file-level matrix object; nothing is copied.
+void GetFastMxs(Mx<float> **M, Mx<float> **D, Mx<float> **I)
+ {
+ *M = &g_MxDPM;
+ *D = &g_MxDPD;
+ *I = &g_MxDPI;
+ }
+
+#else // SAVE_FAST
+
+// SAVE_FAST disabled: every save/compare hook used by ViterbiFast expands to
+// nothing, so the calls in the DP loops compile away.
+#define SAVE_DPM(i, j, x) /* empty */
+#define SAVE_DPD(i, j, x) /* empty */
+#define SAVE_DPI(i, j, x) /* empty */
+
+#define SAVE_TBM(i, j, x) /* empty */
+#define SAVE_TBD(i, j, x) /* empty */
+#define SAVE_TBI(i, j, x) /* empty */
+
+#define AllocSave(LA, LB) /* empty */
+
+#define cmpm(i, j, x) /* empty */
+#define cmpd(i, j, x) /* empty */
+#define cmpi(i, j, x) /* empty */
+
+#endif // SAVE_FAST
+
+// Aligns sequence A (length LA) against sequence B (length LB) with a
+// three-state (M = match, D = delete, I = insert) dynamic program and returns
+// the best final score.
+//   A, LA  first sequence and its length; A[i] indexes rows of AP.SubstMx.
+//   B, LB  second sequence and its length; B[j] indexes columns of a row.
+//   AP     substitution matrix and gap penalties. The L* penalties are applied
+//          on the first row / first column, the R* penalties on the last
+//          row / last column, and the plain Open/Ext values everywhere else.
+//   PD     receives the alignment path from TraceBackBit().
+// Scores are kept in two rolling rows (Mrow, Drow) plus the scalar I0, while
+// per-cell traceback bits go to g_TBBit. The SAVE_* and cmp* hooks are no-ops
+// unless SAVE_FAST / CMP_SIMPLE are enabled.
+// Calls Die() when LA*LB exceeds 100 million cells.
+float ViterbiFast(const byte *A, unsigned LA, const byte *B, unsigned LB,
+ const AlnParams &AP, PathData &PD)
+ {
+ // Compare in double: the unsigned product LA*LB can wrap around and let an
+ // oversized problem slip past the guard.
+ if (double(LA)*double(LB) > 100.0*1000.0*1000.0)
+ Die("ViterbiFast, too long LA=%u, LB=%u", LA, LB);
+
+ AllocBit(LA, LB);
+ AllocSave(LA, LB);
+
+ StartTimer(ViterbiFast);
+
+ const float * const *Mx = AP.SubstMx;
+ // First row uses the "L" penalties for gaps in A; switched to the regular
+ // ones at the end of the first iteration of the main loop.
+ float OpenA = AP.LOpenA;
+ float ExtA = AP.LExtA;
+
+ byte **TB = g_TBBit;
+ float *Mrow = g_DPRow1;
+ float *Drow = g_DPRow2;
+
+// Use Mrow[-1], so...
+ Mrow[-1] = MINUS_INFINITY;
+ for (unsigned j = 0; j <= LB; ++j)
+ {
+ Mrow[j] = MINUS_INFINITY;
+ SAVE_DPM(0, j, MINUS_INFINITY);
+ SAVE_TBM(0, j, '?');
+
+ Drow[j] = MINUS_INFINITY;
+ SAVE_DPD(0, j, MINUS_INFINITY);
+ SAVE_TBD(0, j, '?');
+ }
+
+// Main loop
+ float M0 = float (0);
+ SAVE_DPM(0, 0, 0);
+ for (unsigned i = 0; i < LA; ++i)
+ {
+ byte a = A[i];
+ const float *MxRow = Mx[a];
+ // First column of every row uses the "L" penalties for gaps in B.
+ float OpenB = AP.LOpenB;
+ float ExtB = AP.LExtB;
+ float I0 = MINUS_INFINITY;
+
+ SAVE_TBM(i, 0, '?');
+
+ SAVE_DPI(i, 0, MINUS_INFINITY);
+ SAVE_DPI(i, 1, MINUS_INFINITY);
+
+ SAVE_TBI(i, 0, '?');
+ SAVE_TBI(i, 1, '?');
+
+ byte *TBrow = TB[i];
+ for (unsigned j = 0; j < LB; ++j)
+ {
+ byte b = B[j];
+ byte TraceBits = 0;
+ // Keep DPM[i][j]: M0 is overwritten in the MATCH step but the DELETE
+ // and INSERT steps still need the old value.
+ float SavedM0 = M0;
+
+ // MATCH
+ {
+ // M0 = DPM[i][j]
+ // I0 = DPI[i][j]
+ // Drow[j] = DPD[i][j]
+ cmpm(i, j, M0);
+ cmpd(i, j, Drow[j]);
+ cmpi(i, j, I0);
+
+ float xM = M0;
+ SAVE_TBM(i+1, j+1, 'M');
+ if (Drow[j] > xM)
+ {
+ xM = Drow[j];
+ TraceBits = TRACEBITS_DM;
+ SAVE_TBM(i+1, j+1, 'D');
+ }
+ if (I0 > xM)
+ {
+ xM = I0;
+ TraceBits = TRACEBITS_IM;
+ SAVE_TBM(i+1, j+1, 'I');
+ }
+ M0 = Mrow[j];
+ cmpm(i, j+1, M0);
+
+ Mrow[j] = xM + MxRow[b];
+ // Mrow[j] = DPM[i+1][j+1])
+ SAVE_DPM(i+1, j+1, Mrow[j]);
+ }
+
+ // DELETE
+ {
+ // SavedM0 = DPM[i][j]
+ // Drow[j] = DPD[i][j]
+ cmpm(i, j, SavedM0);
+ cmpd(i, j, Drow[j]);
+
+ float md = SavedM0 + OpenB;
+ Drow[j] += ExtB;
+ SAVE_TBD(i+1, j, 'D');
+ if (md >= Drow[j])
+ {
+ Drow[j] = md;
+ TraceBits |= TRACEBITS_MD;
+ SAVE_TBD(i+1, j, 'M');
+ }
+ // Drow[j] = DPD[i+1][j]
+ SAVE_DPD(i+1, j, Drow[j]);
+ }
+
+ // INSERT
+ {
+ // SavedM0 = DPM[i][j]
+ // I0 = DPI[i][j]
+ cmpm(i, j, SavedM0);
+ cmpi(i, j, I0);
+
+ float mi = SavedM0 + OpenA;
+ I0 += ExtA;
+ SAVE_TBI(i, j+1, 'I');
+ if (mi >= I0)
+ {
+ I0 = mi;
+ TraceBits |= TRACEBITS_MI;
+ SAVE_TBI(i, j+1, 'M');
+ }
+ // I0 = DPI[i][j+1]
+ SAVE_DPI(i, j+1, I0);
+ }
+
+ // Past the first column: regular penalties for gaps in B.
+ OpenB = AP.OpenB;
+ ExtB = AP.ExtB;
+
+ TBrow[j] = TraceBits;
+ }
+
+ // Special case for end of Drow[]
+ {
+ // M0 = DPM[i][LB]
+ // Drow[LB] = DPD[i][LB]
+
+ TBrow[LB] = 0;
+ float md = M0 + AP.ROpenB;
+ Drow[LB] += AP.RExtB;
+ SAVE_TBD(i+1, LB, 'D');
+ if (md >= Drow[LB])
+ {
+ Drow[LB] = md;
+ TBrow[LB] = TRACEBITS_MD;
+ SAVE_TBD(i+1, LB, 'M');
+ }
+ // Drow[LB] = DPD[i+1][LB]
+ SAVE_DPD(i+1, LB, Drow[LB]);
+ }
+
+ SAVE_DPM(i+1, 0, MINUS_INFINITY);
+ M0 = MINUS_INFINITY;
+
+ // Past the first row: regular penalties for gaps in A.
+ OpenA = AP.OpenA;
+ ExtA = AP.ExtA;
+ }
+
+ SAVE_TBM(LA, 0, '?');
+
+// Special case for last row of DPI
+ byte *TBrow = TB[LA];
+ float I1 = MINUS_INFINITY;
+
+ SAVE_DPI(LA, 0, MINUS_INFINITY);
+ SAVE_TBI(LA, 0, '?');
+
+ SAVE_DPI(LA, 1, MINUS_INFINITY);
+ SAVE_TBI(LA, 1, '?');
+
+ for (unsigned j = 1; j < LB; ++j)
+ {
+ // Mrow[j-1] = DPM[LA][j]
+ // I1 = DPI[LA][j]
+
+ TBrow[j] = 0;
+ float mi = Mrow[int(j)-1] + AP.ROpenA;
+ I1 += AP.RExtA;
+ SAVE_TBI(LA, j+1, 'I');
+ if (mi > I1)
+ {
+ I1 = mi;
+ TBrow[j] = TRACEBITS_MI;
+ SAVE_TBI(LA, j+1, 'M');
+ }
+ SAVE_DPI(LA, j+1, I1);
+ }
+
+ // Signed index so that LB == 0 reads the Mrow[-1] sentinel instead of
+ // wrapping to Mrow[UINT_MAX].
+ float FinalM = Mrow[int(LB)-1];
+ float FinalD = Drow[LB];
+ float FinalI = I1;
+// FinalM = DPM[LA][LB]
+// FinalD = DPD[LA][LB]
+// FinalI = DPI[LA][LB]
+
+ // Best of the three end states; ties keep the earlier choice (M, then D).
+ float Score = FinalM;
+ byte State = 'M';
+ if (FinalD > Score)
+ {
+ Score = FinalD;
+ State = 'D';
+ }
+ if (FinalI > Score)
+ {
+ Score = FinalI;
+ State = 'I';
+ }
+
+ EndTimer(ViterbiFast);
+ TraceBackBit(LA, LB, State, PD);
+
+#if SAVE_FAST
+ g_MxDPM.LogMe();
+ g_MxDPD.LogMe();
+ g_MxDPI.LogMe();
+
+ g_MxTBM.LogMe();
+ g_MxTBD.LogMe();
+ g_MxTBI.LogMe();
+#endif
+
+ return Score;
+ }
--- /dev/null
+#ifndef windex_h\r
+#define windex_h\r
+\r
+class SFasta;\r
+struct SeqDB;\r
+\r
+typedef uint32 word_t;\r
+typedef uint16 wordcount_t;\r
+typedef uint32 arrsize_t;\r
+typedef uint16 seqcountperword_t;\r
+typedef uint32 seqindex_t;\r
+typedef uint16 commonwordcount_t;\r
+\r
+const uint32 WindexFileHdr_Magic1 = 0x312DE41;\r
+const uint32 WindexFileHdr_Magic2 = 0x312DE42;\r
+const uint32 WindexFileHdr_Magic3 = 0x312DE43;\r
+const uint32 WindexFileHdr_Magic4 = 0x312DE44;\r
+\r
+// Fixed-size header made of four 32-bit fields: two magic fields enclosing\r
+// the IsNucleo flag and the word length.\r
+// NOTE(review): presumably Magic1/Magic2 hold WindexFileHdr_Magic1/2 and the\r
+// struct is written by Windex::ToFile / read by Windex::FromFile -- confirm\r
+// against the implementation.\r
+struct WindexFileHdr\r
+ {\r
+ uint32 Magic1;\r
+ uint32 IsNucleo;\r
+ uint32 WordLength;\r
+ uint32 Magic2;\r
+ };\r
+\r
+// Word index over a set of sequences. Only the interface is declared here;\r
+// the member functions are implemented elsewhere, so the notes below are\r
+// limited to what the declarations themselves show.\r
+class Windex\r
+ {\r
+public:\r
+ // Alphabet / word configuration.\r
+ bool m_Nucleo;\r
+ bool m_RedAlpha;\r
+ unsigned m_WordLength;\r
+ unsigned m_AlphaSize;\r
+ unsigned m_WordCount;\r
+ unsigned m_Hi;\r
+ unsigned m_CapacityInc;\r
+ // Dynamically allocated tables.\r
+ // NOTE(review): presumably indexed by word, m_WordCount entries each --\r
+ // confirm against the implementation.\r
+ arrsize_t *m_Capacities;\r
+ arrsize_t *m_Sizes;\r
+ float *m_WordScores;\r
+ seqindex_t **m_SeedIndexes;\r
+ byte *m_UniqueCounts;\r
+ // Lookup table with one entry per possible byte value.\r
+ unsigned m_CharToLetter[256];\r
+\r
+public:\r
+ Windex();\r
+ // Persistence and construction from sequence sources.\r
+ void ToFile(const string &FileName) const;\r
+ void FromFile(const string &FileName);\r
+ void FromSFasta(SFasta &SF);\r
+ void FromSeqDB(const SeqDB &DB);\r
+ // The ctor flag defaults to false.\r
+ void Clear(bool ctor = false);\r
+ void AddWords(unsigned SeqIndex, const word_t *Words, unsigned N);\r
+ // Initialization variants.\r
+ void Init(bool Nucleo, unsigned WordLength);\r
+ void Init2(bool Nucleo, unsigned TableSize);\r
+ void InitRed(unsigned WordLength);\r
+ void InitWordScores(const float *const *SubstMx);\r
+ void Reset();\r
+ // Logging / diagnostics (const: they do not modify the index).\r
+ void LogMe() const;\r
+ unsigned LogMemSize() const;\r
+ void LogWordStats(unsigned TopWords = 10) const;\r
+ // Conversions between sequences, words and their printable form.\r
+ const char *WordToStr(word_t Word) const;\r
+ word_t SeqToWord(const byte *Seq) const;\r
+ unsigned SeqToWords(const byte *Seq, unsigned L, word_t *Words) const;\r
+ unsigned SeqToWordsStep(unsigned Step, const byte *Seq, unsigned L, word_t *Words) const;\r
+ unsigned WordsToCounts(const word_t *Words, unsigned N,\r
+ word_t *UniqueWords, seqcountperword_t *Counts) const;\r
+ unsigned GetUniqueWords(const word_t *Words, unsigned N,\r
+ word_t *UniqueWords) const;\r
+ void LogSizeHisto() const;\r
+ };\r
+\r
+#endif // windex_h\r
--- /dev/null
+#include "myutils.h"\r
+#include "chime.h"\r
+\r
+// Writes the tab-separated column header line for WriteChimeHit() records.\r
+// A null stream is ignored. The line starts with a tab, so column 0 (the\r
+// score column of the records) has an empty heading.\r
+void WriteChimeFileHdr(FILE *f)\r
+    {\r
+    if (f != 0)\r
+        {\r
+        fputs(\r
+            "\tQuery"  // 1\r
+            "\tA"      // 2\r
+            "\tB"      // 3\r
+            "\tIdQM"   // 4\r
+            "\tIdQA"   // 5\r
+            "\tIdQB"   // 6\r
+            "\tIdAB"   // 7\r
+            "\tIdQT"   // 8\r
+            "\tLY"     // 9\r
+            "\tLN"     // 10\r
+            "\tLA"     // 11\r
+            "\tRY"     // 12\r
+            "\tRN"     // 13\r
+            "\tRA"     // 14\r
+            "\tDiv"    // 15\r
+            "\tY"      // 16\r
+            "\n",\r
+            f);\r
+        }\r
+    }\r
+\r
+// Writes one tab-separated record (17 columns, newline-terminated) for Hit.\r
+// A null stream is ignored.\r
+//   Hit.Div <= 0 : score "0.0000", the query label, '*' in columns 2-15 and\r
+//                  'N' in the last column.\r
+//   otherwise    : score, the three labels, five identities (one decimal),\r
+//                  six vote counts, Div (two decimals) and yon(Hit.Accept()).\r
+void WriteChimeHit(FILE *f, const ChimeHit2 &Hit)\r
+    {\r
+    if (f == 0)\r
+        return;\r
+\r
+    if (Hit.Div <= 0.0)\r
+        {\r
+        fputs("0.0000", f);                     // 0\r
+        fputc('\t', f);\r
+        fputs(Hit.QLabel.c_str(), f);           // 1\r
+        fputs(\r
+            "\t*"  // 2\r
+            "\t*"  // 3\r
+            "\t*"  // 4\r
+            "\t*"  // 5\r
+            "\t*"  // 6\r
+            "\t*"  // 7\r
+            "\t*"  // 8\r
+            "\t*"  // 9\r
+            "\t*"  // 10\r
+            "\t*"  // 11\r
+            "\t*"  // 12\r
+            "\t*"  // 13\r
+            "\t*"  // 14\r
+            "\t*"  // 15\r
+            "\tN"  // 16\r
+            "\n",\r
+            f);\r
+        return;\r
+        }\r
+\r
+    fprintf(f, "%.4f", Hit.Score);              // 0\r
+\r
+    // Columns 1-3: query and parent labels.\r
+    fprintf(f, "\t%s\t%s\t%s",\r
+        Hit.QLabel.c_str(), Hit.ALabel.c_str(), Hit.BLabel.c_str());\r
+\r
+    // Columns 4-8: percent identities.\r
+    fprintf(f, "\t%.1f", Hit.PctIdQM);\r
+    fprintf(f, "\t%.1f", Hit.PctIdQA);\r
+    fprintf(f, "\t%.1f", Hit.PctIdQB);\r
+    fprintf(f, "\t%.1f", Hit.PctIdAB);\r
+    fprintf(f, "\t%.1f", Hit.PctIdQT);\r
+\r
+    // Columns 9-14: left and right vote counts.\r
+    fprintf(f, "\t%u\t%u\t%u", Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);\r
+    fprintf(f, "\t%u\t%u\t%u", Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);\r
+\r
+    fprintf(f, "\t%.2f", Hit.Div);              // 15\r
+\r
+    fprintf(f, "\t%c\n", yon(Hit.Accept()));    // 16\r
+    }\r
+\r
+// Returns how many of the first L characters of Seq are not gap characters\r
+// according to isgap().\r
+unsigned GetUngappedLength(const byte *Seq, unsigned L)\r
+    {\r
+    unsigned Letters = 0;\r
+    const byte *const End = Seq + L;\r
+    for (const byte *Ptr = Seq; Ptr != End; ++Ptr)\r
+        {\r
+        if (isgap(*Ptr))\r
+            continue;\r
+        ++Letters;\r
+        }\r
+    return Letters;\r
+    }\r
+\r
+// Writes a human-readable report of a chimera hit: a header naming the query\r
+// and both parents, the three-way alignment (Hit.Q3 / Hit.A3 / Hit.B3) in\r
+// blocks of 80 columns with annotation rows, and a summary of identities and\r
+// votes.\r
+//   f    output stream; a null stream is ignored.\r
+//   Hit  hit to report; nothing is written when Hit.Div <= 0 or when the\r
+//        aligned query row contains no residues.\r
+// Annotation rows per block:\r
+//   Diffs  which parent the query agrees with at a differing column\r
+//          (upper case = the parent the model uses on that side, lower case =\r
+//          the other parent, 'N' = parents agree but query differs, '?' = all\r
+//          three differ).\r
+//   Votes  '+' supports the model, '!' contradicts it, '0' otherwise; columns\r
+//          that contain or neighbour a gap are left blank.\r
+//   Model  'A' left of the crossover, 'x' inside [ColXLo, ColXHi], 'B' right.\r
+void WriteChimeHitX(FILE *f, const ChimeHit2 &Hit)\r
+ {\r
+ if (f == 0)\r
+ return;\r
+\r
+ if (Hit.Div <= 0.0)\r
+ return;\r
+\r
+ const string &Q3 = Hit.Q3;\r
+ const string &A3 = Hit.A3;\r
+ const string &B3 = Hit.B3;\r
+\r
+ const byte *Q3Seq = (const byte *) Q3.c_str();\r
+ const byte *A3Seq = (const byte *) A3.c_str();\r
+ const byte *B3Seq = (const byte *) B3.c_str();\r
+\r
+// Aligned\r
+ unsigned ColCount = SIZE(Q3);\r
+ asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);\r
+\r
+ unsigned LQ = GetUngappedLength(Q3Seq, ColCount);\r
+ unsigned LA = GetUngappedLength(A3Seq, ColCount);\r
+ unsigned LB = GetUngappedLength(B3Seq, ColCount);\r
+\r
+ // An empty or all-gap query row would leave FromCol/ToCol at UINT_MAX below\r
+ // and send the column loops out of bounds; there is nothing to draw.\r
+ if (LQ == 0)\r
+ return;\r
+\r
+ fprintf(f, "\n");\r
+ fprintf(f, "------------------------------------------------------------------------\n");\r
+ fprintf(f, "Query (%5u nt) %s\n", LQ, Hit.QLabel.c_str());\r
+ fprintf(f, "ParentA (%5u nt) %s\n", LA, Hit.ALabel.c_str());\r
+ fprintf(f, "ParentB (%5u nt) %s\n", LB, Hit.BLabel.c_str());\r
+\r
+// Strip terminal gaps in query\r
+ unsigned FromCol = UINT_MAX;\r
+ unsigned ToCol = UINT_MAX;\r
+ for (unsigned Col = 0; Col < ColCount; ++Col)\r
+ {\r
+ if (!isgap(Q3Seq[Col]))\r
+ {\r
+ if (FromCol == UINT_MAX)\r
+ FromCol = Col;\r
+ ToCol = Col;\r
+ }\r
+ }\r
+\r
+ // Parent positions start after whatever the parents have in the columns\r
+ // that precede the first query residue.\r
+ unsigned QPos = 0;\r
+ unsigned APos = 0;\r
+ unsigned BPos = 0;\r
+ for (unsigned Col = 0; Col < FromCol; ++Col)\r
+ {\r
+ if (!isgap(A3Seq[Col]))\r
+ ++APos;\r
+ if (!isgap(B3Seq[Col]))\r
+ ++BPos;\r
+ }\r
+\r
+ unsigned Range = ToCol - FromCol + 1;\r
+ unsigned RowCount = (Range + 79)/80;\r
+ unsigned RowFromCol = FromCol;\r
+ for (unsigned RowIndex = 0; RowIndex < RowCount; ++RowIndex)\r
+ {\r
+ fprintf(f, "\n");\r
+ unsigned RowToCol = RowFromCol + 79;\r
+ if (RowToCol > ToCol)\r
+ RowToCol = ToCol;\r
+\r
+ // A row\r
+ fprintf(f, "A %5u ", APos + 1);\r
+ for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ // Lower case marks a parent character that differs from the query.\r
+ if (a != q)\r
+ a = (char) tolower((unsigned char) a);\r
+ fprintf(f, "%c", a);\r
+ if (!isgap(a))\r
+ ++APos;\r
+ }\r
+ fprintf(f, " %u\n", APos);\r
+\r
+ // Q row\r
+ fprintf(f, "Q %5u ", QPos + 1);\r
+ for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ fprintf(f, "%c", q);\r
+ if (!isgap(q))\r
+ ++QPos;\r
+ }\r
+ fprintf(f, " %u\n", QPos);\r
+\r
+ // B row\r
+ fprintf(f, "B %5u ", BPos + 1);\r
+ for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char b = B3Seq[Col];\r
+ if (b != q)\r
+ b = (char) tolower((unsigned char) b);\r
+ fprintf(f, "%c", b);\r
+ if (!isgap(b))\r
+ ++BPos;\r
+ }\r
+ fprintf(f, " %u\n", BPos);\r
+\r
+ // Diffs\r
+ fprintf(f, "Diffs ");\r
+ for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+\r
+ char c = ' ';\r
+ if (isgap(q) || isgap(a) || isgap(b))\r
+ c = ' ';\r
+ else if (Col < Hit.ColXLo)\r
+ {\r
+ if (q == a && q == b)\r
+ c = ' ';\r
+ else if (q == a && q != b)\r
+ c = 'A';\r
+ else if (q == b && q != a)\r
+ c = 'b';\r
+ else if (a == b && q != a)\r
+ c = 'N';\r
+ else\r
+ c = '?';\r
+ }\r
+ else if (Col > Hit.ColXHi)\r
+ {\r
+ if (q == a && q == b)\r
+ c = ' ';\r
+ else if (q == b && q != a)\r
+ c = 'B';\r
+ else if (q == a && q != b)\r
+ c = 'a';\r
+ else if (a == b && q != a)\r
+ c = 'N';\r
+ else\r
+ c = '?';\r
+ }\r
+\r
+ fprintf(f, "%c", c);\r
+ }\r
+ fprintf(f, "\n");\r
+\r
+ // SNPs\r
+ fprintf(f, "Votes ");\r
+ for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+ {\r
+ char q = Q3Seq[Col];\r
+ char a = A3Seq[Col];\r
+ char b = B3Seq[Col];\r
+\r
+ // Columns adjacent to a gap in any of the three rows do not vote.\r
+ bool PrevGap = Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1]));\r
+ bool NextGap = Col+1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1]));\r
+\r
+ char c = ' ';\r
+ if (isgap(q) || isgap(a) || isgap(b) || PrevGap || NextGap)\r
+ c = ' ';\r
+ else if (Col < Hit.ColXLo)\r
+ {\r
+ if (q == a && q == b)\r
+ c = ' ';\r
+ else if (q == a && q != b)\r
+ c = '+';\r
+ else if (q == b && q != a)\r
+ c = '!';\r
+ else\r
+ c = '0';\r
+ }\r
+ else if (Col > Hit.ColXHi)\r
+ {\r
+ if (q == a && q == b)\r
+ c = ' ';\r
+ else if (q == b && q != a)\r
+ c = '+';\r
+ else if (q == a && q != b)\r
+ c = '!';\r
+ else\r
+ c = '0';\r
+ }\r
+\r
+ fprintf(f, "%c", c);\r
+ }\r
+ fprintf(f, "\n");\r
+\r
+ // LR row\r
+ fprintf(f, "Model ");\r
+ for (unsigned Col = RowFromCol; Col <= RowToCol; ++Col)\r
+ {\r
+ if (Col < Hit.ColXLo)\r
+ fprintf(f, "A");\r
+ else if (Col >= Hit.ColXLo && Col <= Hit.ColXHi)\r
+ fprintf(f, "x");\r
+ else\r
+ fprintf(f, "B");\r
+ }\r
+\r
+ fprintf(f, "\n");\r
+\r
+ RowFromCol += 80;\r
+ }\r
+ fprintf(f, "\n");\r
+\r
+ // Divergence shown here is recomputed from the identities: the model's\r
+ // gain over the closer parent, relative to that parent's identity.\r
+ double PctIdBestP = max(Hit.PctIdQA, Hit.PctIdQB);\r
+ double Div = (Hit.PctIdQM - PctIdBestP)*100.0/PctIdBestP;\r
+\r
+ unsigned LTot = Hit.CS_LY + Hit.CS_LN + Hit.CS_LA;\r
+ unsigned RTot = Hit.CS_RY + Hit.CS_RN + Hit.CS_RA;\r
+\r
+ double PctL = Pct(Hit.CS_LY, LTot);\r
+ double PctR = Pct(Hit.CS_RY, RTot);\r
+\r
+ fprintf(f,\r
+ "Ids. QA %.1f%%, QB %.1f%%, AB %.1f%%, QModel %.1f%%, Div. %+.1f%%\n",\r
+ Hit.PctIdQA,\r
+ Hit.PctIdQB,\r
+ Hit.PctIdAB,\r
+ Hit.PctIdQM,\r
+ Div);\r
+\r
+ fprintf(f,\r
+ "Diffs Left %u: N %u, A %u, Y %u (%.1f%%); Right %u: N %u, A %u, Y %u (%.1f%%), Score %.4f\n",\r
+ LTot, Hit.CS_LN, Hit.CS_LA, Hit.CS_LY, PctL,\r
+ RTot, Hit.CS_RN, Hit.CS_RA, Hit.CS_RY, PctR,\r
+ Hit.Score);\r
+ }\r