From f93dae0d03856955f9424e8b2aaf261304ca647e Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 22 Dec 2008 15:58:02 +0000 Subject: [PATCH] Create trunk copy --- COPYING | 21 ++ ChangeLog | 540 ++++++++++++++++++++++++++++ ChangeLog.old | 806 ++++++++++++++++++++++++++++++++++++++++++ Makefile | 62 ++++ NEWS | 6 + bam.c | 274 ++++++++++++++ bam.h | 659 ++++++++++++++++++++++++++++++++++ bam_aux.c | 160 +++++++++ bam_endian.h | 42 +++ bam_import.c | 372 +++++++++++++++++++ bam_index.c | 452 +++++++++++++++++++++++ bam_lpileup.c | 196 ++++++++++ bam_maqcns.c | 451 +++++++++++++++++++++++ bam_maqcns.h | 48 +++ bam_pileup.c | 213 +++++++++++ bam_plcmd.c | 194 ++++++++++ bam_sort.c | 229 ++++++++++++ bam_tview.c | 315 +++++++++++++++++ bamtk.c | 112 ++++++ bgzf.c | 488 +++++++++++++++++++++++++ bgzf.h | 102 ++++++ bgzip.c | 166 +++++++++ examples/00README.txt | 28 ++ examples/ex1.fa | 56 +++ examples/ex1.fa.fai | 2 + examples/ex1.sam.gz | Bin 0 -> 107374 bytes faidx.c | 287 +++++++++++++++ faidx.h | 81 +++++ glf.h | 11 + khash.h | 486 +++++++++++++++++++++++++ kseq.h | 207 +++++++++++ ksort.h | 271 ++++++++++++++ misc/Makefile | 52 +++ misc/export2sam.pl | 107 ++++++ misc/maq2sam.c | 168 +++++++++ misc/md5.c | 307 ++++++++++++++++ misc/md5.h | 68 ++++ misc/md5fa.c | 58 +++ razf.c | 647 +++++++++++++++++++++++++++++++++ razf.h | 117 ++++++ razip.c | 139 ++++++++ samtools.1 | 258 ++++++++++++++ source.dot | 15 + zutil.h | 269 ++++++++++++++ 44 files changed, 9542 insertions(+) create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 ChangeLog.old create mode 100644 Makefile create mode 100644 NEWS create mode 100644 bam.c create mode 100644 bam.h create mode 100644 bam_aux.c create mode 100644 bam_endian.h create mode 100644 bam_import.c create mode 100644 bam_index.c create mode 100644 bam_lpileup.c create mode 100644 bam_maqcns.c create mode 100644 bam_maqcns.h create mode 100644 bam_pileup.c create mode 100644 bam_plcmd.c create mode 100644 bam_sort.c 
create mode 100644 bam_tview.c create mode 100644 bamtk.c create mode 100644 bgzf.c create mode 100644 bgzf.h create mode 100644 bgzip.c create mode 100644 examples/00README.txt create mode 100644 examples/ex1.fa create mode 100644 examples/ex1.fa.fai create mode 100644 examples/ex1.sam.gz create mode 100644 faidx.c create mode 100644 faidx.h create mode 100644 glf.h create mode 100644 khash.h create mode 100644 kseq.h create mode 100644 ksort.h create mode 100644 misc/Makefile create mode 100755 misc/export2sam.pl create mode 100644 misc/maq2sam.c create mode 100644 misc/md5.c create mode 100644 misc/md5.h create mode 100644 misc/md5fa.c create mode 100644 razf.c create mode 100644 razf.h create mode 100644 razip.c create mode 100644 samtools.1 create mode 100644 source.dot create mode 100644 zutil.h diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..2f596e5 --- /dev/null +++ b/COPYING @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2008 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..4c52aad --- /dev/null +++ b/ChangeLog @@ -0,0 +1,540 @@ +------------------------------------------------------------------------ +r58 | lh3lh3 | 2008-12-20 23:06:00 +0000 (Sat, 20 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/misc/export2sam.pl + + * added comments + * fixed several bugs + +------------------------------------------------------------------------ +r57 | lh3lh3 | 2008-12-20 15:44:20 +0000 (Sat, 20 Dec 2008) | 2 lines +Changed paths: + A /branches/dev/samtools/misc/export2sam.pl + +convert Export format to SAM; not thoroughly tested + +------------------------------------------------------------------------ +r56 | lh3lh3 | 2008-12-19 22:13:28 +0000 (Fri, 19 Dec 2008) | 6 lines +Changed paths: + M /branches/dev/samtools/bam_import.c + M /branches/dev/samtools/bam_plcmd.c + M /branches/dev/samtools/bam_tview.c + M /branches/dev/samtools/bamtk.c + A /branches/dev/samtools/source.dot + + * samtools-0.1.0-65 + * pileup: generate maq-like simple output + * pileup: allow to output pileup at required sites + * source.dot: source file relationship graph + * tview: fixed a minor bug + +------------------------------------------------------------------------ +r55 | lh3lh3 | 2008-12-19 20:10:26 +0000 (Fri, 19 Dec 2008) | 2 lines +Changed paths: + D /branches/dev/samtools/misc/all2sam.pl + +remove all2sam.pl + +------------------------------------------------------------------------ +r54 | lh3lh3 | 2008-12-16 22:34:25 +0000 (Tue, 16 Dec 2008) | 2 lines +Changed paths: + A /branches/dev/samtools/COPYING + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/faidx.h + M 
/branches/dev/samtools/khash.h + M /branches/dev/samtools/kseq.h + M /branches/dev/samtools/ksort.h + M /branches/dev/samtools/samtools.1 + +Added copyright information and a bit more documentation. No code change. + +------------------------------------------------------------------------ +r53 | lh3lh3 | 2008-12-16 13:40:18 +0000 (Tue, 16 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/bam.c + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/bam_index.c + M /branches/dev/samtools/bam_maqcns.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-64 + * improved efficiency of the indel caller for spliced alignments + +------------------------------------------------------------------------ +r52 | lh3lh3 | 2008-12-16 10:28:20 +0000 (Tue, 16 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/bam.c + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/bam_aux.c + M /branches/dev/samtools/bam_index.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-63 + * a bit code cleanup: reduce the dependency between source files + +------------------------------------------------------------------------ +r51 | lh3lh3 | 2008-12-15 14:29:32 +0000 (Mon, 15 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/bam_maqcns.c + M /branches/dev/samtools/bam_plcmd.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-62 + * fixed a memory leak + +------------------------------------------------------------------------ +r50 | lh3lh3 | 2008-12-15 14:00:13 +0000 (Mon, 15 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/ChangeLog + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/samtools.1 + +update documentation, ChangeLog and a comment + +------------------------------------------------------------------------ +r49 | lh3lh3 | 2008-12-15 13:36:43 +0000 (Mon, 15 Dec 2008) | 6 lines +Changed paths: + M /branches/dev/samtools/Makefile + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/bam_maqcns.c + 
M /branches/dev/samtools/bam_maqcns.h + M /branches/dev/samtools/bam_pileup.c + A /branches/dev/samtools/bam_plcmd.c + M /branches/dev/samtools/bamtk.c + M /branches/dev/samtools/samtools.1 + + * samtools-0.1.0-61 + * moved pileup command to a separate source file + * added indel caller + * added bam_cal_segend(). (NOT WORKING for spliced alignment!!!) + * updated documentation + +------------------------------------------------------------------------ +r48 | lh3lh3 | 2008-12-12 13:55:36 +0000 (Fri, 12 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/bam_maqcns.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-60 + * fixed another bug in maqcns when there is a nearby deletion + +------------------------------------------------------------------------ +r47 | lh3lh3 | 2008-12-12 13:42:16 +0000 (Fri, 12 Dec 2008) | 5 lines +Changed paths: + M /branches/dev/samtools/bam_maqcns.c + M /branches/dev/samtools/bam_pileup.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-59 + * pileup: outputting consensus is now optional + * fixed a bug in glfgen. This bug also exists in maq's glfgen. However, + I am not quite sure why the previous version may have problem. + +------------------------------------------------------------------------ +r46 | lh3lh3 | 2008-12-12 11:44:56 +0000 (Fri, 12 Dec 2008) | 6 lines +Changed paths: + M /branches/dev/samtools/bam_pileup.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-58 + * add maq consensus to pileup. However, I will move this part to a new + command as strictly speaking, consensus calling is not part of pileup, + and imposing it would make it harder to generate for other language + bindings. + +------------------------------------------------------------------------ +r45 | bhandsaker | 2008-12-11 20:43:56 +0000 (Thu, 11 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/bgzf.c + +Fix bug in tell() after reads that consume to the exact end of a block. 
+ +------------------------------------------------------------------------ +r44 | lh3lh3 | 2008-12-11 09:36:53 +0000 (Thu, 11 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/samtools.1 + +update manual + +------------------------------------------------------------------------ +r43 | lh3lh3 | 2008-12-11 09:25:36 +0000 (Thu, 11 Dec 2008) | 4 lines +Changed paths: + M /branches/dev/samtools/bam_import.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-57 + * fixed a bug in parser when there is auxiliary fields + * made the parser a bit more robust + +------------------------------------------------------------------------ +r42 | lh3lh3 | 2008-12-10 14:57:29 +0000 (Wed, 10 Dec 2008) | 5 lines +Changed paths: + M /branches/dev/samtools/bam_index.c + M /branches/dev/samtools/bamtk.c + M /branches/dev/samtools/bgzf.c + + * samtools-0.1.0-56 + * fixed a bug in bgzf (only reading is affected) + * fixed a typo in bam_index.c + * in bam_index.c, check potential bugs in the underlying I/O library + +------------------------------------------------------------------------ +r41 | lh3lh3 | 2008-12-10 12:53:08 +0000 (Wed, 10 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/samtools.1 + +update manual + +------------------------------------------------------------------------ +r40 | lh3lh3 | 2008-12-10 11:52:10 +0000 (Wed, 10 Dec 2008) | 5 lines +Changed paths: + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/bam_pileup.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-55 + * tried to make pileup work with clipping (previously not), though NOT tested + * removed -v from pileup + * made pileup take the reference sequence + +------------------------------------------------------------------------ +r39 | lh3lh3 | 2008-12-09 11:59:28 +0000 (Tue, 09 Dec 2008) | 4 lines +Changed paths: + M /branches/dev/samtools/bam_import.c + M /branches/dev/samtools/bamtk.c + M /branches/dev/samtools/samtools.1 + + * samtools-0.1.0-54 + * in 
parser, recognize "=", rather than ",", as a match + * in parser, correctly parse "=" at the MRNM field. + +------------------------------------------------------------------------ +r38 | lh3lh3 | 2008-12-09 11:39:07 +0000 (Tue, 09 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/misc/maq2sam.c + +fixed a bug in handling maq flag 64 and 192 + +------------------------------------------------------------------------ +r37 | lh3lh3 | 2008-12-09 09:53:46 +0000 (Tue, 09 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/misc/md5fa.c + +also calculate unordered md5sum check + +------------------------------------------------------------------------ +r36 | lh3lh3 | 2008-12-09 09:46:21 +0000 (Tue, 09 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/misc/md5fa.c + +fixed a minor bug when there are spaces in the sequence + +------------------------------------------------------------------------ +r35 | lh3lh3 | 2008-12-09 09:40:45 +0000 (Tue, 09 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/misc/md5fa.c + +fixed a potential memory leak + +------------------------------------------------------------------------ +r34 | lh3lh3 | 2008-12-08 14:52:17 +0000 (Mon, 08 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/bam_import.c + M /branches/dev/samtools/bam_index.c + M /branches/dev/samtools/bamtk.c + + * fixed a bug in import: bin is wrongly calculated + +------------------------------------------------------------------------ +r33 | lh3lh3 | 2008-12-08 14:08:01 +0000 (Mon, 08 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/misc/all2sam.pl + +nothing, really + +------------------------------------------------------------------------ +r32 | lh3lh3 | 2008-12-08 12:56:02 +0000 (Mon, 08 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/Makefile + M /branches/dev/samtools/kseq.h + M /branches/dev/samtools/misc/Makefile + A /branches/dev/samtools/misc/md5.c + A 
/branches/dev/samtools/misc/md5.h + A /branches/dev/samtools/misc/md5fa.c + + * fixed two warnings in kseq.h + * added md5sum utilities + +------------------------------------------------------------------------ +r31 | lh3lh3 | 2008-12-08 11:35:29 +0000 (Mon, 08 Dec 2008) | 5 lines +Changed paths: + M /branches/dev/samtools/Makefile + M /branches/dev/samtools/bam_import.c + M /branches/dev/samtools/bamtk.c + A /branches/dev/samtools/kseq.h + D /branches/dev/samtools/kstream.h + + * samtools-0.1.0-52 + * replace kstream with kseq. kseq is a superset of kstream. I need the + extra functions in kseq.h. + * also compile stand-alone faidx + +------------------------------------------------------------------------ +r30 | lh3lh3 | 2008-12-08 11:17:04 +0000 (Mon, 08 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/bam_sort.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-51 + * sorting by read names is available + +------------------------------------------------------------------------ +r29 | lh3lh3 | 2008-12-08 10:29:02 +0000 (Mon, 08 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/bam.c + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/bam_import.c + M /branches/dev/samtools/bam_maqcns.c + M /branches/dev/samtools/bam_pileup.c + M /branches/dev/samtools/bam_sort.c + M /branches/dev/samtools/bam_tview.c + M /branches/dev/samtools/bamtk.c + M /branches/dev/samtools/misc/maq2sam.c + + * samtools-0.1.0-50 + * format change to meet the latest specification + +------------------------------------------------------------------------ +r28 | lh3lh3 | 2008-12-04 16:09:21 +0000 (Thu, 04 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/bam_maqcns.c + M /branches/dev/samtools/misc/maq2sam.c + + * minor change in maqcns: special care when n==0 + * change maq2sam to meet the latest specification + +------------------------------------------------------------------------ +r27 | lh3lh3 | 
2008-12-04 15:55:44 +0000 (Thu, 04 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/razf.c + M /branches/dev/samtools/razf.h + +considerable code clean up in razf + +------------------------------------------------------------------------ +r26 | lh3lh3 | 2008-12-04 15:08:18 +0000 (Thu, 04 Dec 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/ChangeLog + M /branches/dev/samtools/Makefile + M /branches/dev/samtools/faidx.c + +make RAZF optional in faidx.c + +------------------------------------------------------------------------ +r25 | lh3lh3 | 2008-12-01 15:27:22 +0000 (Mon, 01 Dec 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/Makefile + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/bam_aux.c + M /branches/dev/samtools/bamtk.c + M /branches/dev/samtools/samtools.1 + + * samtools-0.1.0-49 + * added routines for retrieving aux data, NOT TESTED YET! + +------------------------------------------------------------------------ +r24 | lh3lh3 | 2008-12-01 14:29:43 +0000 (Mon, 01 Dec 2008) | 5 lines +Changed paths: + M /branches/dev/samtools/bam.c + M /branches/dev/samtools/bam_import.c + M /branches/dev/samtools/bam_maqcns.c + M /branches/dev/samtools/bamtk.c + M /branches/dev/samtools/bgzf.c + M /branches/dev/samtools/samtools.1 + + * samtools-0.1.0-48 + * bgzf: fixed a potential integer overflow on 32-bit machines + * maqcns: set the minimum combined quality as 0 + * supporting hex strings + +------------------------------------------------------------------------ +r23 | lh3lh3 | 2008-11-27 17:14:37 +0000 (Thu, 27 Nov 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/bam_maqcns.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-47 + * fixed the bug in maqcns + +------------------------------------------------------------------------ +r22 | lh3lh3 | 2008-11-27 17:08:11 +0000 (Thu, 27 Nov 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/Makefile + M /branches/dev/samtools/bam.h + A 
/branches/dev/samtools/bam_maqcns.c + A /branches/dev/samtools/bam_maqcns.h + M /branches/dev/samtools/bam_tview.c + M /branches/dev/samtools/bamtk.c + A /branches/dev/samtools/glf.h + + * samtools-0.1.0-46 + * add MAQ consensus caller, currently BUGGY! + +------------------------------------------------------------------------ +r21 | lh3lh3 | 2008-11-27 13:51:28 +0000 (Thu, 27 Nov 2008) | 4 lines +Changed paths: + M /branches/dev/samtools/bam_pileup.c + M /branches/dev/samtools/bam_tview.c + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-45 + * tview: display padded alignment (but not P operation) + * better coordinates and reference sequence + +------------------------------------------------------------------------ +r19 | lh3lh3 | 2008-11-27 09:26:05 +0000 (Thu, 27 Nov 2008) | 2 lines +Changed paths: + A /branches/dev/samtools/ChangeLog + +new ChangeLog + +------------------------------------------------------------------------ +r18 | lh3lh3 | 2008-11-27 09:24:45 +0000 (Thu, 27 Nov 2008) | 3 lines +Changed paths: + D /branches/dev/samtools/ChangeLog + A /branches/dev/samtools/ChangeLog.old (from /branches/dev/samtools/ChangeLog:6) + +Rename ChangeLog to ChangeLog.old. This old ChangeLog is generated from +the log of my personal SVN repository. 
+ +------------------------------------------------------------------------ +r17 | lh3lh3 | 2008-11-27 09:22:55 +0000 (Thu, 27 Nov 2008) | 6 lines +Changed paths: + M /branches/dev/samtools/Makefile + M /branches/dev/samtools/bamtk.c + M /branches/dev/samtools/bgzf.c + + * samtools-0.1.0-44 + * declare fseeko and ftello as some Linux may not do this by default and + missing these declarations will make bgzf buggy + * get rid of some harmless warings + * use BGZF by default, now + +------------------------------------------------------------------------ +r16 | lh3lh3 | 2008-11-26 21:19:11 +0000 (Wed, 26 Nov 2008) | 4 lines +Changed paths: + M /branches/dev/samtools/bam_index.c + M /branches/dev/samtools/bamtk.c + M /branches/dev/samtools/razf.c + + * samtools-0.1.0-43 + * fixed a bug in razf_read() + * give more warnings when the file is truncated (or due to bugs in I/O library) + +------------------------------------------------------------------------ +r15 | lh3lh3 | 2008-11-26 20:41:39 +0000 (Wed, 26 Nov 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/bgzf.c + +fixed a bug in bgzf.c at the end of the file + +------------------------------------------------------------------------ +r14 | lh3lh3 | 2008-11-26 17:05:18 +0000 (Wed, 26 Nov 2008) | 4 lines +Changed paths: + M /branches/dev/samtools/bamtk.c + + * samtools-0.1.0-42 + * a lot happened to RAZF, although samtools itself is untouched. Better + also update the version number anyway to avoid confusion + +------------------------------------------------------------------------ +r13 | lh3lh3 | 2008-11-26 17:03:48 +0000 (Wed, 26 Nov 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/razf.c + +a change from Jue, but I think it should not matter + +------------------------------------------------------------------------ +r12 | lh3lh3 | 2008-11-26 16:48:14 +0000 (Wed, 26 Nov 2008) | 3 lines +Changed paths: + M /branches/dev/samtools/razf.c + +fixed a potential bug in razf. 
However, it seems still buggy, just +rarely happens, very rarely. + +------------------------------------------------------------------------ +r11 | lh3lh3 | 2008-11-26 14:02:56 +0000 (Wed, 26 Nov 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/razf.c + +fixed a bug in razf, with the help of Jue + +------------------------------------------------------------------------ +r10 | lh3lh3 | 2008-11-26 11:55:32 +0000 (Wed, 26 Nov 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/bam_index.c + +remove a comment + +------------------------------------------------------------------------ +r9 | lh3lh3 | 2008-11-26 11:37:05 +0000 (Wed, 26 Nov 2008) | 2 lines +Changed paths: + M /branches/dev/samtools/Makefile + M /branches/dev/samtools/bam.h + M /branches/dev/samtools/razf.c + M /branches/dev/samtools/razf.h + + * Jue has updated razf to realize Bob's scheme + +------------------------------------------------------------------------ +r7 | lh3lh3 | 2008-11-25 20:37:37 +0000 (Tue, 25 Nov 2008) | 2 lines +Changed paths: + A /branches/dev/samtools/samtools.1 + +the manual page + +------------------------------------------------------------------------ +r6 | lh3lh3 | 2008-11-25 20:37:16 +0000 (Tue, 25 Nov 2008) | 3 lines +Changed paths: + A /branches/dev/samtools/ChangeLog + A /branches/dev/samtools/Makefile + A /branches/dev/samtools/bam.c + A /branches/dev/samtools/bam.h + A /branches/dev/samtools/bam_aux.c + A /branches/dev/samtools/bam_endian.h + A /branches/dev/samtools/bam_import.c + A /branches/dev/samtools/bam_index.c + A /branches/dev/samtools/bam_lpileup.c + A /branches/dev/samtools/bam_pileup.c + A /branches/dev/samtools/bam_sort.c + A /branches/dev/samtools/bam_tview.c + A /branches/dev/samtools/bamtk.c + A /branches/dev/samtools/bgzf.c + A /branches/dev/samtools/bgzf.h + A /branches/dev/samtools/bgzip.c + A /branches/dev/samtools/faidx.c + A /branches/dev/samtools/faidx.h + A /branches/dev/samtools/khash.h + A /branches/dev/samtools/ksort.h + A 
/branches/dev/samtools/kstream.h + A /branches/dev/samtools/misc + A /branches/dev/samtools/misc/Makefile + A /branches/dev/samtools/misc/all2sam.pl + A /branches/dev/samtools/misc/maq2sam.c + A /branches/dev/samtools/razf.c + A /branches/dev/samtools/razf.h + A /branches/dev/samtools/razip.c + A /branches/dev/samtools/zutil.h + +The initial version of samtools, replicated from my local SVN repository. +The current version is: 0.1.0-42. All future development will happen here. + +------------------------------------------------------------------------ +r5 | lh3lh3 | 2008-11-25 20:30:49 +0000 (Tue, 25 Nov 2008) | 2 lines +Changed paths: + A /branches/dev/samtools + +samtools (C version) + +------------------------------------------------------------------------ diff --git a/ChangeLog.old b/ChangeLog.old new file mode 100644 index 0000000..2e1214e --- /dev/null +++ b/ChangeLog.old @@ -0,0 +1,806 @@ +------------------------------------------------------------------------ +r703 | lh3 | 2008-11-25 20:20:02 +0000 (Tue, 25 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/samtools.1 + +rename bamtk to samtools + +------------------------------------------------------------------------ +r702 | lh3 | 2008-11-25 20:15:09 +0000 (Tue, 25 Nov 2008) | 2 lines +Changed paths: + D /branches/prog/bam/bamtk.1 + A /branches/prog/bam/samtools.1 (from /branches/prog/bam/bamtk.1:679) + +rename bamtk.1 to samtools.1 + +------------------------------------------------------------------------ +r701 | lh3 | 2008-11-25 13:29:10 +0000 (Tue, 25 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/misc/Makefile + + * samtools-0.1.0-41 + * small (but a bit dangerous) changes to meet the latest specification + 
+------------------------------------------------------------------------ +r700 | lh3 | 2008-11-25 13:15:11 +0000 (Tue, 25 Nov 2008) | 2 lines +Changed paths: + A /branches/prog/bam/misc/all2sam.pl (from /branches/prog/bam/misc/all2tam.pl:649) + D /branches/prog/bam/misc/all2tam.pl + A /branches/prog/bam/misc/maq2sam.c (from /branches/prog/bam/misc/maq2tam.c:699) + D /branches/prog/bam/misc/maq2tam.c + +rename tam to sam + +------------------------------------------------------------------------ +r699 | lh3 | 2008-11-25 13:14:49 +0000 (Tue, 25 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/misc/maq2tam.c + +change for the new specification + +------------------------------------------------------------------------ +r698 | lh3 | 2008-11-24 13:15:20 +0000 (Mon, 24 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/razf.c + M /branches/prog/bam/razf.h + + * add a fake BGZF mode to razf. It is fake in that it loads razf index into + memory but gives BGZF like virtual offset + +------------------------------------------------------------------------ +r697 | lh3 | 2008-11-24 09:53:44 +0000 (Mon, 24 Nov 2008) | 2 lines +Changed paths: + A /branches/prog/bam/ChangeLog + +change log + +------------------------------------------------------------------------ +r696 | lh3 | 2008-11-24 09:53:23 +0000 (Mon, 24 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bgzf.c + +updated bgzf, on behalf of Bob + +------------------------------------------------------------------------ +r695 | lh3 | 2008-11-23 11:40:31 +0000 (Sun, 23 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/razf.c + +fixed a bug in razf + +------------------------------------------------------------------------ +r694 | lh3 | 2008-11-22 16:23:52 +0000 (Sat, 22 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_lpileup.c + M /branches/prog/bam/bam_tview.c + M 
/branches/prog/bam/bamtk.c + + * bam-0.1.0-40 + * fixed two small memory leaks + * fixed a memory problem when seek outside the length of the sequence + +------------------------------------------------------------------------ +r693 | lh3 | 2008-11-22 16:10:04 +0000 (Sat, 22 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + + * bam-0.1.0-39 + * fixed an uninitialized warning. This does not matter in fact + +------------------------------------------------------------------------ +r692 | lh3 | 2008-11-22 15:44:05 +0000 (Sat, 22 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/razf.c + M /branches/prog/bam/razf.h + +Jue's new razf + +------------------------------------------------------------------------ +r691 | lh3 | 2008-11-21 21:30:39 +0000 (Fri, 21 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/bgzip.c + + * bam-0.1.0-38 + * get rid of some warings in bgzip.c + * potentially improve performance in indexing for BGZF + +------------------------------------------------------------------------ +r690 | lh3 | 2008-11-21 21:15:51 +0000 (Fri, 21 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bgzf.c + +I think I have fixed the bug in bgzf + +------------------------------------------------------------------------ +r689 | lh3 | 2008-11-21 20:48:56 +0000 (Fri, 21 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bgzf.c + +bug fix by Bob + +------------------------------------------------------------------------ +r688 | lh3 | 2008-11-21 20:37:27 +0000 (Fri, 21 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_index.c + +fixed a bug due to the name change in _IOLIB + +------------------------------------------------------------------------ +r687 | lh3 | 2008-11-21 14:42:56 +0000 (Fri, 21 Nov 2008) | 2 lines 
+Changed paths: + M /branches/prog/bam/bgzf.c + +fix small things + +------------------------------------------------------------------------ +r686 | lh3 | 2008-11-21 14:37:59 +0000 (Fri, 21 Nov 2008) | 2 lines +Changed paths: + A /branches/prog/bam/bgzf.c + A /branches/prog/bam/bgzf.h + A /branches/prog/bam/bgzip.c + +Bob's BGZF format, although currently buggy + +------------------------------------------------------------------------ +r685 | lh3 | 2008-11-21 09:48:20 +0000 (Fri, 21 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + + * bam-0.1.0-37 + * improve interface a little bit + +------------------------------------------------------------------------ +r684 | lh3 | 2008-11-21 09:30:18 +0000 (Fri, 21 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + + * bam-0.1.0-36 + * improve the interface of tview, a little bit + +------------------------------------------------------------------------ +r683 | lh3 | 2008-11-20 22:33:54 +0000 (Thu, 20 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam_tview.c + +a little better viewer + +------------------------------------------------------------------------ +r682 | lh3 | 2008-11-20 22:27:01 +0000 (Thu, 20 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-35 + * better viewer + +------------------------------------------------------------------------ +r681 | lh3 | 2008-11-20 20:51:16 +0000 (Thu, 20 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-34 + * tview is now a component of bamtk + +------------------------------------------------------------------------ +r680 | lh3 | 2008-11-20 19:17:30 +0000 (Thu, 20 Nov 2008) | 2 lines +Changed paths: + A 
/branches/prog/bam/bam_tview.c + +text alignment viewer + +------------------------------------------------------------------------ +r679 | lh3 | 2008-11-20 19:17:15 +0000 (Thu, 20 Nov 2008) | 5 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_lpileup.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.1 + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/faidx.c + + * bamtk-0.1.0-33 + * added routines to reset pileup bufferes + * fixed a bug in faidx + * add text alignment viewer + +------------------------------------------------------------------------ +r678 | lh3 | 2008-11-20 11:05:02 +0000 (Thu, 20 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/Makefile + A /branches/prog/bam/bam_lpileup.c (from /branches/prog/bam/bam_tview.c:668) + D /branches/prog/bam/bam_tview.c + +rename tview as lpileup + +------------------------------------------------------------------------ +r677 | lh3 | 2008-11-20 10:08:52 +0000 (Thu, 20 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/razf.c + +fixed a bug in razf + +------------------------------------------------------------------------ +r676 | lh3 | 2008-11-19 22:52:20 +0000 (Wed, 19 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/faidx.h + +add documentations + +------------------------------------------------------------------------ +r674 | lh3 | 2008-11-19 21:39:17 +0000 (Wed, 19 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bamtk.1 + M /branches/prog/bam/faidx.h + +update documentation + +------------------------------------------------------------------------ +r673 | lh3 | 2008-11-19 21:19:03 +0000 (Wed, 19 Nov 2008) | 2 lines +Changed paths: + A /branches/prog/bam/bamtk.1 + +add manual page + +------------------------------------------------------------------------ +r672 | lh3 
| 2008-11-19 16:40:49 +0000 (Wed, 19 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/faidx.c + + * bamtk-0.1.0-32 + * make faidx more error resistant + +------------------------------------------------------------------------ +r671 | lh3 | 2008-11-19 16:09:55 +0000 (Wed, 19 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/faidx.h + +add index + +------------------------------------------------------------------------ +r670 | lh3 | 2008-11-19 16:02:39 +0000 (Wed, 19 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/faidx.c + + * bamtk-0.1.0-31 + * show reference sequence in pileup -v (not in the default pileup) + +------------------------------------------------------------------------ +r669 | lh3 | 2008-11-19 14:51:17 +0000 (Wed, 19 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/faidx.c + + * bamtk-0.1.0-30 + * put faidx in bamtk and remove faidx_main.c + +------------------------------------------------------------------------ +r668 | lh3 | 2008-11-19 14:15:05 +0000 (Wed, 19 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + A /branches/prog/bam/faidx.c + A /branches/prog/bam/faidx.h + M /branches/prog/bam/razf.c + + * bamtk-0.1.0-29 + * fixed a bug in tview.c + * prepare to add faidx + +------------------------------------------------------------------------ +r667 | lh3 | 2008-11-19 10:20:45 +0000 (Wed, 19 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/razf.c + M /branches/prog/bam/razf.h + +gzip-compatible razf + +------------------------------------------------------------------------ +r664 | lh3 | 2008-11-18 12:50:23 +0000 (Tue, 18 Nov 2008) | 5 lines +Changed paths: + M /branches/prog/bam/bam.h + M 
/branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-28 + * fetch: fixed a bug at an array boundary + * fetch: fixed a bug when the whole chromosome is retrieved + * add linear index + +------------------------------------------------------------------------ +r663 | lh3 | 2008-11-17 21:29:22 +0000 (Mon, 17 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-27 + * put l_qseq into core and move l_aux to bam1_t + +------------------------------------------------------------------------ +r662 | lh3 | 2008-11-17 20:55:16 +0000 (Mon, 17 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-26 + * save seq and qual separately + +------------------------------------------------------------------------ +r661 | lh3 | 2008-11-17 13:09:37 +0000 (Mon, 17 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + +little + +------------------------------------------------------------------------ +r660 | lh3 | 2008-11-17 13:06:14 +0000 (Mon, 17 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + +more documentations + +------------------------------------------------------------------------ +r659 | lh3 | 2008-11-17 12:55:08 +0000 (Mon, 17 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-25 + * make tview work for TAM + +------------------------------------------------------------------------ +r658 | lh3 | 2008-11-17 12:50:21 +0000 (Mon, 17 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam.h + M 
/branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-24 + * make tview as an independent module + +------------------------------------------------------------------------ +r657 | lh3 | 2008-11-17 11:26:06 +0000 (Mon, 17 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_pileup.c + +change little + +------------------------------------------------------------------------ +r656 | lh3 | 2008-11-16 21:33:19 +0000 (Sun, 16 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-23 + * also add tview for TAM + +------------------------------------------------------------------------ +r655 | lh3 | 2008-11-16 21:29:46 +0000 (Sun, 16 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-22 + * make tview more efficient for deep depth + +------------------------------------------------------------------------ +r654 | lh3 | 2008-11-16 20:52:19 +0000 (Sun, 16 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_pileup.c + A /branches/prog/bam/bam_tview.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-21 + * fixed bug in the TAM parser: lowercase not recognized + * unfinished function to leveled pileup (tview) + +------------------------------------------------------------------------ +r653 | lh3 | 2008-11-15 12:58:36 +0000 (Sat, 15 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-20 + * pileup now display deleted bases as '*' + +------------------------------------------------------------------------ +r652 | lh3 | 2008-11-15 09:58:39 +0000 (Sat, 15 Nov 2008) | 4 lines +Changed paths: + M 
/branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-19 + * fixed a bug in fetch() + * reduce memory in indexing + +------------------------------------------------------------------------ +r651 | lh3 | 2008-11-14 21:56:05 +0000 (Fri, 14 Nov 2008) | 5 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-18 + * important changes are made to index: the index size is increased, but + now we have no limit on file sizes and the new method potentially + works with BGZF, Bob's new compression format. + +------------------------------------------------------------------------ +r650 | lh3 | 2008-11-14 16:03:22 +0000 (Fri, 14 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-17 + * more comments in bam.h + * fixed a bug in bam_index.c + +------------------------------------------------------------------------ +r649 | lh3 | 2008-11-13 16:04:18 +0000 (Thu, 13 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bam_sort.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-16 + * use macros to retrieve pointers from bam1_t and thus reduce the size + of bam1_t struct. 
+ +------------------------------------------------------------------------ +r648 | lh3 | 2008-11-13 13:21:39 +0000 (Thu, 13 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam_sort.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-15 + * make more things work over pipe + +------------------------------------------------------------------------ +r647 | lh3 | 2008-11-13 12:49:28 +0000 (Thu, 13 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/misc/maq2tam.c + +fixed a bug in maq2tam + +------------------------------------------------------------------------ +r646 | lh3 | 2008-11-13 11:46:59 +0000 (Thu, 13 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/Makefile + M /branches/prog/bam/misc/Makefile + M /branches/prog/bam/misc/maq2tam.c + + * bug fix in maq2tam.c + * improve Makefile + +------------------------------------------------------------------------ +r645 | lh3 | 2008-11-13 11:39:46 +0000 (Thu, 13 Nov 2008) | 3 lines +Changed paths: + A /branches/prog/bam/misc/Makefile + M /branches/prog/bam/misc/maq2tam.c + + * corrected maq2tam + * add Makefile + +------------------------------------------------------------------------ +r644 | lh3 | 2008-11-13 11:25:45 +0000 (Thu, 13 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/razf.c + +fixed the bug in buffered write (on behalf of Jue) + +------------------------------------------------------------------------ +r643 | lh3 | 2008-11-13 10:53:42 +0000 (Thu, 13 Nov 2008) | 2 lines +Changed paths: + D /branches/prog/bam/all2tam.pl + A /branches/prog/bam/misc/all2tam.pl (from /branches/prog/bam/all2tam.pl:642) + +move to misc + +------------------------------------------------------------------------ +r642 | lh3 | 2008-11-13 10:53:23 +0000 (Thu, 13 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/all2tam.pl + +change tag + +------------------------------------------------------------------------ +r641 | lh3 | 2008-11-13 10:53:12 +0000 (Thu, 13 Nov 2008) | 2 lines +Changed 
paths: + D /branches/prog/bam/utils + +has been renamed + +------------------------------------------------------------------------ +r640 | lh3 | 2008-11-13 10:52:50 +0000 (Thu, 13 Nov 2008) | 2 lines +Changed paths: + A /branches/prog/bam/misc (from /branches/prog/bam/utils:639) + +rename + +------------------------------------------------------------------------ +r639 | lh3 | 2008-11-13 10:52:35 +0000 (Thu, 13 Nov 2008) | 2 lines +Changed paths: + A /branches/prog/bam/utils + A /branches/prog/bam/utils/maq2tam.c + +utilities (converters and so on) + +------------------------------------------------------------------------ +r638 | lh3 | 2008-11-12 22:24:22 +0000 (Wed, 12 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-14 + * copy the text header to BAM + * add BAM1 header flag + +------------------------------------------------------------------------ +r637 | lh3 | 2008-11-12 14:56:08 +0000 (Wed, 12 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/razf.c + + * bamtk-0.1.0-13 + * fixed a bug in razf + * improved and fixed potential bugs in index + +------------------------------------------------------------------------ +r636 | lh3 | 2008-11-12 11:57:13 +0000 (Wed, 12 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + +update documentation in the HeaderDOC format + +------------------------------------------------------------------------ +r635 | lh3 | 2008-11-12 10:08:38 +0000 (Wed, 12 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + 
* bamtk-0.1.0-12 + * more documentations + * rename baf1_core_t as bam1_core_t + +------------------------------------------------------------------------ +r634 | lh3 | 2008-11-11 23:00:35 +0000 (Tue, 11 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_pileup.c + +documentation + +------------------------------------------------------------------------ +r633 | lh3 | 2008-11-11 21:23:49 +0000 (Tue, 11 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-11 + * give up regional pileup. We can now use pipe to mimic that. + * for index file, change suffix .idx to .bmi + +------------------------------------------------------------------------ +r632 | lh3 | 2008-11-11 21:00:11 +0000 (Tue, 11 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/razf.c + + * bamtk-0.1.0-10 + * make pileup work on TAM + +------------------------------------------------------------------------ +r631 | lh3 | 2008-11-11 09:20:29 +0000 (Tue, 11 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/razf.c + M /branches/prog/bam/razf.h + M /branches/prog/bam/razip.c + + * bamtk-0.1.0-9 + * razf now supports streaming + * prepare to improve pileup (have not yet) + +------------------------------------------------------------------------ +r630 | lh3 | 2008-11-10 18:34:40 +0000 (Mon, 10 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-8 + * improve the interface of TAM parser + +------------------------------------------------------------------------ +r629 | lh3 | 2008-11-10 13:06:13 +0000 (Mon, 10 Nov 2008) | 3 
lines +Changed paths: + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-7 + * almost nothing + +------------------------------------------------------------------------ +r628 | lh3 | 2008-11-10 12:56:36 +0000 (Mon, 10 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-6 + * fixed a bug in bam_pileup.c + +------------------------------------------------------------------------ +r627 | lh3 | 2008-11-10 11:32:46 +0000 (Mon, 10 Nov 2008) | 4 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bamtk.c + M /branches/prog/bam/razf.c + + * bamtk-0.1.0-5 + * fixed a bug in razf.c, caused by my modifications + * improve the interface of pileup. Now it will be slower but more flexible + +------------------------------------------------------------------------ +r626 | lh3 | 2008-11-09 20:51:04 +0000 (Sun, 09 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.h + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-4 + * view: dumping binary output + +------------------------------------------------------------------------ +r625 | lh3 | 2008-11-09 20:31:54 +0000 (Sun, 09 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bam_pileup.c + M /branches/prog/bam/bam_sort.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-3 + * rename functions + +------------------------------------------------------------------------ +r624 | lh3 | 2008-11-09 15:07:32 +0000 (Sun, 09 Nov 2008) | 2 lines +Changed paths: + M /branches/prog/bam/bam.h + +add comments + +------------------------------------------------------------------------ +r623 | lh3 | 2008-11-08 22:32:49 +0000 (Sat, 08 Nov 2008) | 4 lines +Changed paths: + M 
/branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-2 + * improve indexing for a mixture of long and short reads, although currently + I do not know whether it really works... + +------------------------------------------------------------------------ +r622 | lh3 | 2008-11-08 22:13:58 +0000 (Sat, 08 Nov 2008) | 3 lines +Changed paths: + M /branches/prog/bam/bam_index.c + M /branches/prog/bam/bamtk.c + + * bamtk-0.1.0-1 + * prepare for improving indexing algorithm + +------------------------------------------------------------------------ +r621 | lh3 | 2008-11-08 20:28:09 +0000 (Sat, 08 Nov 2008) | 4 lines +Changed paths: + A /branches/prog/bam/all2tam.pl + M /branches/prog/bam/bam.c + M /branches/prog/bam/bam.h + M /branches/prog/bam/bam_import.c + M /branches/prog/bam/bamtk.c + D /branches/prog/bam/tam_utils.pl + + * bamtk-0.1.0 + * smarter integers + * rename tam_utils.pl to all2tam.pl + +------------------------------------------------------------------------ +r620 | lh3 | 2008-11-08 17:17:22 +0000 (Sat, 08 Nov 2008) | 2 lines +Changed paths: + A /branches/prog/bam + A /branches/prog/bam/Makefile + A /branches/prog/bam/bam.c + A /branches/prog/bam/bam.h + A /branches/prog/bam/bam_endian.h + A /branches/prog/bam/bam_import.c + A /branches/prog/bam/bam_index.c + A /branches/prog/bam/bam_pileup.c + A /branches/prog/bam/bam_sort.c + A /branches/prog/bam/bamtk.c + A /branches/prog/bam/khash.h + A /branches/prog/bam/ksort.h + A /branches/prog/bam/kstream.h + A /branches/prog/bam/razf.c + A /branches/prog/bam/razf.h + A /branches/prog/bam/razip.c + A /branches/prog/bam/tam_utils.pl + A /branches/prog/bam/zutil.h + +The Binary Alignment/Mapping format. 
+ +------------------------------------------------------------------------ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..32e4c41 --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +CC= gcc +CXX= g++ +CFLAGS= -g -Wall -O2 -m64 #-arch ppc +CXXFLAGS= $(CFLAGS) +DFLAGS= -D_IOLIB=2 -D_FILE_OFFSET_BITS=64 -DHAVE_RAZF #-D_NO_CURSES +OBJS= bam.o bam_import.o bam_pileup.o bam_lpileup.o bam_sort.o bam_index.o \ + razf.o bgzf.o faidx.o bam_tview.o bam_maqcns.o bam_aux.o bam_plcmd.o +PROG= razip bgzip samtools +INCLUDES= +LIBS= -lm -lz +SUBDIRS= . misc + +.SUFFIXES:.c .o + +.c.o: + $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ + +all-recur lib-recur clean-recur cleanlocal-recur install-recur: + @target=`echo $@ | sed s/-recur//`; \ + wdir=`pwd`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + cd $$subdir; \ + $(MAKE) CC="$(CC)" CXX="$(CXX)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ + INCLUDES="$(INCLUDES)" $$target || exit 1; \ + cd $$wdir; \ + done; + +all:$(PROG) + +lib:libbam.a + +libbam.a:$(OBJS) + $(AR) -cru $@ $(OBJS) + +samtools:lib bamtk.o + $(CC) $(CFLAGS) -o $@ bamtk.o $(LIBS) -L. 
-lbam -lcurses + +razip:razip.o razf.o + $(CC) $(CFLAGS) -o $@ razf.o razip.o $(LIBS) + +bgzip:bgzip.o bgzf.o + $(CC) $(CFLAGS) -o $@ bgzf.o bgzip.o $(LIBS) + +razip.o:razf.h +bam.o:bam.h razf.h bam_endian.h +bam_import.o:bam.h kseq.h khash.h razf.h +bam_pileup.o:bam.h razf.h ksort.h +bam_plcmd.o:bam.h faidx.h bam_maqcns.h +bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h +bam_lpileup.o:bam.h ksort.h +bam_tview.o:bam.h faidx.h bam_maqcns.h +bam_maqcns.o:bam.h ksort.h bam_maqcns.h +bam_sort.o:bam.h ksort.h razf.h +razf.o:razf.h + +faidx.o:faidx.h razf.h khash.h +faidx_main.o:faidx.h razf.h + +cleanlocal: + rm -fr gmon.out *.o a.out *.dSYM $(PROG) *~ *.a + +clean:cleanlocal-recur diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..1ed90ab --- /dev/null +++ b/NEWS @@ -0,0 +1,6 @@ +Beta Release 0.1.1 (22 December, 2008) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is the first public release of samtools. For more information, +please check the manual page `samtools.1' and the samtools website +http://samtools.sourceforge.net \ No newline at end of file diff --git a/bam.c b/bam.c new file mode 100644 index 0000000..6ccca7c --- /dev/null +++ b/bam.c @@ -0,0 +1,274 @@ +#include +#include +#include "bam.h" +#include "bam_endian.h" + +int bam_is_be = 0; + +/************************** + * CIGAR related routines * + **************************/ + +int bam_segreg(int32_t pos, const bam1_core_t *c, const uint32_t *cigar, bam_segreg_t *reg) +{ + unsigned k; + int32_t x = c->pos, y = 0; + int state = 0; + for (k = 0; k < c->n_cigar; ++k) { + int op = cigar[k] & BAM_CIGAR_MASK; // operation + int l = cigar[k] >> BAM_CIGAR_SHIFT; // length + if (state == 0 && (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CINS) && x + l > pos) { + reg->tbeg = x; reg->qbeg = y; reg->cbeg = k; + state = 1; + } + if (op == BAM_CMATCH) { x += l; y += l; } + else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l; + else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l; + if (state == 
1 && (op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP || op == BAM_CREF_SKIP || k == c->n_cigar - 1)) { + reg->tend = x; reg->qend = y; reg->cend = k; + } + } + return state? 0 : -1; +} + +uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar) +{ + uint32_t k, end; + end = c->pos; + for (k = 0; k < c->n_cigar; ++k) { + int op = cigar[k] & BAM_CIGAR_MASK; + if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP) + end += cigar[k] >> BAM_CIGAR_SHIFT; + } + return end; +} + +int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar) +{ + uint32_t k; + int32_t l = 0; + for (k = 0; k < c->n_cigar; ++k) { + int op = cigar[k] & BAM_CIGAR_MASK; + if (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CSOFT_CLIP) + l += cigar[k] >> BAM_CIGAR_SHIFT; + } + return l; +} + +/******************** + * BAM I/O routines * + ********************/ + +bam_header_t *bam_header_init() +{ + bam_is_be = bam_is_big_endian(); + return (bam_header_t*)calloc(1, sizeof(bam_header_t)); +} + +void bam_header_destroy(bam_header_t *header) +{ + int32_t i; + extern void bam_destroy_header_hash(bam_header_t *header); + if (header == 0) return; + if (header->target_name) { + for (i = 0; i < header->n_targets; ++i) + free(header->target_name[i]); + free(header->target_name); + free(header->target_len); + } + free(header->text); +#ifndef BAM_NO_HASH + bam_destroy_header_hash(header); +#endif + free(header); +} + +bam_header_t *bam_header_read(bamFile fp) +{ + bam_header_t *header; + char buf[4]; + int32_t i, name_len; + // read "BAM1" + if (bam_read(fp, buf, 4) != 4) return 0; + if (strncmp(buf, "BAM\001", 4)) { + fprintf(stderr, "[bam_header_read] wrong header\n"); + return 0; + } + header = bam_header_init(); + // read plain text and the number of reference sequences + bam_read(fp, &header->l_text, 4); + if (bam_is_be) bam_swap_endian_4p(&header->l_text); + header->text = (char*)calloc(header->l_text + 1, 1); + bam_read(fp, header->text, header->l_text); + bam_read(fp, 
&header->n_targets, 4); + if (bam_is_be) bam_swap_endian_4p(&header->n_targets); + assert(header->n_targets > 0); + // read reference sequence names and lengths + header->target_name = (char**)calloc(header->n_targets, sizeof(char*)); + header->target_len = (uint32_t*)calloc(header->n_targets, 4); + for (i = 0; i != header->n_targets; ++i) { + bam_read(fp, &name_len, 4); + if (bam_is_be) bam_swap_endian_4p(&name_len); + header->target_name[i] = (char*)calloc(name_len, 1); + bam_read(fp, header->target_name[i], name_len); + bam_read(fp, &header->target_len[i], 4); + if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]); + } + return header; +} + +int bam_header_write(bamFile fp, const bam_header_t *header) +{ + char buf[4]; + int32_t i, name_len, x; + // write "BAM1" + strncpy(buf, "BAM\001", 4); + bam_write(fp, buf, 4); + // write plain text and the number of reference sequences + if (bam_is_be) { + x = bam_swap_endian_4(header->l_text); + bam_write(fp, &x, 4); + if (header->l_text) bam_write(fp, header->text, header->l_text); + x = bam_swap_endian_4(header->n_targets); + bam_write(fp, &x, 4); + } else { + bam_write(fp, &header->l_text, 4); + if (header->l_text) bam_write(fp, header->text, header->l_text); + bam_write(fp, &header->n_targets, 4); + } + // write sequence names and lengths + for (i = 0; i != header->n_targets; ++i) { + char *p = header->target_name[i]; + name_len = strlen(p) + 1; + if (bam_is_be) { + x = bam_swap_endian_4(name_len); + bam_write(fp, &x, 4); + } else bam_write(fp, &name_len, 4); + bam_write(fp, p, name_len); + if (bam_is_be) { + x = bam_swap_endian_4(header->target_len[i]); + bam_write(fp, &x, 4); + } else bam_write(fp, &header->target_len[i], 4); + } + return 0; +} + +static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data) +{ + uint8_t *s; + uint32_t i, *cigar = (uint32_t*)(data + c->l_qname); + s = data + c->n_cigar*4 + c->l_qname + c->l_qseq + (c->l_qseq + 1)/2; + for (i = 0; i < c->n_cigar; ++i) 
bam_swap_endian_4p(&cigar[i]); + while (s < data + data_len) { + uint8_t type; + s += 2; // skip key + type = toupper(*s); ++s; // skip type + if (type == 'C' || type == 'A') ++s; + else if (type == 'S') { bam_swap_endian_2p(s); s += 2; } + else if (type == 'I' || type == 'F') { bam_swap_endian_4p(s); s += 4; } + else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; } + } +} + +int bam_read1(bamFile fp, bam1_t *b) +{ + bam1_core_t *c = &b->core; + int32_t block_len, ret, i; + uint32_t x[8]; + + assert(BAM_CORE_SIZE == 32); + if ((ret = bam_read(fp, &block_len, 4)) != 4) { + if (ret == 0) return -1; // normal end-of-file + else return -2; // truncated + } + if (bam_read(fp, x, BAM_CORE_SIZE) != BAM_CORE_SIZE) return -3; + if (bam_is_be) { + bam_swap_endian_4p(&block_len); + for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i); + } + c->tid = x[0]; c->pos = x[1]; + c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff; + c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff; + c->l_qseq = x[4]; + c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7]; + b->data_len = block_len - BAM_CORE_SIZE; + if (b->m_data < b->data_len) { + b->m_data = b->data_len; + kroundup32(b->m_data); + b->data = (uint8_t*)realloc(b->data, b->m_data); + } + if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4; + b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2; + if (bam_is_be) swap_endian_data(c, b->data_len, b->data); + return 4 + block_len; +} + +inline int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8_t *data) +{ + uint32_t x[8], block_len = data_len + BAM_CORE_SIZE, y; + int i; + assert(BAM_CORE_SIZE == 32); + x[0] = c->tid; + x[1] = c->pos; + x[2] = (uint32_t)c->bin<<16 | c->qual<<8 | c->l_qname; + x[3] = (uint32_t)c->flag<<16 | c->n_cigar; + x[4] = c->l_qseq; + x[5] = c->mtid; + x[6] = c->mpos; + x[7] = c->isize; + if (bam_is_be) { + for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i); + y = block_len; + bam_write(fp, 
bam_swap_endian_4p(&y), 4); + swap_endian_data(c, data_len, data); + } else bam_write(fp, &block_len, 4); + bam_write(fp, x, BAM_CORE_SIZE); + bam_write(fp, data, data_len); + if (bam_is_be) swap_endian_data(c, data_len, data); + return 4 + block_len; +} + +int bam_write1(bamFile fp, const bam1_t *b) +{ + return bam_write1_core(fp, &b->core, b->data_len, b->data); +} + +void bam_view1(const bam_header_t *header, const bam1_t *b) /* print one alignment as one tab-delimited text line on stdout */ +{ + uint8_t *s = bam1_seq(b), *t = bam1_qual(b); + int i; + const bam1_core_t *c = &b->core; + printf("%s\t%d\t", bam1_qname(b), c->flag); + if (c->tid < 0) printf("*\t"); + else printf("%s\t", header->target_name[c->tid]); + printf("%d\t%d\t", c->pos + 1, c->qual); + if (c->n_cigar == 0) putchar('*'); + else { + for (i = 0; i < c->n_cigar; ++i) + printf("%d%c", bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, "MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK]); + } + putchar('\t'); + if (c->mtid < 0) printf("*\t"); + else printf("%s\t", header->target_name[c->mtid]); + printf("%d\t%d\t", c->mpos + 1, c->isize); + for (i = 0; i < c->l_qseq; ++i) putchar(bam_nt16_rev_table[bam1_seqi(s, i)]); + putchar('\t'); + for (i = 0; i < c->l_qseq; ++i) putchar(t[i] + 33); + s = bam1_aux(b); + while (s < b->data + b->data_len) { + uint8_t type, key[2]; + key[0] = s[0]; key[1] = s[1]; + s += 2; type = *s; ++s; + printf("\t%c%c:", key[0], key[1]); + if (type == 'A') { printf("A:%c", *s); ++s; } + else if (type == 'C') { printf("i:%u", *s); ++s; } + else if (type == 'c') { printf("i:%d", *(int8_t*)s); ++s; } /* lowercase codes are signed, cf. 's' and 'i' */ + else if (type == 'S') { printf("i:%u", *(uint16_t*)s); s += 2; } + else if (type == 's') { printf("i:%d", *(int16_t*)s); s += 2; } + else if (type == 'I') { printf("i:%u", *(uint32_t*)s); s += 4; } + else if (type == 'i') { printf("i:%d", *(int32_t*)s); s += 4; } + else if (type == 'f') { printf("f:%g", *(float*)s); s += 4; } + else if (type == 'Z' || type == 'H') { printf("%c:", type); while (*s) putchar(*s++); ++s; } + } + putchar('\n'); +} diff --git a/bam.h b/bam.h new file
mode 100644 index 0000000..4b3a688 --- /dev/null +++ b/bam.h @@ -0,0 +1,659 @@ +/* The MIT License + + Copyright (c) 2008 Genome Research Ltd (GRL). + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* Contact: Heng Li */ + +#ifndef BAM_BAM_H +#define BAM_BAM_H + +/*! + @header + + BAM library provides I/O and various operations on manipulating files + in the BAM (Binary Alignment/Mapping) or TAM (Text Alignment/Mapping) + format. It now supports importing from or exporting to TAM, sorting, + merging, generating pileup, and quickly retrieval of reads overlapped + with a specified region. + + @copyright Genome Research Ltd. + */ + +#include +#include +#include +#include +#include + +#if _IOLIB == 1 +#define BAM_TRUE_OFFSET +#include "razf.h" +/*! 
@abstract BAM file handler */ +typedef RAZF *bamFile; +#define bam_open(fn, mode) razf_open(fn, mode) +#define bam_dopen(fd, mode) razf_dopen(fd, mode) +#define bam_close(fp) razf_close(fp) +#define bam_read(fp, buf, size) razf_read(fp, buf, size) +#define bam_write(fp, buf, size) razf_write(fp, buf, size) +#define bam_tell(fp) razf_tell(fp) +#define bam_seek(fp, pos, dir) razf_seek(fp, pos, dir) +#elif _IOLIB == 2 +#define BAM_VIRTUAL_OFFSET16 +#include "bgzf.h" +/*! @abstract BAM file handler */ +typedef BGZF *bamFile; +#define bam_open(fn, mode) bgzf_open(fn, mode) +#define bam_dopen(fd, mode) bgzf_fdopen(fd, mode) +#define bam_close(fp) bgzf_close(fp) +#define bam_read(fp, buf, size) bgzf_read(fp, buf, size) +#define bam_write(fp, buf, size) bgzf_write(fp, buf, size) +#define bam_tell(fp) bgzf_tell(fp) +#define bam_seek(fp, pos, dir) bgzf_seek(fp, pos, dir) +#elif _IOLIB == 3 +#define BAM_VIRTUAL_OFFSET16 +#include "razf.h" +/*! @abstract BAM file handler */ +typedef RAZF *bamFile; +#define bam_open(fn, mode) razf_open2(fn, mode) +#define bam_dopen(fd, mode) razf_dopen2(fd, mode) +#define bam_close(fp) razf_close(fp) +#define bam_read(fp, buf, size) razf_read(fp, buf, size) +#define bam_write(fp, buf, size) razf_write(fp, buf, size) +#define bam_tell(fp) razf_tell2(fp) +#define bam_seek(fp, pos, dir) razf_seek2(fp, pos, dir) +#endif + +/*! @typedef + @abstract Structure for the alignment header. + @field n_targets number of reference sequences + @field target_name names of the reference sequences + @field target_len lengths of the reference sequences + @field hash hash table for fast name lookup + @field l_text length of the plain text in the header + @field text plain text + + @discussion Field hash points to null by default. It is a private + member. + */ +typedef struct { + int32_t n_targets; + char **target_name; + uint32_t *target_len; + void *hash; + int l_text; + char *text; +} bam_header_t; + +/*! 
@abstract the read is paired in sequencing, no matter whether it is mapped in a pair */ +#define BAM_FPAIRED 1 +/*! @abstract the read is mapped in a proper pair */ +#define BAM_FPROPER_PAIR 2 +/*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */ +#define BAM_FUNMAP 4 +/*! @abstract the mate is unmapped */ +#define BAM_FMUNMAP 8 +#define BAM_FREVERSE 16 +#define BAM_FMREVERSE 32 +#define BAM_FREAD1 64 +#define BAM_FREAD2 128 +#define BAM_FSECONDARY 256 + +#define BAM_CORE_SIZE sizeof(bam1_core_t) + +/** + * Describing how CIGAR operation/length is packed in a 32-bit integer. + */ +#define BAM_CIGAR_SHIFT 4 +#define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1) + +/* + CIGAR operations. + */ +/*! @abstract CIGAR: match */ +#define BAM_CMATCH 0 +/*! @abstract CIGAR: insertion to the reference */ +#define BAM_CINS 1 +/*! @abstract CIGAR: deletion from the reference */ +#define BAM_CDEL 2 +/*! @abstract CIGAR: skip on the reference (e.g. spliced alignment) */ +#define BAM_CREF_SKIP 3 +/*! @abstract CIGAR: clip on the read with clipped sequence present in qseq */ +#define BAM_CSOFT_CLIP 4 +/*! @abstract CIGAR: clip on the read with clipped sequence trimmed off */ +#define BAM_CHARD_CLIP 5 +/*! @abstract CIGAR: padding */ +#define BAM_CPAD 6 + +/*! @typedef + @abstract Structure for core alignment information. + @field tid chromosome ID, defined by bam_header_t + @field pos 0-based leftmost coordinate + @field strand strand; 0 for forward and 1 otherwise + @field bin bin calculated by bam_reg2bin() + @field qual mapping quality + @field l_qname length of the query name + @field flag bitwise flag + @field n_cigar number of CIGAR operations + @field l_qseq length of the query sequence (read) + */ +typedef struct { + int32_t tid; + int32_t pos; + uint32_t bin:16, qual:8, l_qname:8; + uint32_t flag:16, n_cigar:16; + int32_t l_qseq; + int32_t mtid; + int32_t mpos; + int32_t isize; +} bam1_core_t; + +/*! 
/*! @typedef
  @abstract Structure for one alignment.
  @field  core       core information about the alignment
  @field  l_aux      length of auxiliary data
  @field  data_len   current length of bam1_t::data
  @field  m_data     maximum length of bam1_t::data
  @field  data       all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux
  @field  hash       hash table for fast retrieval of tag-value pairs; private

  @discussion Notes:

   1. qname is NUL-terminated and core.l_qname includes the trailing '\0'.
   2. l_qseq is calculated from the total length of an alignment block
      on reading or from CIGAR.
   3. The order qname-cigar-seq-qual-aux is the one assumed by the
      accessor macros bam1_qname(), bam1_cigar(), bam1_seq(),
      bam1_qual() and bam1_aux(); data should only be addressed
      through them.
 */
typedef struct {
	bam1_core_t core;
	int l_aux, data_len, m_data;
	uint8_t *data;
	void *hash;
} bam1_t;
/*! @function
  @abstract  Get auxiliary data
  @param  b  pointer to an alignment
  @return    pointer to the concatenated auxiliary data

  @discussion The returned address is bam1_t::data advanced past the
  query name (l_qname bytes), the CIGAR (4 bytes per operation), the
  packed sequence ((l_qseq+1)/2 bytes) and the qualities (l_qseq bytes).
 */
#define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2)
	/*! @function
	  @abstract      Write one alignment in TAM (text) format.
	  @param  header pointer to the header structure
	  @param  b      alignment to write

	  @discussion This is only an alias: it expands to
	  bam_view1(header, b), so the destination is whatever bam_view1()
	  prints to.
	 */
#define sam_write1(header, b) bam_view1(header, b)
	/*!
	  @abstract   Write an alignment to BAM.
	  @param  fp  BAM file handler
	  @param  b   alignment to write
	  @return     number of bytes written to the file

	  @discussion It is equivalent to:
	    bam_write1_core(fp, &b->core, b->data_len, b->data)
	 */
	int bam_write1(bamFile fp, const bam1_t *b);
	/*!
	  @abstract Sort an unsorted BAM file based on the chromosome order
	  and the leftmost position of an alignment

	  @param  is_by_qname whether to sort by query name
	  @param  fn       name of the file to be sorted
	  @param  prefix   prefix of the output and the temporary files; upon
	                   success, prefix.bam will be written.
	  @param  max_mem  approximate maximum memory (very inaccurate)

	  @discussion It may create multiple temporary subalignment files
	  and then merge them by calling bam_merge_core(). This function is
	  NOT thread safe.
	 */
	void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem);
	/*!
	  @abstract     Push an alignment to the pileup buffer.
	  @param  b     alignment to be pushed
	  @param  buf   pileup buffer
	  @see          bam_plbuf_init()
	  @return       always 0 currently

	  @discussion If all the alignments covering a particular site have
	  been collected, this function will call the user defined function
	  as is provided to bam_plbuf_init(). The coordinate of the site and
	  all the alignments covering it will be transferred to the user
	  defined function as function parameters.

	  When all the alignments are pushed to the buffer, this function
	  needs to be called with b equal to NULL. This will flush the
	  buffer. A pileup buffer cannot be reused.
	 */
	int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf);
	/*! @abstract  bam_pileup_file() equivalent with level calculated. */
	int bam_lpileup_file(bamFile fp, bam_pileup_f func, void *func_data);
	/*!
	  @abstract Calculate the rightmost coordinate of an alignment on the
	  reference genome.

	  @param  c      pointer to the bam1_core_t structure
	  @param  cigar  the corresponding CIGAR array (as returned by bam1_cigar())
	  @return        the rightmost coordinate, 0-based
	 */
	uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar);
+ @param beg start of the region, 0-based + @param end end of the region, 0-based + @return bin + */ +static inline int bam_reg2bin(uint32_t beg, uint32_t end) +{ + --end; + if (beg>>14 == end>>14) return 4681 + (beg>>14); + if (beg>>17 == end>>17) return 585 + (beg>>17); + if (beg>>20 == end>>20) return 73 + (beg>>20); + if (beg>>23 == end>>23) return 9 + (beg>>23); + if (beg>>26 == end>>26) return 1 + (beg>>26); + return 0; +} + +static inline void bam_copy1(bam1_t *bdst, const bam1_t *bsrc) +{ + uint8_t *data = bdst->data; + int m_data = bdst->m_data; // backup data and m_data + if (m_data < bsrc->m_data) { // double the capacity + m_data = bsrc->m_data; kroundup32(m_data); + data = (uint8_t*)realloc(data, m_data); + } + memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data + *bdst = *bsrc; // copy the rest + // restore the backup + bdst->m_data = m_data; + bdst->data = data; +} + +#endif diff --git a/bam_aux.c b/bam_aux.c new file mode 100644 index 0000000..081f07b --- /dev/null +++ b/bam_aux.c @@ -0,0 +1,160 @@ +#include +#include "bam.h" +#include "khash.h" +KHASH_MAP_INIT_INT(aux, uint8_t*) +KHASH_MAP_INIT_STR(s, int) + +void bam_init_header_hash(bam_header_t *header) +{ + if (header->hash == 0) { + int ret, i; + khiter_t iter; + khash_t(s) *h; + header->hash = h = kh_init(s); + for (i = 0; i < header->n_targets; ++i) { + iter = kh_put(s, h, header->target_name[i], &ret); + kh_value(h, iter) = i; + } + } +} + +void bam_destroy_header_hash(bam_header_t *header) +{ + if (header->hash) + kh_destroy(s, (khash_t(s)*)header->hash); +} + +int32_t bam_get_tid(const bam_header_t *header, const char *seq_name) +{ + khint_t k; + khash_t(s) *h = (khash_t(s)*)header->hash; + k = kh_get(s, h, seq_name); + return k == kh_end(h)? 
-1 : kh_value(h, k); +} + +void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end) +{ + char *s, *p; + int i, l, k; + khiter_t iter; + khash_t(s) *h; + + bam_init_header_hash(header); + h = (khash_t(s)*)header->hash; + + l = strlen(str); + p = s = (char*)malloc(l+1); + /* squeeze out "," */ + for (i = k = 0; i != l; ++i) + if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i]; + s[k] = 0; + for (i = 0; i != k; ++i) if (s[i] == ':') break; + s[i] = 0; + iter = kh_get(s, h, s); /* get the ref_id */ + if (iter == kh_end(h)) { // name not found + *ref_id = -1; free(s); + return; + } + *ref_id = kh_value(h, iter); + if (i == k) { /* dump the whole sequence */ + *begin = 0; *end = 1<<29; free(s); + return; + } + for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; + *begin = atoi(p); + if (i < k) { + p = s + i + 1; + *end = atoi(p); + } else *end = 1<<29; + if (*begin > 0) --*begin; + assert(*begin <= *end); + free(s); +} + +void bam_aux_init(bam1_t *b) +{ + khash_t(aux) *h; + uint8_t *s; + if (b->hash == 0) { + h = kh_init(aux); + b->hash = h; + } else { + h = (khash_t(aux)*)b->hash; + kh_clear(aux, h); + } + s = bam1_aux(b); + while (s < b->data + b->data_len) { + uint32_t x = (uint32_t)s[0]<<8 | s[1]; + int ret, type; + khint_t k; + s += 2; type = toupper(*s); ++s; + k = kh_put(aux, h, x, &ret); + kh_value(h, k) = s; + if (type == 'C') ++s; + else if (type == 'S') s += 2; + else if (type == 'I') s += 4; + else if (type == 'F') s += 4; + else if (type == 'Z') { while (*s) putchar(*s++); ++s; } + } +} +void bam_aux_destroy(bam1_t *b) +{ + khash_t(aux) *h = (khash_t(aux)*)b->hash; + kh_destroy(aux, h); + b->hash = 0; +} +static uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) +{ + uint32_t x = (uint32_t)tag[0]<<8 | tag[1]; + khint_t k; + khash_t(aux) *h; + if (b->hash == 0) bam_aux_init(b); + h = (khash_t(aux)*)b->hash; + k = kh_get(aux, h, x); + if (k == kh_end(h)) return 0; + return kh_value(h, k); +} +int32_t 
/*
 * Typed accessors for auxiliary tags.
 *
 * Each function looks the tag up with bam_aux_get_core() and sets *err:
 *    0  success
 *   -1  tag not present
 *   -2  tag present but stored with an incompatible type
 * On failure the return value is 0 (or a null pointer).
 *
 * The lookup result is tested BEFORE it is dereferenced; a missing tag
 * used to crash here because the type byte was read first.
 */

/* Return the value of a float ('f') tag. */
float bam_aux_getf(bam1_t *b, const char tag[2], int *err)
{
	int type;
	uint8_t *s = bam_aux_get_core(b, tag);
	*err = 0;
	if (s == 0) { *err = -1; return 0; }
	type = *s++;
	if (type == 'f') {
		float x;
		memcpy(&x, s, sizeof(float)); // aux data is byte-packed: the value may be misaligned
		return x;
	} else { *err = -2; return 0; }
}

/* Return the value of a single-character tag ('A'/'a', or a 'c' byte). */
char bam_aux_getc(bam1_t *b, const char tag[2], int *err)
{
	int type;
	uint8_t *s = bam_aux_get_core(b, tag);
	*err = 0;
	if (s == 0) { *err = -1; return 0; }
	type = *s++;
	// sam_read1() stores printable characters with type 'A' (or 'a');
	// 'c' is still accepted for backward compatibility.
	if (type == 'A' || type == 'a' || type == 'c') return *(char*)s;
	else { *err = -2; return 0; }
}

/* Return a pointer to the NUL-terminated value of a string ('Z') or hex ('H') tag. */
char *bam_aux_getZH(bam1_t *b, const char tag[2], int *err)
{
	int type;
	uint8_t *s = bam_aux_get_core(b, tag);
	*err = 0;
	if (s == 0) { *err = -1; return 0; }
	type = *s++;
	if (type == 'Z' || type == 'H') return (char*)s;
	else { *err = -2; return 0; }
}
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t bam_swap_endian_8(uint64_t v)
{
	uint64_t swapped = 0;
	int byte;
	/* Peel bytes off the low end of v and push them onto the low end of
	 * the result: the first byte taken ends up as the most significant. */
	for (byte = 0; byte < 8; ++byte) {
		swapped = (swapped << 8) | (v & 0xFFu);
		v >>= 8;
	}
	return swapped;
}
__tamFile_t { + gzFile fp; + kstream_t *ks; + kstring_t *str; + uint64_t n_lines; +}; + +char **bam_load_pos(const char *fn, int *_n) +{ + char **list = 0, *s; + int n = 0, dret, m = 0, c; + gzFile fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r"); + kstream_t *ks; + kstring_t *str; + str = (kstring_t*)calloc(1, sizeof(kstring_t)); + ks = ks_init(fp); + while (ks_getuntil(ks, 0, str, &dret) > 0) { + if (n == m) { + m = m? m << 1 : 16; + list = (char**)realloc(list, m * sizeof(char*)); + } + s = list[n++] = (char*)calloc(str->l + 5, 1); + strcpy(s, str->s); + s += str->l + 1; + ks_getuntil(ks, 0, str, &dret); + *((uint32_t*)s) = atoi(str->s); + if (dret != '\n') + while ((c = ks_getc(fp)) >= 0 && c != '\n'); + } + ks_destroy(ks); + free(str->s); free(str); + *_n = n; + return list; +} + +static bam_header_t *hash2header(const kh_ref_t *hash) +{ + bam_header_t *header; + khiter_t k; + header = bam_header_init(); + header->n_targets = kh_size(hash); + header->target_name = (char**)calloc(kh_size(hash), sizeof(char*)); + header->target_len = (uint32_t*)calloc(kh_size(hash), 4); + for (k = kh_begin(hash); k != kh_end(hash); ++k) { + if (kh_exist(hash, k)) { + int i = (int)kh_value(hash, k); + header->target_name[i] = (char*)kh_key(hash, k); + header->target_len[i] = kh_value(hash, k)>>32; + } + } + bam_init_header_hash(header); + return header; +} +bam_header_t *sam_header_read2(const char *fn) +{ + bam_header_t *header; + int c, dret, ret; + gzFile fp; + kstream_t *ks; + kstring_t *str; + kh_ref_t *hash; + khiter_t k; + hash = kh_init(ref); + fp = (strcmp(fn, "-") == 0)? 
gzdopen(fileno(stdin), "r") : gzopen(fn, "r"); + assert(fp); + ks = ks_init(fp); + str = (kstring_t*)calloc(1, sizeof(kstring_t)); + while (ks_getuntil(ks, 0, str, &dret) >= 0) { + char *s = strdup(str->s); + int len, i; + i = kh_size(hash); + ks_getuntil(ks, 0, str, &dret); + len = atoi(str->s); + k = kh_put(ref, hash, s, &ret); + kh_value(hash, k) = (uint64_t)len<<32 | i; + if (dret != '\n') + while ((c = ks_getc(ks)) != '\n' && c != -1); + } + ks_destroy(ks); + gzclose(fp); + free(str->s); free(str); + fprintf(stderr, "[sam_header_read2] %d sequences loaded.\n", kh_size(hash)); + header = hash2header(hash); + kh_destroy(ref, hash); + return header; +} +static inline uint8_t *alloc_data(bam1_t *b, int size) +{ + if (b->m_data < size) { + b->m_data = size; + kroundup32(b->m_data); + b->data = (uint8_t*)realloc(b->data, b->m_data); + } + return b->data; +} +static inline void parse_error(int64_t n_lines, const char * __restrict msg) +{ + fprintf(stderr, "Parse error at line %lld: %s\n", (long long)n_lines, msg); + abort(); +} +static inline void append_text(bam_header_t *header, kstring_t *str) +{ + int x = header->l_text, y = header->l_text + str->l + 2; // 2 = 1 byte dret + 1 byte null + kroundup32(x); kroundup32(y); + if (x < y) header->text = (char*)realloc(header->text, y); + strncpy(header->text + header->l_text, str->s, str->l+1); // we cannot use strcpy() here. + header->l_text += str->l + 1; + header->text[header->l_text] = 0; +} +int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) +{ + int ret, doff, doff0, dret; + bam1_core_t *c = &b->core; + kstring_t *str = fp->str; + kstream_t *ks = fp->ks; + + while ((ret = ks_getuntil(fp->ks, 0, str, &dret)) >= 0 && str->s[0] == '@') { // skip header + str->s[str->l] = dret; // note that str->s is NOT null terminated!! + append_text(header, str); + if (dret != '\n') { + ret = ks_getuntil(fp->ks, '\n', str, &dret); + str->s[str->l] = '\n'; // NOT null terminated!! 
+ append_text(header, str); + } + ++fp->n_lines; + } + while (ret == 0) ret = ks_getuntil(fp->ks, 0, str, &dret); // special consideration for "\r\n" + if (ret < 0) return -1; + ++fp->n_lines; + doff = 0; + + { // name + c->l_qname = strlen(str->s) + 1; + memcpy(alloc_data(b, doff + c->l_qname) + doff, str->s, c->l_qname); + doff += c->l_qname; + } + { // flag, tid, pos, qual + ret = ks_getuntil(ks, 0, str, &dret); c->flag = atoi(str->s); + ret = ks_getuntil(ks, 0, str, &dret); c->tid = bam_get_tid(header, str->s); + ret = ks_getuntil(ks, 0, str, &dret); c->pos = isdigit(str->s[0])? atoi(str->s) - 1 : -1; + ret = ks_getuntil(ks, 0, str, &dret); c->qual = isdigit(str->s[0])? atoi(str->s) : 0; + if (ret < 0) return -2; + } + { // cigar + char *s, *t; + int i, op; + long x; + c->n_cigar = 0; + if (ks_getuntil(ks, 0, str, &dret) < 0) return -3; + if (str->s[0] != '*') { + for (s = str->s; *s; ++s) { + if (isalpha(*s)) ++c->n_cigar; + else if (!isdigit(*s)) parse_error(fp->n_lines, "invalid CIGAR character"); + } + b->data = alloc_data(b, doff + c->n_cigar * 4); + for (i = 0, s = str->s; i != c->n_cigar; ++i) { + x = strtol(s, &t, 10); + op = toupper(*t); + if (op == 'M') op = BAM_CMATCH; + else if (op == 'I') op = BAM_CINS; + else if (op == 'D') op = BAM_CDEL; + else if (op == 'N') op = BAM_CREF_SKIP; + else if (op == 'S') op = BAM_CSOFT_CLIP; + else if (op == 'H') op = BAM_CHARD_CLIP; + else if (op == 'P') op = BAM_CPAD; + else parse_error(fp->n_lines, "invalid CIGAR operation"); + s = t + 1; + bam1_cigar(b)[i] = x << BAM_CIGAR_SHIFT | op; + } + if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation"); + c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b))); + doff += c->n_cigar * 4; + } + } + { // mtid, mpos, isize + ret = ks_getuntil(ks, 0, str, &dret); c->mtid = strcmp(str->s, "=")? bam_get_tid(header, str->s) : c->tid; + ret = ks_getuntil(ks, 0, str, &dret); c->mpos = isdigit(str->s[0])? 
atoi(str->s) - 1 : -1; + ret = ks_getuntil(ks, 0, str, &dret); c->isize = (str->s[0] == '-' || isdigit(str->s[0]))? atoi(str->s) : 0; + if (ret < 0) return -4; + } + { // seq and qual + int i; + uint8_t *p; + if (ks_getuntil(ks, 0, str, &dret) < 0) return -5; // seq + c->l_qseq = strlen(str->s); + if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) + parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent"); + p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff; + bzero(p, (c->l_qseq+1)/2); + for (i = 0; i < c->l_qseq; ++i) + p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2); + if (ks_getuntil(ks, 0, str, &dret) < 0) return -6; // qual + if (c->l_qseq != strlen(str->s)) + parse_error(fp->n_lines, "sequence and quality are inconsistent"); + p += (c->l_qseq+1)/2; + for (i = 0; i < c->l_qseq; ++i) p[i] = str->s[i] - 33; + doff += c->l_qseq + (c->l_qseq+1)/2; + } + doff0 = doff; + if (dret != '\n' && dret != '\r') { // aux + while (ks_getuntil(ks, 0, str, &dret) >= 0) { + uint8_t *s, type, key[2]; + if (str->l < 6 || str->s[2] != ':' || str->s[4] != ':') + parse_error(fp->n_lines, "missing colon in auxiliary data"); + key[0] = str->s[0]; key[1] = str->s[1]; + type = str->s[3]; + s = alloc_data(b, doff + 3) + doff; + s[0] = key[0]; s[1] = key[1]; s += 2; doff += 2; + if (type == 'A' || type == 'a') { + s = alloc_data(b, doff + 2) + doff; + *s++ = type; *s = str->s[5]; + doff += 2; + } else if (type == 'I' || type == 'i') { + long long x; + s = alloc_data(b, doff + 5) + doff; + x = (long long)atoll(str->s + 5); + if (x < 0) { + if (x >= -127) { + *s++ = 'c'; *(int8_t*)s = (int8_t)x; + s += 1; doff += 2; + } else if (x >= -32767) { + *s++ = 's'; *(int16_t*)s = (int16_t)x; + s += 2; doff += 3; + } else { + *s++ = 'i'; *(int32_t*)s = (int32_t)x; + s += 4; doff += 5; + if (x < -2147483648ll) + fprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.", + (long long)fp->n_lines, x); + } + } else { + if 
(x <= 255) { + *s++ = 'C'; *s++ = (uint8_t)x; + doff += 2; + } else if (x <= 65535) { + *s++ = 'S'; *(uint16_t*)s = (uint16_t)x; + s += 2; doff += 3; + } else { + *s++ = 'I'; *(uint32_t*)s = (uint32_t)x; + s += 4; doff += 5; + if (x > 4294967295ll) + fprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.", + (long long)fp->n_lines, x); + } + } + } else if (type == 'f') { + s = alloc_data(b, doff + 5) + doff; + *s++ = 'f'; + *(float*)s = (float)atof(str->s + 5); + s += 4; doff += 5; + } else if (type == 'Z' || type == 'H') { + int size = 1 + (str->l - 5) + 1; + if (type == 'H') { // check whether the hex string is valid + int i; + if ((str->l - 5) % 2 == 1) parse_error(fp->n_lines, "length of the hex string not even"); + for (i = 0; i < str->l - 5; ++i) { + int c = toupper(str->s[5 + i]); + if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F'))) + parse_error(fp->n_lines, "invalid hex character"); + } + } + s = alloc_data(b, doff + size) + doff; + *s++ = type; + memcpy(s, str->s + 5, str->l - 5); + s[str->l - 5] = 0; + doff += size; + } else parse_error(fp->n_lines, "unrecognized type"); + if (dret == '\n' || dret == '\r') break; + } + } + b->l_aux = doff - doff0; + b->data_len = doff; + return 0; +} + +tamFile sam_open(const char *fn) +{ + tamFile fp; + fp = (tamFile)calloc(1, sizeof(struct __tamFile_t)); + fp->str = (kstring_t*)calloc(1, sizeof(kstring_t)); + fp->fp = (strcmp(fn, "-") == 0)? 
gzdopen(fileno(stdin), "r") : gzopen(fn, "r"); + fp->ks = ks_init(fp->fp); + fp->n_lines = 0; + return fp; +} + +void sam_close(tamFile fp) +{ + if (fp) { + ks_destroy(fp->ks); + gzclose(fp->fp); + free(fp->str->s); free(fp->str); + free(fp); + } +} + +static void taf2baf_core(const char *fntaf, const char *fnbaf, bam_header_t *header) +{ + bamFile fpbaf; + bam1_t *b; + tamFile fp; + int ret; + + b = (bam1_t*)calloc(1, sizeof(bam1_t)); + fpbaf = bam_open(fnbaf, "w"); + fp = sam_open(fntaf); + ret = sam_read1(fp, header, b); + bam_header_write(fpbaf, header); + if (ret >= 0) { + bam_write1(fpbaf, b); + while (sam_read1(fp, header, b) >= 0) bam_write1(fpbaf, b); + } + bam_close(fpbaf); + free(b->data); free(b); + sam_close(fp); +} + +int bam_taf2baf(int argc, char *argv[]) +{ + int c; + bam_header_t *header; + + while ((c = getopt(argc, argv, "")) >= 0) { + } + if (optind + 3 > argc) { + fprintf(stderr, "Usage: bamtk import \n"); + return 1; + } + header = sam_header_read2(argv[optind]); + taf2baf_core(argv[optind+1], argv[optind+2], header); + bam_header_destroy(header); + return 0; +} diff --git a/bam_index.c b/bam_index.c new file mode 100644 index 0000000..2b01815 --- /dev/null +++ b/bam_index.c @@ -0,0 +1,452 @@ +#include +#include "bam.h" +#include "khash.h" +#include "ksort.h" +#include "bam_endian.h" + +/*! + @header + + Alignment indexing. Before indexing, BAM must be sorted based on the + leftmost coordinate of alignments. In indexing, BAM uses two indices: + a UCSC binning index and a simple linear index. The binning index is + efficient for alignments spanning long distance, while the auxiliary + linear index helps to reduce unnecessary seek calls especially for + short alignments. + + The UCSC binning scheme was suggested by Richard Durbin and Lincoln + Stein and is explained by Kent et al. (2002). 
In this scheme, each bin + represents a contiguous genomic region which can be fully contained in + another bin; each alignment is associated with the bin which represents + the smallest region containing the entire alignment. The binning + scheme is essentially another representation of an R-tree. A distinct bin + uniquely corresponds to a distinct internal node in an R-tree. Bin A is + a child of Bin B if region A is contained in B. + + In BAM, each bin may span 2^29, 2^26, 2^23, 2^20, 2^17 or 2^14 bp. Bin + 0 spans a 512Mbp region, bins 1-8 span 64Mbp, 9-72 8Mbp, 73-584 1Mbp, + 585-4680 128Kbp and bins 4681-37449 span 16Kbp regions. If we want to + find the alignments overlapping a region [rbeg,rend), we need to + calculate the list of bins that may overlap the region and test + the alignments in those bins to confirm the overlaps. If the specified + region is short, typically only a few alignments in six bins need to + be retrieved, so the overlapping alignments can be fetched quickly. 
+ + */ + +#define BAM_MIN_CHUNK_GAP 32768 +#define BAM_LIDX_SHIFT 14 + +typedef struct { + uint64_t u, v; +} pair64_t; + +#define pair64_lt(a,b) ((a).u < (b).u) +KSORT_INIT(off, pair64_t, pair64_lt) + +typedef struct { + uint32_t m, n; + pair64_t *list; +} bam_binlist_t; + +typedef struct { + int32_t n, m; + uint64_t *offset; +} bam_lidx_t; + +KHASH_MAP_INIT_INT(i, bam_binlist_t) + +struct __bam_index_t { + int32_t n; + khash_t(i) **index; + bam_lidx_t *index2; +}; + +// requirement: len <= LEN_MASK +static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end) +{ + khint_t k; + bam_binlist_t *l; + int ret; + k = kh_put(i, h, bin, &ret); + l = &kh_value(h, k); + if (ret) { // not present + l->m = 1; l->n = 0; + l->list = (pair64_t*)calloc(l->m, 16); + } + if (l->n == l->m) { + l->m <<= 1; + l->list = (pair64_t*)realloc(l->list, l->m * 16); + } + l->list[l->n].u = beg; l->list[l->n++].v = end; +} + +static inline void insert_offset2(bam_lidx_t *index2, int last, int curr, uint64_t offset) +{ + int i; + if (index2->m < curr + 1) { + index2->m = curr + 1; + kroundup32(index2->m); + index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8); + } + if (last > curr) last = -1; + for (i = last + 1; i <= curr; ++i) index2->offset[i] = offset; + index2->n = curr + 1; +} + +static void merge_chunks(bam_index_t *idx) +{ +#if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16) + khash_t(i) *index; + int i, l, m; + khint_t k; + for (i = 0; i < idx->n; ++i) { + index = idx->index[i]; + for (k = kh_begin(index); k != kh_end(index); ++k) { + bam_binlist_t *p; + if (!kh_exist(index, k)) continue; + p = &kh_value(index, k); + m = 0; + for (l = 1; l < p->n; ++l) { +#ifdef BAM_TRUE_OFFSET + if (p->list[m].v + BAM_MIN_CHUNK_GAP > p->list[l].u) p->list[m].v = p->list[l].v; +#else + if (p->list[m].v>>16 == p->list[l].u>>16) p->list[m].v = p->list[l].v; +#endif + else p->list[++m] = p->list[l]; + } // ~for(l) + p->n = m + 1; + } // ~for(k) + } // 
~for(i) +#endif // defined(BAM_TRUE_OFFSET) || defined(BAM_BGZF) +} + +bam_index_t *bam_index_core(bamFile fp) +{ + bam1_t *b; + bam_header_t *h; + int i, ret; + bam_index_t *idx; + uint32_t last_coor, last_tid, last_bin, save_bin, save_tid; + bam1_core_t *c; + uint64_t save_off, last_off; + + idx = (bam_index_t*)calloc(1, sizeof(bam_index_t)); + b = (bam1_t*)calloc(1, sizeof(bam1_t)); + h = bam_header_read(fp); + c = &b->core; + + idx->n = h->n_targets; + bam_header_destroy(h); + idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*)); + for (i = 0; i < idx->n; ++i) idx->index[i] = kh_init(i); + idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t)); + + save_bin = save_tid = last_tid = last_bin = 0xffffffffu; + save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu; + while ((ret = bam_read1(fp, b)) >= 0) { + if (last_tid != c->tid) { // change of chromosomes + last_tid = c->tid; + last_bin = 0xffffffffu; + } else if (last_coor > c->pos) { + fprintf(stderr, "[bam_index_core] the alignment is not sorted. Abort!\n"); + exit(1); + } + if (last_coor>>BAM_LIDX_SHIFT != b->core.pos>>BAM_LIDX_SHIFT) // then write the linear index + insert_offset2(&idx->index2[b->core.tid], last_coor>>BAM_LIDX_SHIFT, b->core.pos>>BAM_LIDX_SHIFT, last_off); + if (c->bin != last_bin) { // then possibly write the binning index + if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record + insert_offset(idx->index[save_tid], save_bin, save_off, last_off); + save_off = last_off; + save_bin = last_bin = c->bin; + save_tid = c->tid; + } + if (bam_tell(fp) <= last_off) { + fprintf(stderr, "[bam_index_core] bug in BGZF/RAZF: %llx < %llx\n", + (unsigned long long)bam_tell(fp), (unsigned long long)last_off); + exit(1); + } + last_off = bam_tell(fp); + last_coor = b->core.pos; + } + insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp)); + merge_chunks(idx); + if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. 
(%d)\n", ret); + free(b->data); free(b); + return idx; +} + +void bam_index_destroy(bam_index_t *idx) +{ + khint_t k; + int i; + if (idx == 0) return; + for (i = 0; i < idx->n; ++i) { + khash_t(i) *index = idx->index[i]; + bam_lidx_t *index2 = idx->index2 + i; + for (k = kh_begin(index); k != kh_end(index); ++k) { + if (kh_exist(index, k)) + free(kh_value(index, k).list); + } + kh_destroy(i, index); + free(index2->offset); + } + free(idx->index); free(idx->index2); + free(idx); +} + +void bam_index_save(const bam_index_t *idx, FILE *fp) +{ + int32_t i, size; + khint_t k; + fwrite("BAI\1", 1, 4, fp); + if (bam_is_be) { + uint32_t x = idx->n; + fwrite(bam_swap_endian_4p(&x), 4, 1, fp); + } else fwrite(&idx->n, 4, 1, fp); + for (i = 0; i < idx->n; ++i) { + khash_t(i) *index = idx->index[i]; + bam_lidx_t *index2 = idx->index2 + i; + // write binning index + size = kh_size(index); + if (bam_is_be) { // big endian + uint32_t x = size; + fwrite(bam_swap_endian_4p(&x), 4, 1, fp); + } else fwrite(&size, 4, 1, fp); + for (k = kh_begin(index); k != kh_end(index); ++k) { + if (kh_exist(index, k)) { + bam_binlist_t *p = &kh_value(index, k); + if (bam_is_be) { // big endian + uint32_t x; + x = kh_key(index, k); fwrite(bam_swap_endian_4p(&x), 4, 1, fp); + x = p->n; fwrite(bam_swap_endian_4p(&x), 4, 1, fp); + for (x = 0; (int)x < p->n; ++x) { + bam_swap_endian_8p(&p->list[x].u); + bam_swap_endian_8p(&p->list[x].v); + } + fwrite(p->list, 16, p->n, fp); + for (x = 0; (int)x < p->n; ++x) { + bam_swap_endian_8p(&p->list[x].u); + bam_swap_endian_8p(&p->list[x].v); + } + } else { + fwrite(&kh_key(index, k), 4, 1, fp); + fwrite(&p->n, 4, 1, fp); + fwrite(p->list, 16, p->n, fp); + } + } + } + // write linear index (index2) + if (bam_is_be) { + int x = index2->n; + fwrite(bam_swap_endian_4p(&x), 4, 1, fp); + } else fwrite(&index2->n, 4, 1, fp); + if (bam_is_be) { // big endian + int x; + for (x = 0; (int)x < index2->n; ++x) + bam_swap_endian_8p(&index2->offset[x]); + fwrite(index2->offset, 
8, index2->n, fp); + for (x = 0; (int)x < index2->n; ++x) + bam_swap_endian_8p(&index2->offset[x]); + } else fwrite(index2->offset, 8, index2->n, fp); + } + fflush(fp); +} + +bam_index_t *bam_index_load(const char *fn) +{ + bam_index_t *idx; + FILE *fp; + int i; + char *fnidx, magic[4]; + + fnidx = (char*)calloc(strlen(fn) + 5, 1); + strcpy(fnidx, fn); strcat(fnidx, ".bai"); + if ((fp = fopen(fnidx, "r")) == 0) { + fprintf(stderr, "[bam_index_load] the alignment is not indexed. Please run `index' command first. Abort!\n"); + exit(1); + } + free(fnidx); + + fread(magic, 1, 4, fp); + if (strncmp(magic, "BAI\1", 4)) { + fprintf(stderr, "[bam_index_load] wrong magic number.\n"); + fclose(fp); + return 0; + } + idx = (bam_index_t*)calloc(1, sizeof(bam_index_t)); + fread(&idx->n, 4, 1, fp); + if (bam_is_be) bam_swap_endian_4p(&idx->n); + idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*)); + idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t)); + for (i = 0; i < idx->n; ++i) { + khash_t(i) *index; + bam_lidx_t *index2 = idx->index2 + i; + uint32_t key, size; + khint_t k; + int j, ret; + bam_binlist_t *p; + index = idx->index[i] = kh_init(i); + // load binning index + fread(&size, 4, 1, fp); + if (bam_is_be) bam_swap_endian_4p(&size); + for (j = 0; j < (int)size; ++j) { + fread(&key, 4, 1, fp); + if (bam_is_be) bam_swap_endian_4p(&key); + k = kh_put(i, index, key, &ret); + p = &kh_value(index, k); + fread(&p->n, 4, 1, fp); + if (bam_is_be) bam_swap_endian_4p(&p->n); + p->m = p->n; + p->list = (pair64_t*)malloc(p->m * 16); + fread(p->list, 16, p->n, fp); + if (bam_is_be) { + int x; + for (x = 0; x < p->n; ++x) { + bam_swap_endian_8p(&p->list[x].u); + bam_swap_endian_8p(&p->list[x].v); + } + } + } + // load linear index + fread(&index2->n, 4, 1, fp); + if (bam_is_be) bam_swap_endian_4p(&index2->n); + index2->m = index2->n; + index2->offset = (uint64_t*)calloc(index2->m, 8); + fread(index2->offset, index2->n, 8, fp); + if (bam_is_be) + for (j = 0; j < 
index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]); + } + fclose(fp); + return idx; +} + +int bam_index_build(const char *fn) +{ + char *fnidx; + FILE *fpidx; + bamFile fp; + bam_index_t *idx; + assert(fp = bam_open(fn, "r")); + idx = bam_index_core(fp); + bam_close(fp); + fnidx = (char*)calloc(strlen(fn) + 5, 1); + strcpy(fnidx, fn); strcat(fnidx, ".bai"); + assert(fpidx = fopen(fnidx, "w")); + bam_index_save(idx, fpidx); + bam_index_destroy(idx); + fclose(fpidx); + free(fnidx); + return 0; +} + +int bam_index(int argc, char *argv[]) +{ + if (argc < 2) { + fprintf(stderr, "Usage: samtools index \n"); + return 1; + } + bam_index_build(argv[1]); + return 0; +} + +#define MAX_BIN 37450 // =(8^6-1)/7+1 + +static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[MAX_BIN]) +{ + int i = 0, k; + --end; + list[i++] = 0; + for (k = 1 + (beg>>26); k <= 1 + (end>>26); ++k) list[i++] = k; + for (k = 9 + (beg>>23); k <= 9 + (end>>23); ++k) list[i++] = k; + for (k = 73 + (beg>>20); k <= 73 + (end>>20); ++k) list[i++] = k; + for (k = 585 + (beg>>17); k <= 585 + (end>>17); ++k) list[i++] = k; + for (k = 4681 + (beg>>14); k <= 4681 + (end>>14); ++k) list[i++] = k; + return i; +} + +static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b) +{ + uint32_t rbeg = b->core.pos; + uint32_t rend = bam_calend(&b->core, bam1_cigar(b)); + return (rend > beg && rbeg < end); +} + +int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func) +{ + uint16_t *bins; + int i, n_bins, n_off; + pair64_t *off; + khint_t k; + khash_t(i) *index; + uint64_t min_off; + + bins = (uint16_t*)calloc(MAX_BIN, 2); + n_bins = reg2bins(beg, end, bins); + index = idx->index[tid]; + min_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? 
0 : idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT]; + for (i = n_off = 0; i < n_bins; ++i) { + if ((k = kh_get(i, index, bins[i])) != kh_end(index)) + n_off += kh_value(index, k).n; + } + if (n_off == 0) { + free(bins); return 0; + } + off = (pair64_t*)calloc(n_off, 16); + for (i = n_off = 0; i < n_bins; ++i) { + if ((k = kh_get(i, index, bins[i])) != kh_end(index)) { + int j; + bam_binlist_t *p = &kh_value(index, k); + for (j = 0; j < p->n; ++j) + if (p->list[j].v > min_off) off[n_off++] = p->list[j]; + } + } + free(bins); + { + bam1_t *b; + int ret, n_seeks; + uint64_t curr_off; + b = (bam1_t*)calloc(1, sizeof(bam1_t)); + ks_introsort(off, n_off, off); + // resolve overlaps between adjecent blocks; this may happen due to the merge in indexing + for (i = 1; i < n_off; ++i) + if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u; + { // merge adjacent blocks +#if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16) + int l; + for (i = 1, l = 0; i < n_off; ++i) { +#ifdef BAM_TRUE_OFFSET + if (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v; +#else + if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v; +#endif + else off[++l] = off[i]; + } + n_off = l + 1; +#endif + } + // retrive alignments + n_seeks = 0; i = -1; curr_off = 0; + for (;;) { + if (curr_off == 0 || curr_off >= off[i].v) { // then jump to the next chunk + if (i == n_off - 1) break; // no more chunks + if (i >= 0) assert(curr_off == off[i].v); // otherwise bug + if (i < 0 || off[i].v != off[i+1].u) { // not adjacent chunks; then seek + bam_seek(fp, off[i+1].u, SEEK_SET); + curr_off = bam_tell(fp); + ++n_seeks; + } + ++i; + } + if ((ret = bam_read1(fp, b)) > 0) { + curr_off = bam_tell(fp); + if (b->core.tid != tid || b->core.pos >= end) break; // no need to proceed + else if (is_overlap(beg, end, b)) func(b, data); + } else break; // end of file + } +// fprintf(stderr, "[bam_fetch] # seek calls: %d\n", n_seeks); + bam_destroy1(b); + } + free(off); + return 0; +} diff --git a/bam_lpileup.c 
b/bam_lpileup.c new file mode 100644 index 0000000..83f91c2 --- /dev/null +++ b/bam_lpileup.c @@ -0,0 +1,196 @@ +#include +#include +#include "bam.h" +#include "ksort.h" + +#define TV_GAP 2 + +typedef struct __freenode_t { + uint32_t level:28, cnt:4; + struct __freenode_t *next; +} freenode_t, *freenode_p; + +#define freenode_lt(a,b) ((a)->cnt < (b)->cnt || ((a)->cnt == (b)->cnt && (a)->level < (b)->level)) +KSORT_INIT(node, freenode_p, freenode_lt) + +/* Memory pool, similar to the one in bam_pileup.c */ +typedef struct { + int cnt, n, max; + freenode_t **buf; +} mempool_t; + +static mempool_t *mp_init() +{ + return (mempool_t*)calloc(1, sizeof(mempool_t)); +} +static void mp_destroy(mempool_t *mp) +{ + int k; + for (k = 0; k < mp->n; ++k) free(mp->buf[k]); + free(mp->buf); free(mp); +} +static inline freenode_t *mp_alloc(mempool_t *mp) +{ + ++mp->cnt; + if (mp->n == 0) return (freenode_t*)calloc(1, sizeof(freenode_t)); + else return mp->buf[--mp->n]; +} +static inline void mp_free(mempool_t *mp, freenode_t *p) +{ + --mp->cnt; p->next = 0; p->cnt = TV_GAP; + if (mp->n == mp->max) { + mp->max = mp->max? 
mp->max<<1 : 256; + mp->buf = (freenode_t**)realloc(mp->buf, sizeof(freenode_t*) * mp->max); + } + mp->buf[mp->n++] = p; +} + +/* core part */ +struct __bam_lplbuf_t { + int max, n_cur, n_pre; + int max_level, *cur_level, *pre_level; + mempool_t *mp; + freenode_t **aux, *head, *tail; + int n_nodes, m_aux; + bam_pileup_f func; + void *user_data; + bam_plbuf_t *plbuf; +}; + +void bam_lplbuf_reset(bam_lplbuf_t *buf) +{ + freenode_t *p, *q; + bam_plbuf_reset(buf->plbuf); + for (p = buf->head; p->next;) { + q = p->next; + mp_free(buf->mp, p); + p = q; + } + buf->head = buf->tail; + buf->max_level = 0; + buf->n_cur = buf->n_pre = 0; + buf->n_nodes = 0; +} + +static int tview_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) +{ + bam_lplbuf_t *tv = (bam_lplbuf_t*)data; + freenode_t *p; + int i, l, max_level; + // allocate memory if necessary + if (tv->max < n) { // enlarge + tv->max = n; + kroundup32(tv->max); + tv->cur_level = (int*)realloc(tv->cur_level, sizeof(int) * tv->max); + tv->pre_level = (int*)realloc(tv->pre_level, sizeof(int) * tv->max); + } + tv->n_cur = n; + // update cnt + for (p = tv->head; p->next; p = p->next) + if (p->cnt > 0) --p->cnt; + // calculate cur_level[] + max_level = 0; + for (i = l = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (p->qpos == 0) { + if (tv->head->next && tv->head->cnt == 0) { // then take a free slot + freenode_t *p = tv->head->next; + tv->cur_level[i] = tv->head->level; + mp_free(tv->mp, tv->head); + tv->head = p; + --tv->n_nodes; + } else tv->cur_level[i] = ++tv->max_level; + } else { + tv->cur_level[i] = tv->pre_level[l++]; + if (p->qpos == p->b->core.l_qseq - 1) { // then return a free slot + tv->tail->level = tv->cur_level[i]; + tv->tail->next = mp_alloc(tv->mp); + tv->tail = tv->tail->next; + ++tv->n_nodes; + } + } + if (tv->cur_level[i] > max_level) max_level = tv->cur_level[i]; + ((bam_pileup1_t*)p)->level = tv->cur_level[i]; + } + assert(l == tv->n_pre); + tv->func(tid, pos, n, pl, 
tv->user_data); + // sort the linked list + if (tv->n_nodes) { + freenode_t *q; + if (tv->n_nodes + 1 > tv->m_aux) { // enlarge + tv->m_aux = tv->n_nodes + 1; + kroundup32(tv->m_aux); + tv->aux = (freenode_t**)realloc(tv->aux, sizeof(void*) * tv->m_aux); + } + for (p = tv->head, i = l = 0; p->next;) { + if (p->level > max_level) { // then discard this entry + q = p->next; + mp_free(tv->mp, p); + p = q; + } else { + tv->aux[i++] = p; + p = p->next; + } + } + tv->aux[i] = tv->tail; // add a proper tail for the loop below + tv->n_nodes = i; + if (tv->n_nodes) { + ks_introsort(node, tv->n_nodes, tv->aux); + for (i = 0; i < tv->n_nodes; ++i) tv->aux[i]->next = tv->aux[i+1]; + tv->head = tv->aux[0]; + } else tv->head = tv->tail; + } + // clean up + tv->max_level = max_level; + memcpy(tv->pre_level, tv->cur_level, tv->n_cur * 4); + // squeeze out terminated levels + for (i = l = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (p->qpos != p->b->core.l_qseq - 1) + tv->pre_level[l++] = tv->pre_level[i]; + } + tv->n_pre = l; + return 0; +} + +bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data) +{ + bam_lplbuf_t *tv; + tv = (bam_lplbuf_t*)calloc(1, sizeof(bam_lplbuf_t)); + tv->mp = mp_init(); + tv->head = tv->tail = mp_alloc(tv->mp); + tv->func = func; + tv->user_data = data; + tv->plbuf = bam_plbuf_init(tview_func, tv); + return (bam_lplbuf_t*)tv; +} + +void bam_lplbuf_destroy(bam_lplbuf_t *tv) +{ + mp_free(tv->mp, tv->head); + mp_destroy(tv->mp); + free(tv->cur_level); free(tv->pre_level); + bam_plbuf_destroy(tv->plbuf); + free(tv->aux); + free(tv); +} + +int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *tv) +{ + return bam_plbuf_push(b, tv->plbuf); +} + +int bam_lpileup_file(bamFile fp, bam_pileup_f func, void *func_data) +{ + bam_lplbuf_t *buf; + int ret; + bam1_t *b; + b = (bam1_t*)calloc(1, sizeof(bam1_t)); + buf = bam_lplbuf_init(func, func_data); + while ((ret = bam_read1(fp, b)) >= 0) + bam_lplbuf_push(b, buf); + bam_lplbuf_push(0, buf); + 
bam_lplbuf_destroy(buf); + free(b->data); free(b); + return 0; +} diff --git a/bam_maqcns.c b/bam_maqcns.c new file mode 100644 index 0000000..c8009aa --- /dev/null +++ b/bam_maqcns.c @@ -0,0 +1,451 @@ +#include +#include "bam.h" +#include "bam_maqcns.h" +#include "ksort.h" +KSORT_INIT_GENERIC(uint32_t) + +typedef struct __bmc_aux_t { + int max; + uint32_t *info; +} bmc_aux_t; + +typedef struct { + float esum[4], fsum[4]; + uint32_t c[4]; + uint32_t mapQ_max; +} glf_call_aux_t; + +/* + P() = \theta \sum_{i=1}^{N-1} 1/i + P(D|) = \sum_{k=1}^{N-1} p_k 1/2 [(k/N)^n_2(1-k/N)^n_1 + (k/N)^n1(1-k/N)^n_2] + p_k = i/k / \sum_{i=1}^{N-1} 1/i + */ +static void cal_het(bam_maqcns_t *aa) +{ + int k, n1, n2; + double sum_harmo; // harmonic sum + double poly_rate; + double p1 = 0.0, p3 = 0.0; // just for testing + + free(aa->lhet); + aa->lhet = (double*)calloc(256 * 256, sizeof(double)); + sum_harmo = 0.0; + for (k = 1; k <= aa->n_hap - 1; ++k) + sum_harmo += 1.0 / k; + for (n1 = 0; n1 < 256; ++n1) { + for (n2 = 0; n2 < 256; ++n2) { + long double sum = 0.0; + double lC = lgamma(n1+n2+1) - lgamma(n1+1) - lgamma(n2+1); // \binom{n1+n2}{n1} + for (k = 1; k <= aa->n_hap - 1; ++k) { + double pk = 1.0 / k / sum_harmo; + double log1 = log((double)k/aa->n_hap); + double log2 = log(1.0 - (double)k/aa->n_hap); + sum += pk * 0.5 * (expl(log1*n2) * expl(log2*n1) + expl(log1*n1) * expl(log2*n2)); + } + aa->lhet[n1<<8|n2] = lC + logl(sum); + if (n1 == 17 && n2 == 3) p3 = lC + logl(expl(logl(0.5) * 20)); + if (n1 == 19 && n2 == 1) p1 = lC + logl(expl(logl(0.5) * 20)); + } + } + poly_rate = aa->het_rate * sum_harmo; + aa->q_r = -4.343 * log(2.0 * poly_rate / (1.0 - poly_rate)); +} + +/** initialize the helper structure */ +static void cal_coef(bam_maqcns_t *aa) +{ + int k, n, q; + long double sum_a[257], b[256], q_c[256], tmp[256], fk2[256]; + double *lC; + + lC = (double*)calloc(256 * 256, sizeof(double)); + // aa->lhet will be allocated and initialized + free(aa->fk); free(aa->coef); + aa->fk 
= (double*)calloc(256, sizeof(double)); + aa->coef = (double*)calloc(256*256*64, sizeof(double)); + aa->fk[0] = fk2[0] = 1.0; + for (n = 1; n != 256; ++n) { + aa->fk[n] = pow(aa->theta, n) * (1.0 - aa->eta) + aa->eta; + fk2[n] = aa->fk[n>>1]; // this is an approximation, assuming reads equally likely come from both strands + } + for (n = 1; n != 256; ++n) + for (k = 1; k <= n; ++k) + lC[n<<8|k] = lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1); + for (q = 1; q != 64; ++q) { + double e = pow(10.0, -q/10.0); + double le = log(e); + double le1 = log(1.0-e); + for (n = 1; n != 256; ++n) { + double *coef = aa->coef + (q<<16|n<<8); + sum_a[n+1] = 0.0; + for (k = n; k >= 0; --k) { // a_k = \sum_{i=k}^n C^n_k \epsilon^k (1-\epsilon)^{n-k} + sum_a[k] = sum_a[k+1] + expl(lC[n<<8|k] + k*le + (n-k)*le1); + b[k] = sum_a[k+1] / sum_a[k]; + if (b[k] > 0.99) b[k] = 0.99; + } + for (k = 0; k != n; ++k) // log(\bar\beta_{nk}(\bar\epsilon)^{f_k}) + q_c[k] = -4.343 * fk2[k] * logl(b[k] / e); + for (k = 1; k != n; ++k) q_c[k] += q_c[k-1]; // \prod_{i=0}^k c_i + for (k = 0; k <= n; ++k) { // powl() in 64-bit mode seems broken on my Mac OS X 10.4.9 + tmp[k] = -4.343 * logl(1.0 - expl(fk2[k] * logl(b[k]))); + coef[k] = (k? 
q_c[k-1] : 0) + tmp[k]; // this is the final c_{nk} + } + } + } + free(lC); +} + +bam_maqcns_t *bam_maqcns_init() +{ + bam_maqcns_t *bm; + bm = (bam_maqcns_t*)calloc(1, sizeof(bam_maqcns_t)); + bm->aux = (bmc_aux_t*)calloc(1, sizeof(bmc_aux_t)); + bm->het_rate = 0.001; + bm->theta = 0.85; + bm->n_hap = 2; + bm->eta = 0.03; + return bm; +} + +void bam_maqcns_prepare(bam_maqcns_t *bm) +{ + cal_coef(bm); cal_het(bm); +} + +void bam_maqcns_destroy(bam_maqcns_t *bm) +{ + if (bm == 0) return; + free(bm->lhet); free(bm->fk); free(bm->coef); free(bm->aux->info); + free(bm->aux); free(bm); +} + +glf1_t *bam_maqcns_glfgen(int _n, const bam_pileup1_t *pl, uint8_t ref_base, bam_maqcns_t *bm) +{ + glf_call_aux_t *b; + int i, j, k, w[8], c, n; + glf1_t *g = (glf1_t*)calloc(1, sizeof(glf1_t)); + float p[16], min_p = 1e30; + + g->ref_base = ref_base; + if (_n == 0) return g; + + // construct aux array + if (bm->aux->max < _n) { + bm->aux->max = _n; + kroundup32(bm->aux->max); + bm->aux->info = (uint32_t*)realloc(bm->aux->info, 4 * bm->aux->max); + } + for (i = n = 0; i < _n; ++i) { + const bam_pileup1_t *p = pl + i; + uint32_t q, x = 0; + if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue; + q = (uint32_t)bam1_qual(p->b)[p->qpos]; + x |= (uint32_t)bam1_strand(p->b) << 18 | q << 8 | p->b->core.qual; + if (p->b->core.qual < q) q = p->b->core.qual; + x |= q << 24; + q = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; + if (!p->is_del && q < 4) x |= 1 << 21 | q << 16; + bm->aux->info[n++] = x; + } + ks_introsort(uint32_t, n, bm->aux->info); + // generate esum and fsum + b = (glf_call_aux_t*)calloc(1, sizeof(glf_call_aux_t)); + for (k = 0; k != 8; ++k) w[k] = 0; + b->mapQ_max = 0; + for (j = n - 1; j >= 0; --j) { // calculate esum and fsum + uint32_t info = bm->aux->info[j]; + if (info>>24 < 4 && (info>>8&0x3f) != 0) info = 4<<24 | (info&0xffffff); + k = info>>16&7; + if (info>>24 > 0) { + b->esum[k&3] += bm->fk[w[k]] * (info>>24); + b->fsum[k&3] += bm->fk[w[k]]; + if 
(w[k] < 0xff) ++w[k]; + ++b->c[k&3]; + } + if (b->mapQ_max < (info&0x7f)) b->mapQ_max = info&0x7f; + } + // rescale ->c[] + for (j = c = 0; j != 4; ++j) c += b->c[j]; + if (c > 255) { + for (j = 0; j != 4; ++j) b->c[j] = (int)(254.0 * b->c[j] / c + 0.5); + for (j = c = 0; j != 4; ++j) c += b->c[j]; + } + // generate likelihood + for (j = 0; j != 4; ++j) { + // homozygous + float tmp1, tmp3; + int tmp2, bar_e; + for (k = 0, tmp1 = tmp3 = 0.0, tmp2 = 0; k != 4; ++k) { + if (j == k) continue; + tmp1 += b->esum[k]; tmp2 += b->c[k]; tmp3 += b->fsum[k]; + } + if (tmp2) { + bar_e = (int)(tmp1 / tmp3 + 0.5); + if (bar_e < 4) bar_e = 4; // should not happen + if (bar_e > 63) bar_e = 63; + p[j<<2|j] = tmp1 + bm->coef[bar_e<<16|c<<8|tmp2]; + } else p[j<<2|j] = 0.0; // all the bases are j + // heterozygous + for (k = j + 1; k < 4; ++k) { + for (i = 0, tmp2 = 0, tmp1 = tmp3 = 0.0; i != 4; ++i) { + if (i == j || i == k) continue; + tmp1 += b->esum[i]; tmp2 += b->c[i]; tmp3 += b->fsum[i]; + } + if (tmp2) { + bar_e = (int)(tmp1 / tmp3 + 0.5); + if (bar_e < 4) bar_e = 4; + if (bar_e > 63) bar_e = 63; + p[j<<2|k] = p[k<<2|j] = -4.343 * bm->lhet[b->c[j]<<8|b->c[k]] + tmp1 + bm->coef[bar_e<<16|c<<8|tmp2]; + } else p[j<<2|k] = p[k<<2|j] = -4.343 * bm->lhet[b->c[j]<<8|b->c[k]]; // all the bases are either j or k + } + // + for (k = 0; k != 4; ++k) + if (p[j<<2|k] < 0.0) p[j<<2|k] = 0.0; + } + + // convert necessary information to glf1_t + g->ref_base = ref_base; g->max_mapQ = b->mapQ_max; + g->depth = n > 16777215? 16777215 : n; + for (j = 0; j != 4; ++j) + for (k = j; k < 4; ++k) + if (p[j<<2|k] < min_p) min_p = p[j<<2|k]; + g->min_lk = min_p > 255.0? 255 : (int)(min_p + 0.5); + for (j = c = 0; j != 4; ++j) + for (k = j; k < 4; ++k) + g->lk[c++] = p[j<<2|k]-min_p > 255.0? 
255 : (int)(p[j<<2|k]-min_p + 0.5); + + free(b); + return g; +} + +uint32_t glf2cns(const glf1_t *g, int q_r) +{ + int i, j, k, tmp[16], min = 10000, min2 = 10000, min3 = 10000, min_g = -1, min_g2 = -1; + uint32_t x = 0; + for (i = k = 0; i < 4; ++i) + for (j = i; j < 4; ++j) { + tmp[j<<2|i] = -1; + tmp[i<<2|j] = g->lk[k++] + (i == j? 0 : q_r); + } + for (i = 0; i < 16; ++i) { + if (tmp[i] < 0) continue; + if (tmp[i] < min) { + min3 = min2; min2 = min; min = tmp[i]; min_g2 = min_g; min_g = i; + } else if (tmp[i] < min2) { + min3 = min2; min2 = tmp[i]; min_g2 = i; + } else if (tmp[i] < min3) min3 = tmp[i]; + } + x = min_g >= 0? (1U<<(min_g>>2&3) | 1U<<(min_g&3)) << 28 : 0xf << 28; + x |= min_g2 >= 0? (1U<<(min_g2>>2&3) | 1U<<(min_g2&3)) << 24 : 0xf << 24; + x |= (uint32_t)g->max_mapQ << 16; + x |= min2 < 10000? (min2 - min < 256? min2 - min : 255) << 8 : 0xff << 8; + x |= min2 < 10000 && min3 < 10000? (min3 - min2 < 256? min3 - min2 : 255) : 0xff; + return x; +} + +uint32_t bam_maqcns_call(int n, const bam_pileup1_t *pl, bam_maqcns_t *bm) +{ + glf1_t *g; + uint32_t x; + if (n) { + g = bam_maqcns_glfgen(n, pl, 0xf, bm); + x = glf2cns(g, (int)(bm->q_r + 0.5)); + free(g); + } else x = 0xfU<<28 | 0xfU<<24; + return x; +} + +/************** *****************/ + +bam_maqindel_opt_t *bam_maqindel_opt_init() +{ + bam_maqindel_opt_t *mi = (bam_maqindel_opt_t*)calloc(1, sizeof(bam_maqindel_opt_t)); + mi->mm_penalty = 3; + mi->indel_err = 4; + mi->ambi_thres = 10; + return mi; +} + +void bam_maqindel_ret_destroy(bam_maqindel_ret_t *mir) +{ + if (mir == 0) return; + free(mir->s1); free(mir->s2); free(mir); +} + +#define MINUS_CONST 0x10000000 + +bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, const bam_pileup1_t *pl, const char *ref) +{ + int i, j, n_types, *types, left, right; + bam_maqindel_ret_t *ret = 0; + for (i = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (!(p->b->core.flag&BAM_FUNMAP) && p->indel != 0) break; + } + if (i == n) 
return 0; // no indel + { // calculate how many types of indels are available (set n_types and types) + int m; + uint32_t *aux; + aux = (uint32_t*)calloc(n+1, 4); + m = 0; + aux[m++] = MINUS_CONST; // zero indel is always a type + for (i = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (!(p->b->core.flag&BAM_FUNMAP) && p->indel != 0) + aux[m++] = MINUS_CONST + p->indel; + } + ks_introsort(uint32_t, m, aux); + n_types = 1; + for (i = 1; i < m; ++i) + if (aux[i] != aux[i-1]) ++n_types; + types = (int*)calloc(n_types, sizeof(int)); + j = 0; + types[j++] = aux[0] - MINUS_CONST; + for (i = 1; i < m; ++i) { + if (aux[i] != aux[i-1]) + types[j++] = aux[i] - MINUS_CONST; + } + free(aux); + } + { // calculate left and right boundary + bam_segreg_t seg; + left = 0x7fffffff; right = 0; + for (i = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (!(p->b->core.flag&BAM_FUNMAP)) { + bam_segreg(pos, &p->b->core, bam1_cigar(p->b), &seg); + if (seg.tbeg < left) left = seg.tbeg; + if (seg.tend > right) right = seg.tend; + } + } + } + { // the core part + char *ref2, *inscns = 0; + int k, l, *score, max_ins = types[n_types-1]; + ref2 = (char*)calloc(right - left + types[n_types-1] + 2, 1); + if (max_ins > 0) { // get the consensus of inserted sequences + int *inscns_aux = (int*)calloc(4 * n_types * max_ins, sizeof(int)); + // count occurrences + for (i = 0; i < n_types; ++i) { + if (types[i] <= 0) continue; // not insertion + for (j = 0; j < n; ++j) { + const bam_pileup1_t *p = pl + j; + if (!(p->b->core.flag&BAM_FUNMAP) && p->indel == types[i]) { + for (k = 1; k <= p->indel; ++k) { + int c = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos + k)]; + if (c < 4) ++inscns_aux[i*max_ins*4 + (k-1)*4 + c]; + } + } + } + } + // construct the consensus + inscns = (char*)calloc(n_types * max_ins, sizeof(char)); + for (i = 0; i < n_types; ++i) { + for (j = 0; j < types[i]; ++j) { + int max = 0, max_k = -1, *ia = inscns_aux + i*max_ins*4 + j*4; + for (k = 0; k < 4; ++k) { 
+ if (ia[k] > max) { + max = ia[k]; + max_k = k; + } + } + inscns[i*max_ins + j] = max? 1<b->core; + int s; + bam_segreg_t seg; + if (c->flag&BAM_FUNMAP) continue; + cigar = bam1_cigar(p->b); + bam_segreg(pos, c, cigar, &seg); + for (s = 0, l = seg.qbeg; c->pos + l < right && l < seg.qend; ++l) { + int cq = bam1_seqi(bam1_seq(p->b), l), ct; + ct = c->pos + l >= left? ref2[c->pos + l - left] : 15; // "<" should not happen if there is no bug + if (cq < 15 && ct < 15) + s += cq == ct? 1 : -mi->mm_penalty; + } + score[i*n + j] = s; + if (types[i] != 0) { // then try the other way to calculate the score + for (s = 0, l = seg.qbeg; c->pos + l + types[i] < right && l < seg.qend; ++l) { + int cq = bam1_seqi(bam1_seq(p->b), l), ct; + ct = c->pos + l + types[i] >= left? ref2[c->pos + l + types[i] - left] : 15; + if (cq < 15 && ct < 15) + s += cq == ct? 1 : -mi->mm_penalty; + } + } + if (score[i*n+j] < s) score[i*n+j] = s; // choose the higher of the two scores + if (types[i] != 0) score[i*n+j] -= mi->indel_err; + //printf("%d, %d, %d, %d\n", i, types[i], j, score[i*n+j]); + } + } + { // get final result + int *sum, max1, max2, max1_i, max2_i; + // pick up the best two score + sum = (int*)calloc(n_types, sizeof(int)); + for (i = 0; i < n_types; ++i) + for (j = 0; j < n; ++j) + sum[i] += score[i*n+j]; + max1 = max2 = -0x7fffffff; max1_i = max2_i = -1; + for (i = 0; i < n_types; ++i) { + if (sum[i] > max1) { + max2 = max1; max2_i = max1_i; max1 = sum[i]; max1_i = i; + } else if (sum[i] > max2) { + max2 = sum[i]; max2_i = i; + } + } + free(sum); + // write ret + ret = (bam_maqindel_ret_t*)calloc(1, sizeof(bam_maqindel_ret_t)); + ret->indel1 = types[max1_i]; ret->indel2 = types[max2_i]; + ret->s1 = (char*)calloc(abs(ret->indel1) + 2, 1); + ret->s2 = (char*)calloc(abs(ret->indel2) + 2, 1); + if (ret->indel1 > 0) { + ret->s1[0] = '+'; + for (k = 0; k < ret->indel1; ++k) + ret->s1[k+1] = bam_nt16_rev_table[(int)inscns[max1_i*max_ins + k]]; + } else if (ret->indel1 < 0) { + 
ret->s1[0] = '-'; + for (k = 0; k < -ret->indel1 && ref[pos + k + 1]; ++k) + ret->s1[k+1] = ref[pos + k + 1]; + } else ret->s1[0] = '*'; + if (ret->indel2 > 0) { + ret->s2[0] = '+'; + for (k = 0; k < ret->indel2; ++k) + ret->s2[k+1] = bam_nt16_rev_table[(int)inscns[max2_i*max_ins + k]]; + } else if (ret->indel2 < 0) { + ret->s2[0] = '-'; + for (k = 0; k < -ret->indel2 && ref[pos + k + 1]; ++k) + ret->s2[k+1] = ref[pos + k + 1]; + } else ret->s2[0] = '*'; + for (j = 0; j < n; ++j) { + if (score[max1_i*n+j] < 0 && score[max2_i*n+j] < 0) ++ret->cnt_anti; + else { + int diff = score[max1_i*n+j] - score[max2_i*n+j]; + if (diff > mi->ambi_thres) ++ret->cnt1; + else if (diff < -mi->ambi_thres) ++ret->cnt2; + else ++ret->cnt_ambi; + } + } + } + free(score); free(ref2); free(inscns); + } + free(types); + return ret; +} diff --git a/bam_maqcns.h b/bam_maqcns.h new file mode 100644 index 0000000..5d410ef --- /dev/null +++ b/bam_maqcns.h @@ -0,0 +1,48 @@ +#ifndef BAM_MAQCNS_H +#define BAM_MAQCNS_H + +#include "glf.h" + +struct __bmc_aux_t; + +typedef struct { + float het_rate, theta; + int n_hap; + + float eta, q_r; + double *fk, *coef; + double *lhet; + struct __bmc_aux_t *aux; +} bam_maqcns_t; + +typedef struct { + int mm_penalty, indel_err, ambi_thres; +} bam_maqindel_opt_t; + +typedef struct { + int indel1, indel2; + int cnt1, cnt2, cnt_ambi, cnt_anti; + char *s1, *s2; +} bam_maqindel_ret_t; + +#ifdef __cplusplus +extern "C" { +#endif + + bam_maqcns_t *bam_maqcns_init(); + void bam_maqcns_prepare(bam_maqcns_t *bm); + void bam_maqcns_destroy(bam_maqcns_t *bm); + glf1_t *bam_maqcns_glfgen(int n, const bam_pileup1_t *pl, uint8_t ref_base, bam_maqcns_t *bm); + uint32_t bam_maqcns_call(int n, const bam_pileup1_t *pl, bam_maqcns_t *bm); + // return: cns<<28 | cns2<<24 | mapQ<<16 | cnsQ<<8 | cnsQ2 + uint32_t glf2cns(const glf1_t *g, int q_r); + + bam_maqindel_opt_t *bam_maqindel_opt_init(); + bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, const 
bam_pileup1_t *pl, const char *ref); + void bam_maqindel_ret_destroy(bam_maqindel_ret_t*); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/bam_pileup.c b/bam_pileup.c new file mode 100644 index 0000000..d01f9a3 --- /dev/null +++ b/bam_pileup.c @@ -0,0 +1,213 @@ +#include +#include +#include +#include "bam.h" + +typedef struct __linkbuf_t { + bam1_t b; + uint32_t beg, end; + struct __linkbuf_t *next; +} lbnode_t; + +/* --- BEGIN: Memory pool */ + +typedef struct { + int cnt, n, max; + lbnode_t **buf; +} mempool_t; + +static mempool_t *mp_init() +{ + mempool_t *mp; + mp = (mempool_t*)calloc(1, sizeof(mempool_t)); + return mp; +} +static void mp_destroy(mempool_t *mp) +{ + int k; + for (k = 0; k < mp->n; ++k) { + free(mp->buf[k]->b.data); + free(mp->buf[k]); + } + free(mp->buf); + free(mp); +} +static inline lbnode_t *mp_alloc(mempool_t *mp) +{ + ++mp->cnt; + if (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t)); + else return mp->buf[--mp->n]; +} +static inline void mp_free(mempool_t *mp, lbnode_t *p) +{ + --mp->cnt; p->next = 0; // clear lbnode_t::next here + if (mp->n == mp->max) { + mp->max = mp->max? 
mp->max<<1 : 256; + mp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max); + } + mp->buf[mp->n++] = p; +} + +/* --- END: Memory pool */ + +/* --- BEGIN: Auxiliary functions */ + +static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos) +{ + unsigned k; + bam1_t *b = p->b; + bam1_core_t *c = &b->core; + uint32_t x = c->pos, y = 0; + int ret = 1, is_restart = 1; + + if (c->flag&BAM_FUNMAP) return 0; // unmapped read + assert(x <= pos); + p->qpos = -1; p->indel = 0; p->is_del = p->is_head = p->is_tail = 0; + for (k = 0; k < c->n_cigar; ++k) { + int op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation + int l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length + if (op == BAM_CMATCH) { // NOTE: this assumes the first and the last operation MUST BE a match or a clip + if (x + l > pos) { // overlap with pos + p->indel = p->is_del = 0; + p->qpos = y + (pos - x); + if (x == pos && is_restart) p->is_head = 1; + if (x + l - 1 == pos) { // come to the end of a match + if (k < c->n_cigar - 1) { // there are additional operation(s) + uint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR + int op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation + if (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del + else if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins + if (op_next == BAM_CSOFT_CLIP || op_next == BAM_CREF_SKIP || op_next == BAM_CHARD_CLIP) + p->is_tail = 1; // tail + } else p->is_tail = 1; // this is the last operation; set tail + } + } + x += l; y += l; + } else if (op == BAM_CDEL) { // then set ->is_del + if (x + l > pos) { + p->indel = 0; p->is_del = 1; + p->qpos = y + (pos - x); + } + x += l; + } else if (op == BAM_CREF_SKIP) x += l; + else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l; + is_restart = (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP); + if (x > pos) { + if (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all + break; + } + } + assert(x > pos); + 
return ret; +} + +/* --- END: Auxiliary functions */ + +struct __bam_plbuf_t { + mempool_t *mp; + lbnode_t *head, *tail, *dummy; + bam_pileup_f func; + void *func_data; + int32_t tid, pos, max_tid, max_pos; + int max_pu, is_eof; + bam_pileup1_t *pu; +}; + +void bam_plbuf_reset(bam_plbuf_t *buf) +{ + lbnode_t *p, *q; + buf->max_tid = buf->max_pos = -1; + buf->tid = buf->pos = 0; + buf->is_eof = 0; + for (p = buf->head; p->next;) { + q = p->next; + mp_free(buf->mp, p); + p = q; + } + buf->head = buf->tail; +} + +bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data) +{ + bam_plbuf_t *buf; + buf = (bam_plbuf_t*)calloc(1, sizeof(bam_plbuf_t)); + buf->func = func; buf->func_data = data; + buf->mp = mp_init(); + buf->head = buf->tail = mp_alloc(buf->mp); + buf->dummy = mp_alloc(buf->mp); + buf->max_tid = buf->max_pos = -1; + return buf; +} + +void bam_plbuf_destroy(bam_plbuf_t *buf) +{ + mp_free(buf->mp, buf->dummy); + mp_free(buf->mp, buf->head); + if (buf->mp->cnt != 0) + fprintf(stderr, "[bam_plbuf_destroy] memory leak: %d. Continue anyway.\n", buf->mp->cnt); + mp_destroy(buf->mp); + free(buf->pu); + free(buf); +} + +int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf) +{ + if (b) { // fill buffer + bam_copy1(&buf->tail->b, b); + buf->tail->beg = b->core.pos; buf->tail->end = bam_calend(&b->core, bam1_cigar(b)); + if (!(b->core.tid >= buf->max_tid || (b->core.tid == buf->max_tid && buf->tail->beg >= buf->max_pos))) { + fprintf(stderr, "[bam_pileup_core] the input is not sorted. 
Abort!\n"); + abort(); + } + buf->max_tid = b->core.tid; buf->max_pos = buf->tail->beg; + if (buf->tail->end > buf->pos) { + buf->tail->next = mp_alloc(buf->mp); + buf->tail = buf->tail->next; + } + } else buf->is_eof = 1; + while (buf->is_eof || buf->max_tid > buf->tid || (buf->max_tid == buf->tid && buf->max_pos > buf->pos)) { + int n_pu = 0; + lbnode_t *p, *q; + buf->dummy->next = buf->head; + for (p = buf->head, q = buf->dummy; p->next; q = p, p = p->next) { + if (p->b.core.tid < buf->tid || (p->b.core.tid == buf->tid && p->end <= buf->pos)) { // then remove from the list + q->next = p->next; mp_free(buf->mp, p); p = q; + } else if (p->b.core.tid == buf->tid && p->beg <= buf->pos) { // here: p->end > pos; then add to pileup + if (n_pu == buf->max_pu) { // then double the capacity + buf->max_pu = buf->max_pu? buf->max_pu<<1 : 256; + buf->pu = (bam_pileup1_t*)realloc(buf->pu, sizeof(bam_pileup1_t) * buf->max_pu); + } + buf->pu[n_pu].b = &p->b; + if (resolve_cigar(buf->pu + n_pu, buf->pos)) ++n_pu; // skip the read if we are looking at BAM_CREF_SKIP + } + } + buf->head = buf->dummy->next; // dummy->next may be changed + if (n_pu) { // then call user defined function + buf->func(buf->tid, buf->pos, n_pu, buf->pu, buf->func_data); + } + // update tid and pos + if (buf->head->next) assert(buf->tid <= buf->head->b.core.tid); // otherwise, not sorted + if (buf->tid < buf->head->b.core.tid) { // come to a new reference sequence + buf->tid = buf->head->b.core.tid; buf->pos = buf->head->beg; // jump to the next reference + } else if (buf->pos < buf->head->beg) { // here: tid == head->b.core.tid + buf->pos = buf->head->beg; // jump to the next position + } else ++buf->pos; // scan contiguously + if (buf->is_eof && buf->head->next == 0) break; + } + return 0; +} + +int bam_pileup_file(bamFile fp, bam_pileup_f func, void *func_data) +{ + bam_plbuf_t *buf; + int ret; + bam1_t *b; + b = (bam1_t*)calloc(1, sizeof(bam1_t)); + buf = bam_plbuf_init(func, func_data); + while ((ret 
= bam_read1(fp, b)) >= 0) + bam_plbuf_push(b, buf); + bam_plbuf_push(0, buf); + bam_plbuf_destroy(buf); + free(b->data); free(b); + return 0; +} diff --git a/bam_plcmd.c b/bam_plcmd.c new file mode 100644 index 0000000..0140c66 --- /dev/null +++ b/bam_plcmd.c @@ -0,0 +1,194 @@ +#include +#include +#include +#include "bam.h" +#include "faidx.h" +#include "bam_maqcns.h" +#include "khash.h" +KHASH_SET_INIT_INT64(64) + +#define BAM_PLF_SIMPLE 0x01 +#define BAM_PLF_CNS 0x02 + +typedef struct { + bam_header_t *h; + bam_maqcns_t *c; + bam_maqindel_opt_t *ido; + faidx_t *fai; + khash_t(64) *hash; + uint32_t format; + int tid, len; + char *ref; +} pu_data_t; + +char **bam_load_pos(const char *fn, int *_n); +void bam_init_header_hash(bam_header_t *header); +int32_t bam_get_tid(const bam_header_t *header, const char *seq_name); + +static khash_t(64) *load_pos(const char *fn, bam_header_t *h) +{ + int n, tmp, i; + char **list, *s; + uint64_t x; + khash_t(64) *hash; + bam_init_header_hash(h); + list = bam_load_pos(fn, &n); + hash = kh_init(64); + for (i = 0; i < n; ++i) { + x = (uint64_t)bam_get_tid(h, list[i]) << 32; + s = list[i]; + while (*s++); + x |= *((uint32_t*)s) - 1; + kh_put(64, hash, x, &tmp); + free(list[i]); + } + free(list); + return hash; +} + +static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data) +{ + pu_data_t *d = (pu_data_t*)data; + bam_maqindel_ret_t *r = 0; + int i, j, rb; + uint32_t x; + if (d->hash && kh_get(64, d->hash, (uint64_t)tid<<32|pos) == kh_end(d->hash)) return 0; + if (d->fai && (int)tid != d->tid) { + free(d->ref); + d->ref = fai_fetch(d->fai, d->h->target_name[tid], &d->len); + d->tid = tid; + } + rb = (d->ref && (int)pos < d->len)? 
d->ref[pos] : 'N'; + printf("%s\t%d\t%c\t", d->h->target_name[tid], pos + 1, rb); + if (d->format & BAM_PLF_CNS) { // consensus + int ref_q, rb4 = bam_nt16_table[rb]; + x = bam_maqcns_call(n, pu, d->c); + ref_q = 0; + if (rb4 != 15 && x>>28 != 15 && x>>28 != rb4) { // a SNP + ref_q = ((x>>24&0xf) == rb4)? x>>8&0xff : (x>>8&0xff) + (x&0xff); + if (ref_q > 255) ref_q = 255; + } + printf("%c\t%d\t%d\t%d\t", bam_nt16_rev_table[x>>28], x>>8&0xff, ref_q, x>>16&0xff); + if (d->ref) // indel calling + r = bam_maqindel(n, pos, d->ido, pu, d->ref); + } + // pileup strings + printf("%d\t", n); + for (i = 0; i < n; ++i) { + const bam_pileup1_t *p = pu + i; + if (p->is_head) printf("^%c", p->b->core.qual > 93? 126 : p->b->core.qual + 33); + if (!p->is_del) { + int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; + if (toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; + else c = bam1_strand(p->b)? tolower(c) : toupper(c); // store the strand-cased base; the result used to be discarded + putchar(c); + if (p->indel > 0) { + printf("+%d", p->indel); + for (j = 1; j <= p->indel; ++j) { + c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)]; + putchar(bam1_strand(p->b)? tolower(c) : toupper(c)); + } + } else if (p->indel < 0) { + printf("%d", p->indel); + for (j = 1; j <= -p->indel; ++j) { + c = (d->ref && (int)pos+j < d->len)? d->ref[pos+j] : 'N'; + putchar(bam1_strand(p->b)? 
tolower(c) : toupper(c)); + } + } + } else putchar('*'); + if (p->is_tail) putchar('$'); + } + putchar('\t'); + for (i = 0; i < n; ++i) { + const bam_pileup1_t *p = pu + i; + int c = bam1_qual(p->b)[p->qpos] + 33; + if (c > 126) c = 126; + putchar(c); + } + if (d->format & BAM_PLF_SIMPLE) { + putchar('\t'); + for (i = 0; i < n; ++i) { + int c = pu[i].b->core.qual + 33; + if (c > 126) c = 126; + putchar(c); + } + } + putchar('\n'); + if (r) { // then print indel line + printf("%s\t%d\t*\t%s/%s\t", d->h->target_name[tid], pos + 1, r->s1, r->s2); + printf("%d\t%d\t%d\t%d\n", r->cnt1, r->cnt2, r->cnt_ambi, r->cnt_anti); + bam_maqindel_ret_destroy(r); + } + return 0; +} + +int bam_pileup(int argc, char *argv[]) +{ + int c; + char *fn_list = 0, *fn_fa = 0, *fn_pos = 0; + pu_data_t *d = (pu_data_t*)calloc(1, sizeof(pu_data_t)); + d->tid = -1; + d->c = bam_maqcns_init(); + while ((c = getopt(argc, argv, "st:f:cT:N:r:l:")) >= 0) { + switch (c) { + case 's': d->format |= BAM_PLF_SIMPLE; break; + case 't': fn_list = strdup(optarg); break; + case 'l': fn_pos = strdup(optarg); break; + case 'f': fn_fa = strdup(optarg); break; + case 'T': d->c->theta = atof(optarg); break; + case 'N': d->c->n_hap = atoi(optarg); break; + case 'r': d->c->het_rate = atof(optarg); break; // het_rate is a float prior; atoi truncated fractional values to 0 + case 'c': d->format |= BAM_PLF_CNS; break; + default: fprintf(stderr, "Unrecognizd option '-%c'.\n", c); return 1; + } + } + if (optind == argc) { + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: bamtk pileup [options] |\n\n"); + fprintf(stderr, "Option: -s simple (yet incomplete) pileup format\n"); + fprintf(stderr, " -t FILE list of reference sequences (assume the input is in SAM)\n"); + fprintf(stderr, " -l FILE list of sites at which pileup is output\n"); + fprintf(stderr, " -f FILE reference sequence in the FASTA format\n\n"); + fprintf(stderr, " -c output the maq consensus sequence\n"); + fprintf(stderr, " -T FLOAT theta in maq consensus calling model (for -c only) [%f]\n", d->c->theta); + fprintf(stderr, 
" -N INT number of haplotypes in the sample (for -c only) [%d]\n", d->c->n_hap); + fprintf(stderr, " -r FLOAT prior of a difference between any two haplotypes (for -c only) [%f]\n\n", + d->c->het_rate); + free(fn_list); free(fn_fa); free(fn_pos); free(d); // fn_pos (-l) is strdup'ed as well and was leaked here + return 1; + } + if (fn_fa) d->fai = fai_load(fn_fa); + free(fn_fa); + bam_maqcns_prepare(d->c); + d->ido = bam_maqindel_opt_init(); + if (fn_list) { + tamFile fp; + bam1_t *b; + int ret; + bam_plbuf_t *buf = bam_plbuf_init(pileup_func, d); + d->h = sam_header_read2(fn_list); + if (fn_pos) d->hash = load_pos(fn_pos, d->h); + fp = sam_open(argv[optind]); + b = (bam1_t*)calloc(1, sizeof(bam1_t)); + while ((ret = sam_read1(fp, d->h, b)) >= 0) + bam_plbuf_push(b, buf); + bam_plbuf_push(0, buf); + bam_plbuf_destroy(buf); + bam_destroy1(b); + sam_close(fp); + } else { + bamFile fp; + fp = (strcmp(argv[optind], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[optind], "r"); + d->h = bam_header_read(fp); + if (fn_pos) d->hash = load_pos(fn_pos, d->h); + bam_pileup_file(fp, pileup_func, d); + bam_close(fp); + } + free(fn_pos); free(fn_list); + kh_destroy(64, d->hash); + bam_header_destroy(d->h); + if (d->fai) fai_destroy(d->fai); + bam_maqcns_destroy(d->c); + free(d->ido); free(d->ref); free(d); + return 0; +} diff --git a/bam_sort.c b/bam_sort.c new file mode 100644 index 0000000..c5ed583 --- /dev/null +++ b/bam_sort.c @@ -0,0 +1,229 @@ +#include +#include +#include +#include +#include +#include +#include "bam.h" +#include "ksort.h" + +static int g_is_by_qname = 0; + +static inline int strnum_cmp(const char *a, const char *b) +{ + char *pa, *pb; + pa = (char*)a; pb = (char*)b; + while (*pa && *pb) { + if (isdigit(*pa) && isdigit(*pb)) { + long ai, bi; + ai = strtol(pa, &pa, 10); + bi = strtol(pb, &pb, 10); + if (ai != bi) return ai<bi? -1 : ai>bi? 1 : 0; // digit runs compare by numeric value + } else { + if (*pa != *pb) break; + ++pa; ++pb; + } + } + if (*pa == *pb) + return (pa-a) < (pb-b)? -1 : (pa-a) > (pb-b)? 1 : 0; + return *pa<*pb? -1 : *pa>*pb? 
1 : 0; +} + +#define HEAP_EMPTY 0xffffffffffffffffull + +typedef struct { + int i; + uint64_t pos; + bam1_t *b; +} heap1_t; + +static inline int heap_lt(const heap1_t a, const heap1_t b) +{ + if (g_is_by_qname) { + int t = strnum_cmp(bam1_qname(a.b), bam1_qname(b.b)); + return (t > 0 || (t == 0 && a.pos > b.pos)); + } else return (a.pos > b.pos); +} + +KSORT_INIT(heap, heap1_t, heap_lt) + +void bam_merge_core(int by_qname, const char *out, int n, char * const *fn) +{ + bamFile fpout, *fp; + heap1_t *heap; + bam_header_t *hout = 0; + int i, j; + + g_is_by_qname = by_qname; + fp = (bamFile*)calloc(n, sizeof(bamFile)); + heap = (heap1_t*)calloc(n, sizeof(heap1_t)); + for (i = 0; i != n; ++i) { + heap1_t *h; + bam_header_t *hin; + assert(fp[i] = bam_open(fn[i], "r")); + hin = bam_header_read(fp[i]); + if (i == 0) hout = hin; + else { // validate multiple baf + if (hout->n_targets != hin->n_targets) { + fprintf(stderr, "[bam_merge_core] file '%s' has different number of target sequences. Abort!\n", fn[i]); + abort(); + } + for (j = 0; j < hout->n_targets; ++j) { + if (strcmp(hout->target_name[j], hin->target_name[j]) || hout->target_len[j] != hin->target_len[j]) { + fprintf(stderr, "[bam_merge_core] file '%s' has a different target sequence. Abort!\n", fn[i]); + abort(); + } + } + bam_header_destroy(hin); + } + h = heap + i; + h->i = i; + h->b = (bam1_t*)calloc(1, sizeof(bam1_t)); + if (bam_read1(fp[i], h->b) >= 0) + h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)h->b->core.pos<<1 | bam1_strand(h->b); + else h->pos = HEAP_EMPTY; + } + fpout = strcmp(out, "-")? 
bam_open(out, "w") : bam_dopen(fileno(stdout), "w"); + assert(fpout); + bam_header_write(fpout, hout); + bam_header_destroy(hout); + + ks_heapmake(heap, n, heap); + while (heap->pos != HEAP_EMPTY) { + bam1_t *b = heap->b; + bam_write1_core(fpout, &b->core, b->data_len, b->data); + if ((j = bam_read1(fp[heap->i], b)) >= 0) + heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)b->core.pos<<1 | bam1_strand(b); + else if (j == -1) heap->pos = HEAP_EMPTY; + else { heap->pos = HEAP_EMPTY; fprintf(stderr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]); } // retire the truncated input so its stale record is not written again + ks_heapadjust(heap, 0, n, heap); + } + + for (i = 0; i != n; ++i) { + bam_close(fp[i]); + free(heap[i].b->data); + free(heap[i].b); + } + bam_close(fpout); + free(fp); free(heap); +} +int bam_merge(int argc, char *argv[]) +{ + int c, is_by_qname = 0; + while ((c = getopt(argc, argv, "n")) >= 0) { + switch (c) { + case 'n': is_by_qname = 1; break; + } + } + if (optind + 2 >= argc) { // need the output name plus at least two inputs; the old "+ 3" test rejected a two-file merge + fprintf(stderr, "Usage: samtools merge [-n] [...]\n"); + return 1; + } + bam_merge_core(is_by_qname, argv[optind], argc - optind - 1, argv + optind + 1); + return 0; +} + +typedef bam1_t *bam1_p; + +static inline int bam1_lt(const bam1_p a, const bam1_p b) +{ + if (g_is_by_qname) { + int t = strnum_cmp(bam1_qname(a), bam1_qname(b)); + return (t < 0 || (t == 0 && (((uint64_t)a->core.tid<<32|a->core.pos) < ((uint64_t)b->core.tid<<32|b->core.pos)))); + } else return (((uint64_t)a->core.tid<<32|a->core.pos) < ((uint64_t)b->core.tid<<32|b->core.pos)); +} +KSORT_INIT(sort, bam1_p, bam1_lt) + +static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam_header_t *h) +{ + char *name; + int i; + bamFile fp; + ks_mergesort(sort, k, buf, 0); + name = (char*)calloc(strlen(prefix) + 20, 1); + if (n >= 0) sprintf(name, "%s.%.4d.bam", prefix, n); + else sprintf(name, "%s.bam", prefix); + fp = bam_open(name, "w"); assert(fp); // keep the open outside assert() so it still runs under NDEBUG + free(name); + bam_header_write(fp, h); + for (i = 0; i < k; ++i) + bam_write1_core(fp, &buf[i]->core, 
buf[i]->data_len, buf[i]->data); + bam_close(fp); +} + +void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem) +{ + int n, ret, k, i; + size_t mem; + bam_header_t *header; + bamFile fp; + bam1_t *b, **buf; + + g_is_by_qname = is_by_qname; + n = k = 0; mem = 0; + fp = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r"); + assert(fp); + header = bam_header_read(fp); + buf = (bam1_t**)calloc(max_mem / BAM_CORE_SIZE, sizeof(bam1_t*)); + // write sub files + for (;;) { + if (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t)); + b = buf[k]; + if ((ret = bam_read1(fp, b)) < 0) break; + mem += ret; + ++k; + if (mem >= max_mem) { + sort_blocks(n++, k, buf, prefix, header); + mem = 0; k = 0; + } + } + if (ret != -1) + fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n"); + if (n == 0) sort_blocks(-1, k, buf, prefix, header); + else { // then merge + char **fns, *fnout; + fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n+1); + sort_blocks(n++, k, buf, prefix, header); + fnout = (char*)calloc(strlen(prefix) + 20, 1); + sprintf(fnout, "%s.bam", prefix); + fns = (char**)calloc(n, sizeof(char*)); + for (i = 0; i < n; ++i) { + fns[i] = (char*)calloc(strlen(prefix) + 20, 1); + sprintf(fns[i], "%s.%.4d.bam", prefix, i); + } + bam_merge_core(is_by_qname, fnout, n, fns); // merge in the same order the blocks were sorted in (was hard-coded to by-position) + free(fnout); + for (i = 0; i < n; ++i) { + unlink(fns[i]); + free(fns[i]); + } + free(fns); + } + for (k = 0; k < max_mem / BAM_CORE_SIZE; ++k) { + if (buf[k]) { + free(buf[k]->data); + free(buf[k]); + } + } + free(buf); + bam_header_destroy(header); + bam_close(fp); +} + +int bam_sort(int argc, char *argv[]) +{ + size_t max_mem = 500000000; + int c, is_by_qname = 0; + while ((c = getopt(argc, argv, "nm:")) >= 0) { + switch (c) { + case 'n': is_by_qname = 1; break; + case 'm': max_mem = atol(optarg); break; + } + } + if (optind + 2 > argc) { + fprintf(stderr, "Usage: samtools sort [-n] [-m ] \n"); + return 1; + } + bam_sort_core(is_by_qname, 
argv[optind], argv[optind+1], max_mem); + return 0; +} diff --git a/bam_tview.c b/bam_tview.c new file mode 100644 index 0000000..3dfb201 --- /dev/null +++ b/bam_tview.c @@ -0,0 +1,315 @@ +#ifndef _NO_CURSES +#include +#include +#include +#include +#include "bam.h" +#include "faidx.h" +#include "bam_maqcns.h" + +#define TV_MIN_ALNROW 2 +#define TV_MAX_GOTO 40 +#define TV_LOW_MAPQ 10 + +#define TV_COLOR_MAPQ 0 +#define TV_COLOR_BASEQ 1 +#define TV_COLOR_NUCL 2 + +typedef struct { + int mrow, mcol; + WINDOW *wgoto, *whelp; + + bam_index_t *idx; + bam_lplbuf_t *lplbuf; + bam_header_t *header; + bamFile fp; + int curr_tid, left_pos; + faidx_t *fai; + bam_maqcns_t *bmc; + + int ccol, last_pos, row_shift, color_for, is_nucl, l_ref; + char *ref; +} tview_t; + +char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 }; + +int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) +{ + tview_t *tv = (tview_t*)data; + int i, j, c, rb, attr, max_ins = 0; + uint32_t call = 0; + if (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen + // print referece + rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N'; + for (i = tv->last_pos + 1; i < pos; ++i) { + if (i%10 == 0) mvprintw(0, tv->ccol, "%-d", i+1); + c = tv->ref? 
tv->ref[i - tv->left_pos] : 'N'; + mvaddch(1, tv->ccol++, c); + } + if (pos%10 == 0) mvprintw(0, tv->ccol, "%-d", pos+1); + // print consensus + call = bam_maqcns_call(n, pl, tv->bmc); + attr = A_UNDERLINE; + c = ",ACMGRSVTWYHKDBN"[call>>28&0xf]; + i = (call>>8&0xff)/10+1; + if (i > 4) i = 4; + attr |= COLOR_PAIR(i); + if (c == toupper(rb)) c = '.'; + attron(attr); + mvaddch(2, tv->ccol, c); + attroff(attr); + // calculate maximum insert + for (i = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (p->indel > 0 && max_ins < p->indel) max_ins = p->indel; + } + // core loop + for (j = 0; j <= max_ins; ++j) { + for (i = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + int row = TV_MIN_ALNROW + p->level - tv->row_shift; + if (j == 0) { + if (!p->is_del) { + c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; + if (!tv->is_nucl && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; + } else c = '*'; + } else { // padding + if (j > p->indel) c = '*'; + else { // insertion + c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)]; + if (j == 0 && !tv->is_nucl && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; + } + } + if (row > TV_MIN_ALNROW && row < tv->mrow) { + int x; + attr = 0; + if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR)) + || (p->b->core.flag & BAM_FSECONDARY)) attr |= A_UNDERLINE; + if (tv->color_for == TV_COLOR_BASEQ) { + x = bam1_qual(p->b)[p->qpos]/10 + 1; + if (x > 4) x = 4; + attr |= COLOR_PAIR(x); + } else if (tv->color_for == TV_COLOR_MAPQ) { + x = p->b->core.qual/10 + 1; + if (x > 4) x = 4; + attr |= COLOR_PAIR(x); + } else if (tv->color_for == TV_COLOR_NUCL) { + x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)] + 5; + attr |= COLOR_PAIR(x); + } + attron(attr); + mvaddch(row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c)); + attroff(attr); + } + } + c = j? 
'*' : rb; + if (c == '*') { + attr = COLOR_PAIR(8); + attron(attr); + mvaddch(1, tv->ccol++, c); + attroff(attr); + } else mvaddch(1, tv->ccol++, c); + } + tv->last_pos = pos; + return 0; +} + +tview_t *tv_init(const char *fn, const char *fn_fa) +{ + tview_t *tv = (tview_t*)calloc(1, sizeof(tview_t)); + tv->idx = bam_index_load(fn); + tv->fp = bam_open(fn, "r"); + assert(tv->fp); + tv->header = bam_header_read(tv->fp); + tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv); + if (fn_fa) tv->fai = fai_load(fn_fa); + tv->bmc = bam_maqcns_init(); + bam_maqcns_prepare(tv->bmc); + + initscr(); + keypad(stdscr, TRUE); + clear(); + noecho(); + cbreak(); +#ifdef NCURSES_VERSION + getmaxyx(stdscr, tv->mrow, tv->mcol); +#else + tv->mrow = 80; tv->mcol = 40; +#endif + tv->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5); + tv->whelp = newwin(22,40, 5, 5); + tv->color_for = TV_COLOR_MAPQ; + start_color(); + init_pair(1, COLOR_BLUE, COLOR_BLACK); + init_pair(2, COLOR_GREEN, COLOR_BLACK); + init_pair(3, COLOR_YELLOW, COLOR_BLACK); + init_pair(4, COLOR_WHITE, COLOR_BLACK); + init_pair(5, COLOR_GREEN, COLOR_BLACK); + init_pair(6, COLOR_CYAN, COLOR_BLACK); + init_pair(7, COLOR_YELLOW, COLOR_BLACK); + init_pair(8, COLOR_RED, COLOR_BLACK); + init_pair(9, COLOR_BLUE, COLOR_BLACK); + return tv; +} + +void tv_destroy(tview_t *tv) +{ + delwin(tv->wgoto); delwin(tv->whelp); + endwin(); + + bam_lplbuf_destroy(tv->lplbuf); + bam_maqcns_destroy(tv->bmc); + bam_index_destroy(tv->idx); + if (tv->fai) fai_destroy(tv->fai); + free(tv->ref); + bam_header_destroy(tv->header); + bam_close(tv->fp); + free(tv); +} + +int tv_fetch_func(const bam1_t *b, void *data) +{ + tview_t *tv = (tview_t*)data; + bam_lplbuf_push(b, tv->lplbuf); + return 0; +} + +int tv_draw_aln(tview_t *tv, int tid, int pos) +{ + int end; + // reset + clear(); + tv->curr_tid = tid; tv->left_pos = pos; + tv->last_pos = tv->left_pos - 1; + tv->ccol = 0; + // print ref and consensus + if (tv->fai) { + char *str; + if (tv->ref) free(tv->ref); + str 
= (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1); + sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol); + tv->ref = fai_fetch(tv->fai, str, &tv->l_ref); + free(str); + } + // draw aln + bam_lplbuf_reset(tv->lplbuf); + bam_fetch(tv->fp, tv->idx, tv->curr_tid, tv->left_pos, tv->left_pos + tv->mcol, tv, tv_fetch_func); + bam_lplbuf_push(0, tv->lplbuf); + return 0; +} + +static void tv_win_goto(tview_t *tv, int *tid, int *pos) +{ + char str[256] = ""; // start empty so Enter as the first key parses a valid string + int i, l = 0; + wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+'); + mvwprintw(tv->wgoto, 1, 2, "Goto: "); + for (;;) { + int c = wgetch(tv->wgoto); + wrefresh(tv->wgoto); + if (c == KEY_BACKSPACE || c == '\010' || c == '\177') { + if (l > 0) --l; // backspace on an empty buffer must not move l below 0 (str[l] is written below) + } else if (c == KEY_ENTER || c == '\012' || c == '\015') { + int _tid = -1, _beg, _end; + bam_parse_region(tv->header, str, &_tid, &_beg, &_end); + if (_tid >= 0) { + *tid = _tid; *pos = _beg; + return; + } + } else if (isgraph(c)) { + if (l < TV_MAX_GOTO) str[l++] = c; + } else if (c == '\027') l = 0; + else if (c == '\033') return; + str[l] = '\0'; + for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' '); + mvwprintw(tv->wgoto, 1, 8, "%s", str); + } +} + +static void tv_win_help(tview_t *tv) { + int r = 1; + WINDOW *win = tv->whelp; + wborder(win, '|', '|', '-', '-', '+', '+', '+', '+'); + mvwprintw(win, r++, 2, " -=- Help -=- "); + r++; + mvwprintw(win, r++, 2, "? 
This window"); + mvwprintw(win, r++, 2, "Arrows Small scroll movement"); + mvwprintw(win, r++, 2, "h,j,k,l Small scroll movement"); + mvwprintw(win, r++, 2, "H,J,K,L Large scroll movement"); + mvwprintw(win, r++, 2, "ctrl-H Scroll 1k left"); + mvwprintw(win, r++, 2, "ctrl-L Scroll 1k right"); + mvwprintw(win, r++, 2, "space Scroll one screen"); + mvwprintw(win, r++, 2, "backspace Scroll back one screen"); + mvwprintw(win, r++, 2, "g Go to specific location"); + mvwprintw(win, r++, 2, "b Color for base quality"); + mvwprintw(win, r++, 2, "m Color for mapping qual"); + mvwprintw(win, r++, 2, "n Color for nucleotide"); + mvwprintw(win, r++, 2, ". Toggle on/off dot view"); + mvwprintw(win, r++, 2, "q Exit"); + r++; + mvwprintw(win, r++, 2, "Underline: Secondary or orphan"); + mvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19"); + mvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30"); + wrefresh(win); + wgetch(win); +} + +void tv_loop(tview_t *tv) +{ + int tid, pos; + tid = tv->curr_tid; pos = tv->left_pos; + while (1) { + int c = getch(); + switch (c) { + case '?': tv_win_help(tv); break; + case '\033': + case 'q': goto end_loop; + case 'g': tv_win_goto(tv, &tid, &pos); break; + case 'b': tv->color_for = TV_COLOR_BASEQ; break; + case 'm': tv->color_for = TV_COLOR_MAPQ; break; + case 'n': tv->color_for = TV_COLOR_NUCL; break; + case KEY_LEFT: + case 'h': --pos; break; + case KEY_RIGHT: + case 'l': ++pos; break; + case KEY_SLEFT: + case 'H': pos -= 20; break; + case KEY_SRIGHT: + case 'L': pos += 20; break; + case '.': tv->is_nucl = !tv->is_nucl; break; + case '\010': pos -= 1000; break; + case '\014': pos += 1000; break; + case ' ': pos += tv->mcol; break; + case KEY_UP: + case 'j': --tv->row_shift; break; + case KEY_DOWN: + case 'k': ++tv->row_shift; break; + case KEY_BACKSPACE: + case '\177': pos -= tv->mcol; break; +#ifdef KEY_RESIZE + case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break; +#endif + default: continue; + } + if (pos < 0) pos = 0; + if (tv->row_shift 
< 0) tv->row_shift = 0; + tv_draw_aln(tv, tid, pos); + } +end_loop: + return; +} + +int bam_tview_main(int argc, char *argv[]) +{ + tview_t *tv; + if (argc == 1) { + fprintf(stderr, "Usage: bamtk tview [ref.fasta]\n"); + return 1; + } + tv = tv_init(argv[1], (argc == 2)? 0 : argv[2]); + tv_draw_aln(tv, 0, 0); + tv_loop(tv); + tv_destroy(tv); + return 0; +} +#endif diff --git a/bamtk.c b/bamtk.c new file mode 100644 index 0000000..54ef455 --- /dev/null +++ b/bamtk.c @@ -0,0 +1,112 @@ +#include +#include +#include "bam.h" + +#ifndef PACKAGE_VERSION +#define PACKAGE_VERSION "0.1.1" +#endif + +int bam_taf2baf(int argc, char *argv[]); +int bam_pileup(int argc, char *argv[]); +int bam_merge(int argc, char *argv[]); +int bam_index(int argc, char *argv[]); +int bam_sort(int argc, char *argv[]); +int bam_tview_main(int argc, char *argv[]); +int faidx_main(int argc, char *argv[]); + +static int view_aux(const bam1_t *b, void *data) +{ + bam_view1((bam_header_t*)data, b); + return 0; +} +static int view_auxb(const bam1_t *b, void *data) +{ + bam_write1((bamFile)data, b); + return 0; +} + +int bam_view(int argc, char *argv[]) +{ + bamFile fp, fpout = 0; + bam_header_t *header; + bam1_t *b; + int ret, c, is_bam = 0; + while ((c = getopt(argc, argv, "b")) >= 0) { + switch (c) { + case 'b': is_bam = 1; break; + default: fprintf(stderr, "Unrecognized option: -%c\n", c); return 1; + } + } + if (argc == optind) { + fprintf(stderr, "Usage: samtools view [-b] [ [...]]\n"); + return 1; + } + fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r"); + assert(fp); + header = bam_header_read(fp); + if (is_bam) { + assert(fpout = bam_dopen(fileno(stdout), "w")); + bam_header_write(fpout, header); + } + if (optind + 1 == argc) { + b = (bam1_t*)calloc(1, sizeof(bam1_t)); + while ((ret = bam_read1(fp, b)) >= 0) bam_view1(header, b); + if (ret < -1) fprintf(stderr, "[bam_view] truncated file? Continue anyway. 
(%d)\n", ret); + free(b->data); free(b); + } else { + int i; + bam_index_t *idx; + idx = bam_index_load(argv[optind]); + for (i = optind + 1; i < argc; ++i) { + int tid, beg, end; + bam_parse_region(header, argv[i], &tid, &beg, &end); + if (is_bam) bam_fetch(fp, idx, tid, beg, end, fpout, view_auxb); + else bam_fetch(fp, idx, tid, beg, end, header, view_aux); + } + bam_index_destroy(idx); + } + bam_header_destroy(header); + bam_close(fp); + if (is_bam) bam_close(fpout); + return 0; +} + +static int usage() +{ + fprintf(stderr, "\n"); + fprintf(stderr, "Program: samtools (Tools for alignments in the SAM format)\n"); + fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION); + fprintf(stderr, "Usage: samtools [options]\n\n"); + fprintf(stderr, "Command: import import from the text format\n"); + fprintf(stderr, " view export to the text format\n"); + fprintf(stderr, " sort sort alignment file\n"); + fprintf(stderr, " merge merge multiple sorted alignment files\n"); + fprintf(stderr, " pileup generate pileup output\n"); + fprintf(stderr, " faidx index/extract FASTA\n"); +#ifndef _NO_CURSES + fprintf(stderr, " tview text alignment viewer\n"); +#endif + fprintf(stderr, " index index alignment\n"); + fprintf(stderr, "\n"); + return 1; +} + +int main(int argc, char *argv[]) +{ + if (argc < 2) return usage(); + if (strcmp(argv[1], "view") == 0) return bam_view(argc-1, argv+1); + else if (strcmp(argv[1], "import") == 0) return bam_taf2baf(argc-1, argv+1); + else if (strcmp(argv[1], "pileup") == 0) return bam_pileup(argc-1, argv+1); + else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1); + else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1); + else if (strcmp(argv[1], "index") == 0) return bam_index(argc-1, argv+1); + else if (strcmp(argv[1], "faidx") == 0) return faidx_main(argc-1, argv+1); +#ifndef _NO_CURSES + else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1); +#endif + else { + fprintf(stderr, "[main] 
unrecognized command '%s'\n", argv[1]); + return 1; + } + return 0; +} diff --git a/bgzf.c b/bgzf.c new file mode 100644 index 0000000..4314c70 --- /dev/null +++ b/bgzf.c @@ -0,0 +1,488 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "bgzf.h" + +extern off_t ftello(FILE *stream); +extern int fseeko(FILE *stream, off_t offset, int whence); + +typedef int8_t byte; + +static const int DEFAULT_BLOCK_SIZE = 64 * 1024; +static const int MAX_BLOCK_SIZE = 64 * 1024; + +static const int BLOCK_HEADER_LENGTH = 18; +static const int BLOCK_FOOTER_LENGTH = 8; + +static const int GZIP_ID1 = 31; +static const int GZIP_ID2 = 139; +static const int CM_DEFLATE = 8; +static const int FLG_FEXTRA = 4; +static const int OS_UNKNOWN = 255; +static const int BGZF_ID1 = 66; // 'B' +static const int BGZF_ID2 = 67; // 'C' +static const int BGZF_LEN = 2; +static const int BGZF_XLEN = 6; // BGZF_LEN+4 + +static const int GZIP_WINDOW_BITS = -15; // no zlib header +static const int Z_DEFAULT_MEM_LEVEL = 8; + + +inline +void +packInt16(uint8_t* buffer, uint16_t value) +{ + buffer[0] = value; + buffer[1] = value >> 8; +} + +inline +int +unpackInt16(const uint8_t* buffer) +{ + return (buffer[0] | (buffer[1] << 8)); +} + +inline +void +packInt32(uint8_t* buffer, uint32_t value) +{ + buffer[0] = value; + buffer[1] = value >> 8; + buffer[2] = value >> 16; + buffer[3] = value >> 24; +} + +inline +int +min(int x, int y) +{ + return (x < y) ? 
x : y; +} + +static +void +report_error(BGZF* fp, const char* message) { + fp->error = message; +} + +static +BGZF* +open_read(int fd) +{ + FILE* file = fdopen(fd, "r"); + BGZF* fp = malloc(sizeof(BGZF)); + fp->file_descriptor = fd; + fp->open_mode = 'r'; + fp->owned_file = 0; + fp->file = file; + fp->uncompressed_block_size = MAX_BLOCK_SIZE; + fp->uncompressed_block = malloc(MAX_BLOCK_SIZE); + fp->compressed_block_size = MAX_BLOCK_SIZE; + fp->compressed_block = malloc(MAX_BLOCK_SIZE); + fp->block_address = 0; + fp->block_offset = 0; + fp->block_length = 0; + fp->error = NULL; + return fp; +} + +static +BGZF* +open_write(int fd) +{ + FILE* file = fdopen(fd, "w"); + BGZF* fp = malloc(sizeof(BGZF)); + fp->file_descriptor = fd; + fp->open_mode = 'w'; + fp->owned_file = 0; + fp->file = file; + fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE; + fp->uncompressed_block = NULL; + fp->compressed_block_size = MAX_BLOCK_SIZE; + fp->compressed_block = malloc(MAX_BLOCK_SIZE); + fp->block_address = 0; + fp->block_offset = 0; + fp->block_length = 0; + fp->error = NULL; + return fp; +} + +BGZF* +bgzf_open(const char* __restrict path, const char* __restrict mode) +{ + BGZF* fp = NULL; + if (strcasecmp(mode, "r") == 0) { + int oflag = O_RDONLY; + int fd = open(path, oflag); + fp = open_read(fd); + } else if (strcasecmp(mode, "w") == 0) { + int oflag = O_WRONLY | O_CREAT | O_TRUNC; + int fd = open(path, oflag, 0644); + fp = open_write(fd); + } + if (fp != NULL) { + fp->owned_file = 1; + } + return fp; +} + +BGZF* +bgzf_fdopen(int fd, const char * __restrict mode) +{ + if (strcasecmp(mode, "r") == 0) { + return open_read(fd); + } else if (strcasecmp(mode, "w") == 0) { + return open_write(fd); + } else { + return NULL; + } +} + +static +int +deflate_block(BGZF* fp, int block_length) +{ + // Deflate the block in fp->uncompressed_block into fp->compressed_block. + // Also adds an extra field that stores the compressed block length. 
+ + byte* buffer = fp->compressed_block; + int buffer_size = fp->compressed_block_size; + + // Init gzip header + buffer[0] = GZIP_ID1; + buffer[1] = GZIP_ID2; + buffer[2] = CM_DEFLATE; + buffer[3] = FLG_FEXTRA; + buffer[4] = 0; // mtime + buffer[5] = 0; + buffer[6] = 0; + buffer[7] = 0; + buffer[8] = 0; + buffer[9] = OS_UNKNOWN; + buffer[10] = BGZF_XLEN; + buffer[11] = 0; + buffer[12] = BGZF_ID1; + buffer[13] = BGZF_ID2; + buffer[14] = BGZF_LEN; + buffer[15] = 0; + buffer[16] = 0; // placeholder for block length + buffer[17] = 0; + + // loop to retry for blocks that do not compress enough + int input_length = block_length; + int compressed_length = 0; + while (1) { + + z_stream zs; + zs.zalloc = NULL; + zs.zfree = NULL; + zs.next_in = fp->uncompressed_block; + zs.avail_in = input_length; + zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH]; + zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; + + int status = deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY); + if (status != Z_OK) { + report_error(fp, "deflate init failed"); + return -1; + } + status = deflate(&zs, Z_FINISH); + if (status != Z_STREAM_END) { + deflateEnd(&zs); + if (status == Z_OK) { + // Not enough space in buffer. + // Can happen in the rare case the input doesn't compress enough. + // Reduce the amount of input until it fits. 
+ input_length -= 1024; + if (input_length <= 0) { + // should never happen + report_error(fp, "input reduction failed"); + return -1; + } + continue; + } + report_error(fp, "deflate failed"); + return -1; + } + status = deflateEnd(&zs); + if (status != Z_OK) { + report_error(fp, "deflate end failed"); + return -1; + } + compressed_length = zs.total_out; + compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; + if (compressed_length > MAX_BLOCK_SIZE) { + // should never happen + report_error(fp, "deflate overflow"); + return -1; + } + break; + } + + packInt16((uint8_t*)&buffer[16], compressed_length-1); + uint32_t crc = crc32(0L, NULL, 0L); + crc = crc32(crc, fp->uncompressed_block, input_length); + packInt32((uint8_t*)&buffer[compressed_length-8], crc); + packInt32((uint8_t*)&buffer[compressed_length-4], input_length); + + int remaining = block_length - input_length; + if (remaining > 0) { + if (remaining > input_length) { + // should never happen (check so we can use memcpy) + report_error(fp, "remainder too large"); + return -1; + } + memcpy(fp->uncompressed_block, + fp->uncompressed_block + input_length, + remaining); + } + fp->block_offset = remaining; + return compressed_length; +} + +static +int +inflate_block(BGZF* fp, int block_length) +{ + // Inflate the block in fp->compressed_block into fp->uncompressed_block + + z_stream zs; + zs.zalloc = NULL; + zs.zfree = NULL; + zs.next_in = fp->compressed_block + 18; + zs.avail_in = block_length - 16; + zs.next_out = fp->uncompressed_block; + zs.avail_out = fp->uncompressed_block_size; + + int status = inflateInit2(&zs, GZIP_WINDOW_BITS); + if (status != Z_OK) { + report_error(fp, "inflate init failed"); + return -1; + } + status = inflate(&zs, Z_FINISH); + if (status != Z_STREAM_END) { + inflateEnd(&zs); + report_error(fp, "inflate failed"); + return -1; + } + status = inflateEnd(&zs); + if (status != Z_OK) { + report_error(fp, "inflate failed"); + return -1; + } + return zs.total_out; +} + +static +int 
+check_header(const byte* header) +{ + return (header[0] == GZIP_ID1 && + header[1] == (byte) GZIP_ID2 && + header[2] == Z_DEFLATED && + (header[3] & FLG_FEXTRA) != 0 && + unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN && + header[12] == BGZF_ID1 && + header[13] == BGZF_ID2 && + unpackInt16((uint8_t*)&header[14]) == BGZF_LEN); +} + +static +int +read_block(BGZF* fp) +{ + byte header[BLOCK_HEADER_LENGTH]; + int64_t block_address = ftello(fp->file); + int count = fread(header, 1, sizeof(header), fp->file); + if (count == 0) { + fp->block_length = 0; + return 0; + } + if (count != sizeof(header)) { + report_error(fp, "read failed"); + return -1; + } + if (!check_header(header)) { + report_error(fp, "invalid block header"); + return -1; + } + int block_length = unpackInt16((uint8_t*)&header[16]) + 1; + byte* compressed_block = (byte*) fp->compressed_block; + memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); + int remaining = block_length - BLOCK_HEADER_LENGTH; + count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file); + if (count != remaining) { + report_error(fp, "read failed"); + return -1; + } + count = inflate_block(fp, block_length); + if (count < 0) { + return -1; + } + if (fp->block_length != 0) { + // Do not reset offset if this read follows a seek. 
+ fp->block_offset = 0; + } + fp->block_address = block_address; + fp->block_length = count; + return 0; +} + +int +bgzf_read(BGZF* fp, void* data, int length) +{ + if (length <= 0) { + return 0; + } + if (fp->open_mode != 'r') { + report_error(fp, "file not open for reading"); + return -1; + } + + int bytes_read = 0; + byte* output = data; + while (bytes_read < length) { + int available = fp->block_length - fp->block_offset; + if (available <= 0) { + if (read_block(fp) != 0) { + return -1; + } + available = fp->block_length - fp->block_offset; + if (available <= 0) { + break; + } + } + int copy_length = min(length-bytes_read, available); + byte* buffer = fp->uncompressed_block; + memcpy(output, buffer + fp->block_offset, copy_length); + fp->block_offset += copy_length; + output += copy_length; + bytes_read += copy_length; + } + if (fp->block_offset == fp->block_length) { + fp->block_address = ftello(fp->file); + fp->block_offset = 0; + fp->block_length = 0; + } + return bytes_read; +} + +static +int +flush_block(BGZF* fp) +{ + while (fp->block_offset > 0) { + int block_length = deflate_block(fp, fp->block_offset); + if (block_length < 0) { + return -1; + } + int count = fwrite(fp->compressed_block, 1, block_length, fp->file); + if (count != block_length) { + report_error(fp, "write failed"); + return -1; + } + fp->block_address += block_length; + } + return 0; +} + +int +bgzf_write(BGZF* fp, const void* data, int length) +{ + if (fp->open_mode != 'w') { + report_error(fp, "file not open for writing"); + return -1; + } + + if (fp->uncompressed_block == NULL) { + fp->uncompressed_block = malloc(fp->uncompressed_block_size); + } + + const byte* input = data; + int block_length = fp->uncompressed_block_size; + int bytes_written = 0; + while (bytes_written < length) { + int copy_length = min(block_length - fp->block_offset, length - bytes_written); + byte* buffer = fp->uncompressed_block; + memcpy(buffer + fp->block_offset, input, copy_length); + fp->block_offset += 
copy_length; + input += copy_length; + bytes_written += copy_length; + if (fp->block_offset == block_length) { + if (flush_block(fp) != 0) { + break; + } + } + } + return bytes_written; +} + +int +bgzf_close(BGZF* fp) +{ + if (fp->open_mode == 'w') { + if (flush_block(fp) != 0) { + return -1; + } + if (fflush(fp->file) != 0) { + report_error(fp, "flush failed"); + return -1; + } + } + if (fp->owned_file) { + if (fclose(fp->file) != 0) { + return -1; + } + } + free(fp->uncompressed_block); + free(fp->compressed_block); + free(fp); + return 0; +} + +int64_t +bgzf_tell(BGZF* fp) +{ + return ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)); +} + +int64_t +bgzf_seek(BGZF* fp, int64_t pos, int where) +{ + if (fp->open_mode != 'r') { + report_error(fp, "file not open for read"); + return -1; + } + if (where != SEEK_SET) { + report_error(fp, "unimplemented seek option"); + return -1; + } + int block_offset = pos & 0xFFFF; + int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL; + if (fseeko(fp->file, block_address, SEEK_SET) != 0) { + report_error(fp, "seek failed"); + return -1; + } + fp->block_length = 0; // indicates current block is not loaded + fp->block_address = block_address; + fp->block_offset = block_offset; + return 0; +} + diff --git a/bgzf.h b/bgzf.h new file mode 100644 index 0000000..4ed5c29 --- /dev/null +++ b/bgzf.h @@ -0,0 +1,102 @@ +/* + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2008 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. + * Neither the Broad Institute nor MIT can be responsible for its use, misuse, + * or functionality. 
+ */ + +#ifndef __BCGZ_H +#define __BGZF_H + +#include +#include +#include "zlib.h" +#include +//#include "zutil.h" + +//typedef int8_t bool; + +typedef struct { + int file_descriptor; + char open_mode; // 'r' or 'w' + bool owned_file; + FILE* file; + int uncompressed_block_size; + int compressed_block_size; + void* uncompressed_block; + void* compressed_block; + int64_t block_address; + int block_length; + int block_offset; + const char* error; +} BGZF; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Open an existing file descriptor for reading or writing. + * Mode must be either "r" or "w". + * A subsequent bgzf_close will not close the file descriptor. + * Returns null on error. + */ +BGZF* bgzf_fdopen(int fd, const char* __restrict mode); + +/* + * Open the specified file for reading or writing. + * Mode must be either "r" or "w". + * Returns null on error. + */ +BGZF* bgzf_open(const char* path, const char* __restrict mode); + +/* + * Close the BGZ file and free all associated resources. + * Does not close the underlying file descriptor if created with bgzf_fdopen. + * Returns zero on success, -1 on error. + */ +int bgzf_close(BGZF* fp); + +/* + * Read up to length bytes from the file storing into data. + * Returns the number of bytes actually read. + * Returns zero on end of file. + * Returns -1 on error. + */ +int bgzf_read(BGZF* fp, void* data, int length); + +/* + * Write length bytes from data to the file. + * Returns the number of bytes written. + * Returns -1 on error. + */ +int bgzf_write(BGZF* fp, const void* data, int length); + +/* + * Return a virtual file pointer to the current location in the file. + * No interpetation of the value should be made, other than a subsequent + * call to bgzf_seek can be used to position the file at the same point. + * Return value is non-negative on success. + * Returns -1 on error. 
/*
 * Print the bgzip command-line help to standard output.
 * Always returns 0 so it can be used directly as the exit status.
 */
static int bgzip_main_usage()
{
	static const char *const help_lines[] = {
		"\n",
		"Usage: bgzip [options] [file] ...\n\n",
		"Options: -c write on standard output, keep original files unchanged\n",
		" -d decompress\n",
		/* " -l list compressed file contents\n", (option not enabled) */
		" -b INT decompress at virtual file pointer INT\n",
		" -s INT decompress INT bytes in the uncompressed file\n",
		" -h give this help\n",
		"\n",
	};
	size_t idx;

	for (idx = 0; idx < sizeof help_lines / sizeof help_lines[0]; ++idx) {
		fputs(help_lines[idx], stdout);
	}
	return 0;
}
", fn); + scanf("%c", &c); + if (c != 'Y' && c != 'y') { + printf("bgzip: not overwritten\n"); + exit(1); + } + } + } + if (fd < 0) { + if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0) { + fprintf(stderr, "bgzip: %s: Fail to write\n", fn); + exit(1); + } + } + return fd; +} + +static +void +fail(BGZF* fp) +{ + printf("Error: %s\n", fp->error); + exit(1); +} + +int main(int argc, char **argv) +{ + int c, compress, pstdout, is_forced; + BGZF *rz; + void *buffer; + long start, end, size; + + compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; + while((c = getopt(argc, argv, "cdlhfb:s:")) >= 0){ + switch(c){ + case 'h': return bgzip_main_usage(); + case 'd': compress = 0; break; + case 'c': pstdout = 1; break; + // case 'l': compress = 2; break; + case 'b': start = atol(optarg); break; + case 's': size = atol(optarg); break; + case 'f': is_forced = 1; break; + } + } + if (size >= 0) end = start + size; + if(end >= 0 && end < start){ + fprintf(stderr, " -- Illegal region: [%ld, %ld] --\n", start, end); + return 1; + } + if(compress == 1){ + int f_src, f_dst = -1; + if(argc > optind){ + if((f_src = open(argv[optind], O_RDONLY)) < 0){ + fprintf(stderr, " -- Cannot open file: %s --\n", argv[optind]); + return 1; + } + if(pstdout){ + f_dst = fileno(stdout); + } else { + char *name = malloc(sizeof(strlen(argv[optind]) + 5)); + strcpy(name, argv[optind]); + strcat(name, ".gz"); + f_dst = write_open(name, is_forced); + if (f_dst < 0) return 1; + free(name); + } + } else if(pstdout){ + f_src = fileno(stdin); + f_dst = fileno(stdout); + } else return bgzip_main_usage(); + rz = bgzf_fdopen(f_dst, "w"); + buffer = malloc(WINDOW_SIZE); + while((c = read(f_src, buffer, WINDOW_SIZE)) > 0) { + if (bgzf_write(rz, buffer, c) < 0) { + fail(rz); + } + } + // f_dst will be closed here + if (bgzf_close(rz) < 0) { + fail(rz); + } + if (argc > optind) unlink(argv[optind]); + free(buffer); + close(f_src); + return 0; + } else { + if(argc <= optind) return 
bgzip_main_usage(); + int f_dst; + if (argc > optind && !pstdout) { + char *name; + if (strstr(argv[optind], ".gz") - argv[optind] != strlen(argv[optind]) - 3) { + printf("bgzip: %s: unknown suffix -- ignored\n", argv[optind]); + return 1; + } + name = strdup(argv[optind]); + name[strlen(name) - 3] = '\0'; + f_dst = write_open(name, is_forced); + free(name); + } else f_dst = fileno(stdout); + rz = bgzf_open(argv[optind], "r"); + if (rz == NULL) { + printf("Could not open file: %s\n", argv[optind]); + return 1; + } + buffer = malloc(WINDOW_SIZE); + if (bgzf_seek(rz, start, SEEK_SET) < 0) { + fail(rz); + } + while(1){ + if(end < 0) c = bgzf_read(rz, buffer, WINDOW_SIZE); + else c = bgzf_read(rz, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); + if(c == 0) break; + if (c < 0) fail(rz); + start += c; + write(f_dst, buffer, c); + if(end >= 0 && start >= end) break; + } + free(buffer); + if (bgzf_close(rz) < 0) { + fail(rz); + } + if (!pstdout) unlink(argv[optind]); + return 0; + } +} + diff --git a/examples/00README.txt b/examples/00README.txt new file mode 100644 index 0000000..5dd123c --- /dev/null +++ b/examples/00README.txt @@ -0,0 +1,28 @@ +NA18507_part.fa contains two sequences cut from the human genome +build36. They were exatracted with command: + + samtools faidx human_b36.fa 2:2043966-2045540 20:67967-69550 + +Sequence names were changed manually for simplicity. ex1.fa.fai is the +index for the sequence file, generated by: + + samtools faidx ex1.fa + +This index file also works as the reference list file used by `import' +and `pileup' commands of samtools. ex1.sam.gz contains MAQ alignments +exatracted with: + + (samtools view NA18507_maq.bam 2:2044001-2045500; + samtools view NA18507_maq.bam 20:68001-69500) + +and processed with an awk command to make everything consistent as a +standalone alignment. 
+ +To try samtools, you may run the following commands: + + samtools import ex1.fa.fai ex1.sam.gz ex1.bam + samtools index ex1.bam + samtools tview ex1.bam ex1.fa + samtools pileup -cf ex1.fa ex1.bam + samtools pileup -cf ex1.fa -t ex1.fa.fai ex1.sam.gz + diff --git a/examples/ex1.fa b/examples/ex1.fa new file mode 100644 index 0000000..ef611b4 --- /dev/null +++ b/examples/ex1.fa @@ -0,0 +1,56 @@ +>seq1 +CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCT +GTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCAC +GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAG +TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTC +AGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAA +CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACC +AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT +CTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA +ATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGC +AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAAC +AACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACAC +ATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATAC +CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCT +TTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT +TCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT +GCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT +ACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGA +ACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTG +TGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTA +CGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAG +TCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC +TTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTC +TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTG +TTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGG +AGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATA 
+TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTC +TCCCTCGTCTTCTTA +>seq2 +TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAG +CTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT +TATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTT +CAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA +AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT +AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC +ATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAG +GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCAT +CAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATT +TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTA +AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA +ATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT +TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATA +AAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACC +TCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATA +GATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATT +AATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA +AATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGT +AAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATAT +AACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAAT +ACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGAT +GATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG +CGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATA +GCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAA +AAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAA +TTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGC +CAGAAAAAAATATTTACAGTAACT diff --git a/examples/ex1.fa.fai b/examples/ex1.fa.fai new file mode 100644 index 0000000..bac151a --- /dev/null +++ b/examples/ex1.fa.fai @@ -0,0 +1,2 @@ +seq1 1575 6 60 61 +seq2 1584 1614 60 61 diff --git a/examples/ex1.sam.gz b/examples/ex1.sam.gz new file mode 100644 
index 0000000000000000000000000000000000000000..1a213d15ff9b6c79cec4b497021a9cbdf83ae88c GIT binary patch literal 107374 zcmV(-K-|9{iwFq6t4~P)15QCPI5jXgUvOb^bS`sYZ2+8HNt5F`az4|)V@f3zr7G+z z^(HimnHnFSBOI2`b@IW-3IG2HX98RRZ~YkJAU$d!5jGayXo@F zk54kbcb(VGpZ{?G>!@iS|AJS|oB#L|UP<%c{~2EXN$_`@e^Pw+Z-0OMmlQurI_;=r zjw|CR)0a24P4nTk<+O;M2E!z1AH7tcS;mpp{xhnv>TuOIJkM~OLeT;4LV5$jvRcr zAPl0-8b(H04dZKu5n*++4E!Tl%(AxDnt$V6(_RGFqK z#7rM2oG3-nb#2#CVJ-Ed5G37U*We_XV@x_l3@rO(*vw>-WUASQ2(CN}iNWcdaGg~L zl5APqp5LTB7ghs)IhCYXUu_O1!@cA)d`pp=Cq zmE9M&!IMjfi3&`}um;RKatm78b_L+~BZcU@*#+{n-9g(%_Jb6S3J(7spca#Z+;8UPz(aXRk%-sk9{-?dm`~5@eD{JNHD%-+nxQrOtV$`Oi17gU2AS@d0 zL>dseZnQ`=Qa4L7d?qMg!EQ`1L7+c3+O-gGPTol=y}W&B{UvDuQ*VYEMEL_87s4R$ zNDAD7Gd_V7yfmA2Ap$&j`-~cSahU@;fj+WMDuy52^zBw?X~JWriO8F zag@cPNxvo9Ta9<=pZ*1%zvVWlZ`)=XK0q-C_J$*~7H+H?!J1jy2Q zD+DK?@0~HNI=n;v;^+x5TEo@ww%oPD^m~$b^emHt`#u2C! zKy8P0SEMApt18)pe9U5_>w<-M=K$Er>-ntD%cOU3m~4slQosoCB%}|L&=TUf?rRVu z$Y+GOw(g%no>xP})j+^L8XNoU0X#PNOb9(zPcU-}Vu7z6KpFVBg5*IN&bjZ**<9)M zFMlaKk*ZQ5`a^*L0ZfcN+NCB0f7qnqQ7>a07W?s>EXdpp>zHBO-yp+bco}1^A7}Q) zZY;;hXqME1Z@o!p)1&gbsUccmyr)pQsG zX#%aZfY5+jVl0)W&a*MF@b$GuQT8yW@YwbBmhJ*e&yoJJ_wH6^W4$zaZ#xU=7aGR!=D>+l+m;S}9b$-{ z_!~_(VM3#UMZP**Y7nlzwC%h5d|#%PqEB}CG6QD7*&H*%G4Pu-HJ%cuUBLN0in7Ps z+>`6wguA=9_ZZI z4n0(cmg>?Dm~Y2B=Xm$--Mc@Umn|Tqe1b>--GC3kyh2?FQpm^S5eytFS@04GUFLv- zTH*^2cdfOQZ97CA%Z3U-tecTg(}lEE4J9c;k}t;!=_5!11@VZQmBVTJ*f9na6Kc(( z&lrH=cE0xIlxhujo+ z3}u+~iKKo9ALSlSG*zr~v+S2%26kvex>|H%^sbd>p*Bw-@3XK9YBCMVQ@ad}j7{t4 z0m2G(pMA9W#HUaD(Fu?$Py>;h#=^HxwgF$C*+yMLv05B2jW>mFAwv$&>h2sHM6}N~ z`ieUPmcRlrvA|y^`Pg5{XXabMX?X(PLxJ;}39vQ`X^%?v9HpfatJF;uWG|)`Z)SKvfR1QyycUEBQCelS;~4W?8fw7c!XS~H z>Kl~J`l3~gt4SnAat~3`7;B3hvQqNz3q=qpC^$Lz;Z!FWRh~j(rn?O=?Le(4p7dSW zmZ&oIu;Rdf!MT7V*3O7@y-jMeAoQk+@p~EN6yE$<&Z;o=SkV(V$YVoBZm3CN7QIo` z)KPARG`KiIE@0Nx61h68whk!;Q%S>-mK?YW@R<@LK1dIu0wNQ_b~nff4#%_J!a*HD zj2cV_3Bd%D&kC3|kc(ZOa_tYRWg2!dpumd5s!K8QU+`6l7X&k8vs`;# z|N1Gg;X0{btW(}Wa>L!k*TaD=1PpM-=;lHvDV*i-nYeU 
zg(zA{WT`SL!89R>HUyKZBwzsFR7pKgV-me3!?3hz&Cy~g3%02FxRSK}EF6g0$HbxhFqB4Zl>WwsSj`h$f?rIAKZKt3UpMjL z2N}p=!DG?hPAEgOV4Gjm&jTqb#QOcqx2YK2SK5TEkUQ;<@0_jyRSr2&M z-n!+8vEIs#RWh8B1C2zwMUy;Did>egser`kg=~^0dGgV^J$1+XH0Xr{5JAulXC*iy zo|c&@wrFHt&Rc{S`kp}@aD2gg0b*<6=SHaar(5{o1TmOj&ZmzLri(S_G&6BpHb0X3 z0SD@;agf0z$$3!@r(w!#MT!9;u6om#Z4Dhn>7&7ac?C%Tv4oeS0eW9&0YVvrJOw!{ zOF)X;U50vI1tYl2avU-Y6va>SC- zYqsdoQ+5;OxjMHT$$O{C^dd}y9b*QWK9{JYsXNlS+l0(;Yq=xLAWk=$QRX|nz~5_U z%DHVqJ>h^fnn0o|vAmTAb6rgDlBSOFg?4T+&1dj(^f>RfyXrQbr9Ri6npqEC0FGzL~7*li^bZNA75*m5y6soA2p&VvAjoO29vlun@y^j`97UunsuJ<@)I`MbIk-tY6h2202$t0-o zsbhf~Q3k0ki8P7y5E&_9^4z$C%f@ScbRUXWjg&et;( ze?o={f^2c$06(4h>2_<$< z17Cl>oGwHu4qUNQN_?ntc9SmV+5^Qc+pG{&6vc554lGwPFx_NQ3UljhRLV9ROZPxo z!TYqyx^^8cFvS5~+!MmUW<@rSU7$o7egNfkMZ6X>Z)u5zjP_7Jr=N$!jsbW;MVu4~ z3%4?}T5y0*M6reNk#ikC#~L3CvZMEq9gSmWLO5Z8hYOkE1P7B+ogFxvY#)e?WA~$D zfsoSi(r7CvN3{%9D$q-lCJVNh8^@HUBFWcdA&pHF@6bR#q?Y|?9K+#UrvPV`!y*C8 zR-89gh~LZAS(f33ym47C2U+sfwgDC`oK=v788$~cj!W%8x>lXFDb%t9vt!?v#!+2} z%SIAs_>^knd`DT4--P#^%qKM*H-6%~uWq4furu+(&h@F|})DXtA@M_hhPP81O`5Tq3B#d1y1lIOKF0FQ=Jz=8Y zNN`9w4WAERj}8XD+hm!F-X$U-j4*dlT}8i?y5Ilmy1VVAvjA<6*?g z3Gmx0wOcC6@=hJ%qZxK;`e`~7=W#fGaJvWo^9y?jEEg!G1up;jCaWc^-(6|i^NUqhXZz*Cl{H0Zl zuWSxCzeW`}+^Y$e6Y6bqx-m)%QFEjVG zyf}Q#B{-#IPJpp|>m2+8cHrCs8W&w1rQ*G2($Lw~I4)pQan82=>;#EzLexp!8%;8) zV&{d?9V2?>RT!T7?`Mg2N7wY9WWay0T8N^=}dCU~qyo%M+Mxv}puM_DRlAK5~@bVLQWj(@}t? 
zd`lq5((j!n{stce&LP7IJIMRrO7c-06S6oou&|#uk*&!cJ^T$dxuSCl!wiEgbs|%$ zmnB=-(!0FxD}ia#onJTH`c+5yXyn&L@y`<^^!hA?I1?$)L`ZScnkcE1(q$8Q*RV*r zV}G(Jo9@`Zg)RF4^EttMEWtdflhs5w(F`$L9ic>hp*_& zhZe_3$X(K$(>U<$;RUqxe%HS2b??|JyRap3lqM&6t;HhD8p@c_`Zn6SU(Udf?n&(X zo)pLHfPrAWEv;GdV#kTyh4V}KI3(ob`2z6#8QfQHT{bnE@uz>YDkg$0*jL_DFL z#T+pS#g+=lEV^edKDg5pynG(sX);O^r$xYzvScc(K#7^g9+XjrxbsEn_-Mns_^iRW z3RzYr2m0{_9S*b!1C?ntofms{;P0$LOls79Z>JYx(w+E4acy=m$7DSs_w3Q4KN8ak!7|O;_0byEGTf<(1$dJxR(bW~ zR=5#0{iM0Im$%FN?Y+HTJEGL}av_$q?j(#N5bosZ6c`Cvn&Zy)=}(HNid@p{5nU5{ zV4d)`Ki{G+b)<{DyivdEc$AC5h6?;4<#VQ;y09P(aF zb0_~;4E@n&hZ+vP$Cgcp4%gC$frjO2x~<8oeg{0#cAoG4Vfk2mJSe7WD=V4gVBfBj zv!d3n2})$Y*v7-os7dL%D9Q)C*c?;Tcp?7*t&|E+%1gS9LZua9%zwB2 zP24%Bd!x~KY}!c5FF-QdPPe>$D~qj6Bw`OrjJs%2vUL-oJUb)+&E?cs5WNq`Qx| zWl{1v#$m;AHw48!WT_;J%?4~2%F>|^Qj9*7U=Tfg5WvD=F_ZmBH7~)`PzIcCu8QeZ z9&*C+)3xNn=^_1Sjtbj~2p1^NRG@0^k@f&xzIfM%s1m5kfr6u2)=X$ah0gmf;g zxXY{zgAP6c`G_E&;pm_NB(?>{Y~pe~2}Gw)8oc}E^2xonxOa9Sy~8;{Ls_$Topj+? z<{qeVESw94+s@OgVLZgM@ z;R~ObHJ;M;-%9_lo}(?DPcoTOffFl6dvFdr2lvbKS0|JtIAwt|oKdok5-dr5b>4_6 z(=9hc&2aA6(;bsY9%ww}cmi_DkX5kwv7in5S%>*}jz^TlhWU`@ehwf*YA1LLt5=o` z>s)#1u(e51b*`wqKTmNRZ`0`yu7_$-WX;nRRpaOev#Hf)z)M+%TZbo4s){2?CJN||BAb$2C$pc zdAX*JzI3tJHFKrd76gI9s^b7a>hv+Dl1WtwkG$zSmFJ^SQVd?-kLJ{>PGKz*Vmk%P z1u0%EklpSJ&rxMJ@cx%Ct?V+<>%G4y&oAtIZ^)ns7D|B5q^1S`;NTNbx_Zi!E0UdT zYw^N9tW*6YkN^+dpN2*Q2CseU}>UZwL3suMYhu->9C6M+mz275n%te9g zA}@y}jvQ>522des;?E0c`&BsF)wk*qnCzp2Lv#VAzd%RpO)2Z3grQSxOgN?)p9w!k zC__x|YIUIA>_7P276cPnzvQGJz(*v~z6chgM4T6t-!z+MQB4+RWXvztk-hFzJ=2)! 
zBE&L1B-Mb?24TYy>px2o7T_4lwyQ!JoFKX0nf7b%4v_u4Uhl`wkV|bBr{VQ$*%U$2 zb7_KtNA`1dHu8jta7Wv*7*Dv#wcoE3u=Kc3A0Hn|<-NQN+IT{!%PC;^DMfpTtS=(M z{prW+#M+|L^X6Gz0+f^0Pe$wrvaPgBCUp7YaC`Em(6Wmnp_q+|zZXv!eJ8l|+?v^hkZ z=VU0GvX_WDZIs-H8a#&O#Z(o{bW2Y2I&t$lb@Ou)%avz>C_4fjwMp4`Qgq@FrDrv{ zj?#TxL)`>QIevJmsOQnPFd+Rs*r!yUpsZW+_*@4cBDcU_nyW|LEjgSGLxgumhP5sm zQJBDov-i+{izt7ae_rn|_x-c}{POZ_e|~wnKfS!L7&Gqg?o8McJehC$PGF;`nPqDZ}7c8fZ`oj@nE zW$ot3e3w8Tm*YIU?jzhQp%2$#cDfVxOCL1w4{hvu45BGREizl0CF#1T8Z1#1RrW22 z^uq0#VFoHe+=^ZJyoccAV%-|GXw4b)s#IF&NeA0>FL07XV98{5tPbVr=kg)S@&NBA zsAC>GRgX^aeR*a&O#@?Tx&2?9Aa~S9_fTRkBA|^QIK??8HcBWDj5!0knuMAys_XPc zoMShzCGSEsp1$@NA)=qBXA|B=$ad^AsZKHVlru4-zlC$&IS!PrZK*0F16z*VJt`UU zH(@E;V{{Uv9Au_tsVeeiT%LN<|KlBhNNkosf{U>Dd4LFtb9n}9&Kg%**%U#NbA*$8 zU%PiRuhI3pJ;Rf^teR^xxe(J1EKl+b}$hBXV;pu-kN+$Ga z@@WpxVrY%(s}iBN*^iA5bJ?Wfdjt`Ga;!4y1yR}b{@0&w`|Ix_hrjav*LpH9fBx{E z5T?O3q*=`jooBG*nk_j_`pIGwzpKUBO&`mPbToA9SBljZ-8-KD^Sovz5Q6hg^XfJ{ zx8C_Q458b>)Lq*W&adw%tlayL>5Ffvo0s7_u|MhL_UKLMi`Qy|{m&qR-nZMm1qR`{~s0q^jtcx0?4LVTum7HNkHf|LkZu<#TSGo|x@RG8N0dwW!(GqBWqF378W(PS<;W9VdOfS3>&=zJP{bJe#eGvmXRDM3ib;ldm@09R z$|%1YZ6I~yVj=WZIS%nTbVRR?MPxWVgCnHa5ps>QC79;V(m(g_{p1SjeP*}jTjURz z+5NnAN1Z7nqF^}&J2P&NN0Ud{lzeOMQP4aK`zK?6;#B+<2i+HfAcZciFDGT6;9N^V zT@B8|5dBV#=62gy9u=RF%lwpDMiQAGBqf3=F=NM`_p}V z%$f?5|5UXab-3ni9#~eQq%7L0F z%-n6XQrIo-;OkrdoFDTGXM{oagU;Hxsimi2O3K9DJR6`|(z__PF>Bhm6N2s|m=p(9 zz@+Y7oNyWqUQ|3DoH^&^HFv$S3h5(jNE@k};P@nE{xZgr&OGePL$xw@yw7o=%N(WY zF1j+=42*shCGE(bZX`7>2@hmd{VyVR-j|y>MH=)8=K!h1vJ$)epG#xAF-$$7Z@aaw zd#FS_@Nv9Kl#Abs^lc(Omp7zn@L7~l31>!CPE$^Bh^#H``y<0msSk2nCm8)K{TfEh zizm)lTqAsM|1e)xSA^@<* z95-g0VBE5qV-3cW7ZSxa2)iv4w`7_4!HtecbQGc|TgMA7q9G@{XDe`0ta%95I3pV) zj1ZmV8bCFAFo^`vevtUUC|IV7aGNCv%nm-6ihx;-fjtWHC}rTq)flyz>9kzISukgo z8h?&@OF-?ZBOu#l85maOAl7un zNu2oJKI~$YQ*xqS6i~#<(6ib3mE;l zO}Dy7+IdEw-b*s7`y9A)RK6@}?gIRl9clsAD-QDPk~N<=PT+Hp$M-tpe6F`4?o6ru%)oPugGA4#8VD_bD9^sW2Dltpdw1UKR@lAo*DQ*v1etQOeAErSyQD+>@rAu ziayl*u|7nngWuI&(OTpeauV9U={l$Z0cG4K!$5vzLFPZwgPt^}c&X8`qV^MPOfw0L 
z)#vx_E6Mg%gz1+6W^xPwaI+-Go^U z2l1Cdbi-BRDYD~JXHEGAcUX~86CxUqQurjcIhaT85wKZ54h%^~#7K28e9_6KY|OnC zAL(r8qIcpI5~FYJ(%+Zo56y-t%MuwbKWz6T)?MQ<3Oo-c)ap9N3m}hsh=ha_qbzO+ z-MVt1oC0;R4ofU5PQZ3F=zD6+?_9xm_l6}hvMR|YlquXQDH6h|-s)f41KTC_OoG z^pU41@O*Ls+oVU7R(fLTUkGJmRZKAIg&~Q?Ykmj7%nO$}z^; z*?~i3gWn-~(V5oqFr480UU6n`qnY;Rv*}=0K7!6l13GhSCK5>dw&WvLJ<&H6ZTZx? z4CY$4+Rz@5`>PmaW~jksw$O-xW*$;;*>`0h_3?mVsJP%qRIxLq+n1?-x!$jrN~`Vn zkYj-*6dv0?B1Mn+wuX~9u87o16jq*cfCrvlC){-W6Dv&q`~=7 zVfg~m7$MN#1E9a&IoidV!ncT+#OSo!$e4Rb712;u04q37{8^Vx*|d-F9#&KaxXSA> z%FfiGT-&#f-}cAmgEXQc3lCqyKtr4KS%1u!VbwAnW2zQs(_Wk{9PA%VhU$`I{N-CT zgA#5X6lZ9N8V&lV4<-*YvRcmjk{kA7_NFNra1%wE;o(tk)#PA~2`vcBT1>Bs9W?kgFmy^6-( z#=<6SpJ0;h?bwuI9ul^!IWJ@mFXs(&GBewY6!Xf^tW_E2ccbVtA<$I(nP!*f z)N=-Btq&3jxCCd2fpy}~P`RDh6^2~8O9}hb>u}o2 zBoi1lFpsYEr8_$6Wu+M|cDFBiDxFb8`1RFr(fiEjk(L>zyLpJx?uHta?(SKRuBFU< zRNLnfsk7}cBXKs*TArZ%jZuD461jPvr@MH&5pU&Wzn})AY^13tH)W|Yo3Ze>h^e#X z93`LgLZj|vq?$$@o_JQ#ut8?gtVkQPMbEe)m&?^u_J@2=YYhvA{XRTN%mEQi8D)I2iz;eik0 zH_;~D?nQ{dKPNi7pR*pvIXy{LoG4sLY`fw`!rkH|Y|mkSHEIT!*H3K-+klF{$GLvL zS{%wr`s_li0U3}+ND zo}RZZB*q(f>KpUxXls<}th1!;o7snx2bKpE4AX(W?M7!TV$moZ}ol8O@nR{-u2y++ zB;3_0Np8{@yqM5w<810hl$c&?Y3Osm)l(-lNg+%dTaP#JB}7aXL_)7c zl$l!wUYXBd;RwI+UiJ#53EzT?8NY69ClSoZGL(?`eh6iRcNtQCs0vBDjFwQa>U@;S zCD+e!uG)J*C6|M7i>o=zj9lD)&v5c3qNY5@`19SYkDY>4%f2Cx-@h8!j*M~`$GPW( zMGa1HHJAap>w%4PK1^|wlP5j4rmnIkH9Jjyt@C_Am*=UxJ8lg{5@#YPMiNXJjBgJW zDB}|}Por5p4-+Vpt;@eWvlfn8tvGveqTzZJlMFffMaRkj_MR#)_&2z$+&JPojrf1c zoaoXUFe=+wpy7qYnhtbRgK{|}5Yg}~g#G9G?e6PSLLh&If<@3kn&51!aISPai&tnt z-{@k{tvA^pm3^cby=#RQXsW!ouX>*u+Xuq06Ob0>ZGL+Dcz)|{q$JW)w_d+=%U@!Y zn6&0o$E)@pYt7`n?4-5`Dr`9h2dHOLU-kf%3|XpAIepNp$$@R;xJ6fUKZ&5@Ui6G; z8s6C%Kuo6|zm)zZ>K;M4Zp@ypO$gourHIlkdXMf=KbXQNfrg8XCOa`@kC>aJ4gym$4FO$^G5Ew|bQ3bwoM^g^ zmNJpE3ywK2L+Yjs6q|GUvzX`gqFgc#`sI0e>K>I7x-&6JrGu-TtIKyy8KXNvI|2Ia zYG$5ZVs_sz@64S1>$WwA3>#??rj5hiv^Ead8YtFQf$#Q~^@jIwk^PT%rJoj$htB)`1kSe}_fR&;7IY zbeA`@V9n~TmJxh@;j|fxsQu&Da}v63E+N)M-jDAf!s+ZcQbt~3emR(5Y#HH9yuHn> 
zl=Dp7Zat{I?n^(r@jV|-Feg&PK;yjGu{PO$G^{B_PG8KsPnvSr6+*uQ^?z_wm@~+T3NTj?u)eJT^s$j z^D>DG>!ID=mh(@v`*Q#5!S<7lYNPFUc88F$nrRvwAs+b%yW%zes&hy6VnW9;M5AF1 zvJ}=^gf_)yAr)AN9nD;3$1WsY$+4{1d}5)@|9W5{)u=wQP{}~#tXLP+|4+g!{XrC3 zZz1u&l0 z7*F-6SG!&AF3O3&ZC9*lBgdGS^@=Ee8v}W_`EypTC38*-XphyPX^I=EU=UPVUv9lM z>_x0pa43ohPxO=B$JkfjFSxbG)rP`Q^>w#pB>GC8Tmd7VEJraR??&3kZ@Jm+sm#Pl zdbWdexlobQQD%rE-?IYoYT4D|Zj#|(2tx@imIBBxciy`M)!>q{ z_SWfMmx<4%#!7YFv3#~hr{0JWz&2kZ^X7mZ|n)DgfvGY*K4mvaY1a?vm$1ulA zm@d&t;f9NaXzCM_$kxA9QZaqPvLH)F_iuMnIlGm?G;;AQH8&#`antV*rnP9>$;Q7E zk@74r*{%7YB5?rJ#Fx-k*lSZvJ$9(3;znrB zF1?n@7k#0Z)IO;ExYy*jC28AqBefrB9r__-Tyh&I-C4Bff1=O4(6?owpKV8j%FIcY(gB~`12=&bcnuxlH^_l?w90UB`h9d>_h0?pM-J1S=S;U+D zJ8@fVWOABK5}CZ;;N{XErsd1RyAdqzm1afpBXN$-KC^Vq-MTRnL#UBF5QjY+jY0gZ-TBi zMh^t(&H})%pga}evPdVIp~CQ~GTe!*;T|O?67G&hwxSQb3A;2=*$44W5#=vBh(@<9 zxwOvFIj@>8wlU)*^kiSYHgq8EjjBSa0IK_xFuF{^sTlV~hi4lvMi-;9Z?s+WV7;Rj z4$_(|LM$u0+2t2p38tcTC!^!IFF7BQB)6=9lf2x9dJG{s-Mc=2FuRhA=8v${x-6e zW=1*Um~-}W3YglAM!Mn=diOzvm80y(DwJp?o2TO`xddf}uj#UEogeM#|wOGz@V>)o1s@O@~?D8IqF^N!Q9nlsZxdNsAWQ7x%)GGJ9}hhWrH zaNeEXDdwHyggu|d@^QeXCuL@H7MYM;o1Sa^hg?x`&>8Cv`c0}ZW~;=Mv=)LctH-9} z5L#&Jbj4dEnJLBIKw7clTlazc>_6euix2a0c?5c+K-QSdpN2sDc7-_XiONey#|!IB zlG=^Nx3O)Jvb|0dgGQ|$jm<)!D<#~Iya$T-Q432=?X7N^=Lqfb!Kt0!)1+%4AU4?t zLsE>psFhVHGk1@x?oFgy^r&Y^hv!?PP#!fj zpPLcvlwtDR&zjLWvZ0%~5B8#ze|QV-cxuY+Z3NB^@^fvFFJfFPJ#qG)Li|m6n&*$* zjI*onio7Yyp=2m=ykA6yhrE4LhWXW?z&Oi^!bmB?_h)OvVL8IGY?hbsQ#~5kMf7}g zltUd#FN3%wdBMZ$@&Q%e?eut%k&Zzbq|Xsj4B0s2I+%t zm)j7WUxy~W>QIJ9DBh?TFMepIw5P&o=M+VQ@QiV-pk=d!F(KV>lWBfa-sk%=2qP|= z(ig}&Z68Q9X$6#Y3Ci&@N}e`arhD4+RMi(gQUg2Y#(QP&%%I~M1SVa4e99>8J3bPk zQLlcwjx3uVpROvwu4%Z>zCmb!5y6$oc~-hAlwnml;RrEQYJTj>9KgUopeQlzwoEg2mBLP6PN5{LW%f zO6hB@4}0=@bh{Gy_BM2cO=bqMi1VjuLGyETr$2hLBWED3?`E$g9riuMv^A76C?aH) zfCwuFq%Hc_{*io^69~6au4-+htHa|XQ4!yA25nmdbJHw|@AMg^mT4Z0y);F8`IxUl z8+PFh1Py}l(XjEUuBio?ej=+rGUf%<7kd{`DvR9pV9{g$4OB%XlwYgXVDg!_q-HWF zS;MHqMNij@eQdeXx@<}isib zjeGBHpviosuBvTpq?C-k=}qex5Y_|Fj7h1?DqAjzYS!#ZY_rm%o@ZX}t?CkG%2~mZ 
zvlba>XH}R}OB&&rT;|B{&NwQ%xBIU2oMIulVdwZNkTm~HEW*vsCnO*|8pH< zs(8B}b>44F3_zy{f6x}ZCW>-sAG2gmWpanfMRlBd);UU){pNToW{!A!k=cA;!FA_D zj1d~)y$p{}GW}6TnTyw=s|c;vmm5z=K1-tizsCf38+adMOjLwSVHxBp9m~O*muqRB zgQr8f1B8yP3Q)4Nt7*z0k5Fzl9n;!R%_I%wWCnPh(1DSGaO2pq#}Xs^JIvoa%&$f) zju!8I(fU0o{0ZlgsKlC#jVu|E%_^WC!bIk^Y|XF62&1(z;vGDWqW9qjMnHtKYj@3+ z%W9w|DEI8qScCG%yB^ON;iVjLj*#W@veUI`-J=#$Ns!75fikC~gVRyQ`CjixqhwAsOQ zs-E{y8bw!jAVfEItS1h1Rt2l#W_myEL&%-@+Nrr%%yHkPJy)XZn=_c+u8uV*&k%KP z&@=fodXf}eIwji)S$D}`iami?G-jAu(5^R*Rvo@mPmd>8Nvz2vQ4euT695+F7+Nw? zt#Q{<%?1TclmIW7f6qtfz8k#`SERNa%eJI`GwnK1@F4B=$`xo7372g7_1nD9@9)E` z-!&x`X2%n>%QT~+art8M33$ju4Cs*>L%*|UU2%^1x@6AqQu6kkaP)`i@1pv@&o47e z$1;Xynd!wh{r?W6T%#rl_gI9lW-pRFXA!0KkL?yjmtMBPFn@vytL_QmAT^0t!v?{D z;0mE^!-PV6i1Ir?35b_EvnvV(?xq*S$kW+pQx09Y>X8zAr$r4;%-XEyeUISWH~3C7 zl)C>cB=*L;<0_N0lBkaC3uOd#8}61JV+s6+N1%RZfzYmWO#YV?`*V36OfsiWJ;;OQ*i zOg~8uEIjJ?hWw|PjY$+P>arr8s})+`4l^%pkPGAdX7+k5nw8WO=OtEjjgV&yTlPsZ zBEI{f2g4a_qAJ7mSN>iv+y~G1pDu3zH{Yb7P1tFL>T8HIZSO0?bc;WWQ`OAt8`V8a zI8oMp>Lw$Dkn8wvMcRN|Ui(ui3D9uqN1aieCp{S(i0WWAn4=__weq6p`7pu{ zFCA9S2HV%Y?~t{fWT;s;3ZAcruuTc3edXt^`LynSs%SdT31e^9li&$^v*P?Z!|5OM zon6qnpRM$sewVvPbq) zctPhVY@Qv3Xk^QmojN`@ls_>_Sh~60(adcF+4+7m2G*QKKPaH3r6v2d?4Um#+8&sf36K!n%F(+F|!C^7fpMBs8u}7V+JD2t8gpf(aTOmmFzK`vvA5Bx;rGd#sq zZds!N%#J2tdOb|w64;+~#&(4RR782mzX)Rb`T5?>oq4<3VU?4X-}mLn6U)*a^sh6R zaOPVl>AFx545cmZGoKEbj7K$iBt*suAsmP)wngUzO|mK4GAt%2X3SU59XPRwLzXut zO6}t0(0M(!A#jS_Z()OAj*Ig7#t+X%b4IrAMMX!1A2!+G6(YK z_WdoMiPBDCl@3|qi+f3Pa~$cx0?;C6)k^ohW5U&N6ZQCoZdR`Ee#Bd zW0*r7OpF|iZi!{8e3dh2&AmA)xbUF$gR0)?Y*u@YkIe(;QPxLQhdAk}EL!smJT}Ju;|2)@U7Yqk z2a%);I1(m^G6_*mVEhnfE2>~ZG>QZb_q&L1&dU@kE0q_da(lE`g<*zb#MG31KL>fT zIX&w6fu4S!)$SyO?FQDHnh>}luU$g<)6CP{&7b;np6+xZ%{O9sK9NeO4K4s^K$pLJ zO2bM#L@IPQ74zf9DxBVWv1fc=-tE%Azc2RQRl5^Z$mZ}#%zqfts9y$ z#wb$vgv!&5RKjdgX=o?kIjMi`+%OYf43?eqw?QGJ1oW$sy&O950D%2@7LZ}X5B@_x zF{D*}=%9`5F|a2l(T06BjK9sZ`k#aR@&b3;u`yoZZgLl=G3K2=mX*X6X#B&V1R{;l zn7j?$m0HhH{xnbWzRY(CqFC#CEhfW2b)ApMjoxc$P7x2{es8K&7USgk@gTpNKle{5 
zJDl&&$$sgU(H-yZ=%llXOO8>+Pn;m3Fd(%shD$D!c&!5Ec9}mLWnXZ+QnH@JY0T>8vFf>R= z{5>8o?e|=WJy+2HC()6{~HURE|PDITpRp7Q4 zY?<$$h!Qj7^oT#qbxVT-B(qV_Daha7X1v|dvG1WhJH<7><{olBx;Q8a`OyuDyGWwF zONTAz9^{sDmrc=-zj=Y;)t1xZRZK76nOpk=DwXgyp(432x=Nyp8ldw!y-wc+WR4&( zLOltTq~p{{wc}b~MuPwfi8u}%=i1!mI-`p?>F2|Th@iWRY63;f)kD7}_koFa+lOctlWCTKdxjC#C5w=4X7W%Mnt^7Hzy?U>Q@o zby_wx+cQUixlDn&oz~V}f2f^rK=-~v>r7-!PU(h?S%#TwIXiugsyG5r33M#$-9Qe= zwlfVI)p9|`rK28HsW+~T2PSeU$N>L8ST+QmpZzrgr<=@>8m}vJx}7( z-Z~#@LNw$wB0uQcJZG%g$J3)-rdYi$dWW*5^^SEH?am=~l~hwU=g$MHUi%&mxHiX8 z0`7Q-6YO&}I1i3_n1{cbe0Q-1IG;3VYK9>7&M?xg`5?x5^Fyph!rjg~QbfdU$b5iW zWeiVH%spp8mqAjEZ96b;_7Bz@{vb-Kc?6BlGR#ahY^h$5PHO8Kp&e#d zJa9Qc>L!AH**4ykU#F(rK9t?EFXP=jBiA*VI~u7>Yw|9~`QSBG(vf&Ky@*w@Mw9rQ zJ1=73_>?gIG$p#9$F`-$5c|-NbhnFWMx5j9 zyGop!^?Ykw0{Q{|7oz;ZL#a@{iXtMzJWW{%la_Wxt3#oW#dP>}nS3slrQQ!Y>p=oB zQ4!=L^Ov=3)|&4F|jM$o^6ZQ8PBol=;$F2ek2bm^o&#xqHySOXY9 z>|+#asxq7yysUw_qe*87Fp#Md#^|yXn#P)|0Rv5w(7S6S+mx2~93CVsa~spQ%1q0W zEB#gk7n?kipcqJoqgNcJeVrbJ8Gn-OOeHa%Bx)}Wk@F$3<;qafsS1#pS8|^3JC$3S zKq}`p0}247|3B0=VJ57#SDW(O%U1vuD;a3i1KrXnrqzq3IR=67m-1>tJ z&U)~{@2OYWwaF#)0La-RC9E2{P1i_7Hx8n~4bjUXc)?eUazEY`7*A7gPC~g$hHfga zN@Ov?ABx(v34wNTw2Pzj08T%z>`V%7R&LBoT)ZQxQ}n6e8Nyqk7@UAP1$j3m#!(J{BG=x zsho@9HSY-F3lUKH81Ax(}khdCy{oTo1E5N_;@5 znrz+fMY}!v32p8BzU-4$vogv@?)EC#RY`$)Cngg}BS}s*MtZ;{h+lp#c|MEIADm0p zXp$KvROCW7f&{1wDvCgNJX8FAIs#fD#R%~XSW>nqR)*H(Q{G)u#`xWk;x}~8+kswqN@17N%O?=H6qc}wzOeyoI?f5!mcz%K3&D=@6Hh|9%@9xM7Sr1 zKDs@7Iy(hR{{DXb6rs{iu<#_;(5YH@H4#a;>+snv${MuC8n7jt$i!dncBe8W3T)cC zbkYRHH94?Xv!$8El0lnZ3G>$|N5?90gSP%2-D;JO^b^xGAZ7>bInn`dkL1K+rV3@| z+ys|BkX}DS)DdZ)AbWl=P^$vo9)^;x%tuh}EE%xwJ-v;p<{MH8@a?G_;wExes4O0$ zMr&hl@ntq@HvY}Qtk7DxJnoMxH<*SxtcNg3hqVk7Ov}<`wmea=;S_j}^ufAJvAs1> z#%Kd;3O+p~H67fQ^~btH!u0*%+aZ_2j(wO*XIj+~Sv z&p446U(j``qS=R-BxT&CJ#;~WGiH>6PEr};deBusd}SSC#}jR&z3!|lay9RfQ`X)u zcflubh-kk6^Q-yOEqd%I3upA!h2Z!uRc_5|5+CjfqUO@kpw}~ina;EnL0WTSHb`!4 zJxbvna@{-|5z=MaNP2??nxqx2_2o>Mk`s)1w`OI*3z4`)%3(wj$28`i~@?gJ}3jmFcD(Zp^QFk 
zrcz(-*WBn#(Z{9iN!LSILLa$Q0a|oRAd#V&NUqQ4X2vWB#UpehNO+KJEWid{+?Y;( zmSM6I`b2wMxz!#nu^H1Zt(9DkJmcmtX~O0MEzX_0d2!t%rzWeBhnD{O@SgqCvU3vu&4 zW*#vqOS&WXxs#x%nG(Xant5JLX=|w0+W!t2GK5La3nyx~UBu)be~`g?q=zukUkr{1 zkHx6%tz=aafX3ESNrmF*cvMpi7;uT$*TzJbVyrBMR^bYi@nv{pm!rLKKD*1LL1azq zl-nD}`ai=E)_q2vQg`zeg&o@TGuW~1r8q%*Z}vR8^RY4Ohq#5AdW(0|(bkMzv{p6c zQ^Z}gkuChB{+NfVyNOQqZcea+g^oo_!R-qWl{%U`? z|9yF7JbPWN+S6uOPcnk^5mH`>u0YznxKn;W8I(IkRfB0<)iS_2B$=0@IrEt;ennk% zF02oPEZa0-YL;l2!h>Xk5WB~t4s46#8z6rG^7}Ug2?0?|4m3~q`=3Ae`~CU#{(678 z+b^%LZ?7*e_t&@E+w1-9{UquZ$l&)$D1^>QES`wh{Iv9djM)p`$knfjsz7~(X36cd%bg>OcQ;bqt7FAe~P9k zF;$0bVUT8LJVJdP%8Ui)t%OcgEj;w8YH}8%)0Y`~j8TrEHF@7e-&E{T(42y8Z$=T? zP)Cpd^0+nq%hWl^JE+Wr*Qkm$SZUR(juJ(>@I40dK*rDX^9YL}f;2dxV5yC8atYS1 zseu_SdewkPQPI{-+aR#8f#lHd8Y2lt5Z_c|3?0wp6@4iC)}yHh%*6J*H);bZnn4Be z83*{A0Ne_bVpgstSE%`B-ru`f-o&EDaqi}9e733#WXXOx!fps41_2sLApDXb8--*R zC#wAT_Jd#c)O;%IVt<UBR!wK)+<5-{0m=pZ|G!!ebqLZo$vYQH$SerlcD52IJgfbSliI zsfd}uwOF5Tv!95WLHP^N)pSOpah=t2pBDaHmqTsW@&YxJkuE|NHo9>>8z3#g{Qhn} z-nG!ZG4pN4Mfhpgb<6!_CyT3I6-^V+iozYVJnhbpGHFm&M(OZKVCr4J*!+3NKAgw< z=M7`Q@=~&A6acCxVnW_5AC_XDJP zXXDVMFn<}$ug1oW?`E0W+XvF8pyK~{XP{5VH#>1OzGg%9!^zG0 z)l5_V6*<1FtC*090Ncin;0+0*G0jtxL_Jyn`O{3TmKLdfrPSnTIGhpHR*SkEjjfWj zowlwy28=__H>vs4hgwMqri~ik=NAh~Ww_q*)-2S={PPS!3wwreB?aw67Gp1i3PhiU z2=i_bebsj3R$JaKz{4r3R0w}Az&4ak;62Ien=xk@Xo-6Qt9TK>9@r-h25 z7~DT-w>=`p^{)_aP7S(d9p0LpHF0dpw)}3a=eW#Lc*c0PKOej`Q&IF_Z>HlEH8B6+ z&b;T)9l@+^7K}tw1Dq;OX4pTJVSW=2wtb0|dYQOH*%aIX`k~Xb%FzzgKo*xSQlg~_ zwE6L*?Z;fkrKSKy?Sl460#O~Zf%`#XW&8dr$d2}?a1DcRixMwS%WdqqC{X#yq^WYS z@uSrV^E5$!j!tH24nclx>)r@-1WpEy+eD8#+KKe?Cf+_?b+o0O=u|CHzs3)@+!$Cl!vE$LwY$Ylu?`x zj47Hf5+TLF_5sG9@9!V{;m3M&@8fiTpKZ+DO|vo32+cVI6qfc=&iFz|$K>3KMJ4nc z*L_{PEea_Uop4Y{oS+=5Q0DtIT!@~}0G2(ZjI8{+D;4LrfWuh=$=b$bUL5A?H1t~W zv3qeMF14M^Rka-t(>ILYM;Mb8pZBk`YtG?$-wH)XFf0wOW!c`BAr-RX{1$Cfu;MSj zE@zU;T=nSC9z0qXj}x<(b@LJl!tH!RDU(yAmO)k!Hb1~!B)zV%bu^(aJs2GtbL16? 
z8kji7Lx2hG>YlRf7Nqp9BPs(5-K)0sUX&9_BD_@~R==4SH8ntI#u0>@8rT2yJ(!Q}8d$xaurJLVXI}d{#=AoPS#m_$Hg$Q^jyJ+2*f{cz) zH^eDz)08oO{jBsRb5Jw!RLFSX- z^NGFT)m?HGhPaWVI3M}4%Bnx>qwjFz2Gn`-JbIBVy4_X~`L*=dAZ1eK+$w4^O;Dw6=pMlAqk~q=<@z z#uSPm(H=2}Tk_!Z$grt<+1qIx;lA%#q!sAaayu4qq7jAOOwTM}ZxUSWX#LmnE++U) zx8eEa##&k9HP7wMR6Ks)nmwl~6HSdZKjr5@?D4$+?ZKLJR-HRQ$sZy&=yMK|j{4t| z<$mDi+;^v#j9qgOmhF49XC-_(LPeC{jiZJ+>)5(3Nw?AxqA>!K6=Tr(hyrG`H$g3Z z2=f~-zc=mpt+{?XN^;h>Nw>4>H=6nmzG9dh5X(WD+JJLY%C)O38#%TOQHg`@`=> zfK+17noiPG{(4_{e!-LGXc4Eh*CQZ#eZSn*blHv12{s1@iAU;PQ^xt#V0L$gkBoGj zQCvJe1zM+3#}Vq^>zybo%pu|4T5p8PmDFyKk~p?VEXO zpUm>lKjU-D_&rxm+#&Ll__Z`3lZ07)bdrj2)j3InY>Hd+JEwa-`tR7K8I6mI(u*HE zh2%~T7dh-BWiY8x>j9YiXe=^x-E0h}FsoFoilj34qdssIS4>J4Kz{!=O4`?V2A%lq zg5uq*k6uZuG7z33%*Ub;cXQ|1xV|<`8Dutp_@5o-C4+n#-(;D~G9Q z%FBS|vkPyd7$^E?Dr{Lyd)Ym!-Yz-ETgfF>65o%I;4PV|Ie_`Q!L*xGO4*5zo4wzp z99sLQo0QvEZU2!ne{UX(;=1$)iY^wX8Zdl%g0q)LIKM-j%iGh_eY(%RHp|VXJY0F$RXbyEcCyg6J%)=kpy*3*36w&?!3-0CA;-xR8B;0%&&xIM=- z=T%0z_(RS#X001)))g?XKiq>d2~+56H4 zZZ;wk;gzHs9XUvSv*P^TaQ>z^m*vmjKg;sI`I#fHYvfy!d_*+l2ruY;ouq~rF zyXrh@w4q57WsrH4o(a7OGLwnhR34F%9n582Eux$TH~-W_*0VQ<7|}aZz_@kFtt-8* zJFZZ7?N5K651LI+q#!O zF-W%rT^-rv_5-uslIwmQ!G!Kt4NMyV*u6K|e=swUB|5n}{WH?0v==)>2nQj8O&JqS zBlBJX(e1&h53V3<+ZXr8mzS5}fmYb-+}?)mgBs`$jv!xG&fR zx7Uy5W4)wqx8;B47rp~x5j}m<5FLr72KH1*yuJbbI@|y@UYGLKD;I5jAF*XGcLL

Y}+V zp{zi8)%#M(IwLV0Y))Ro8k>?W(L{qn%N|%%cuG|w!-?YkZp~CkuLh<*F9T<+StTa# zq-}<2gy511oF&Ny35F_^>xtPua_x}>|IuPh!rM|#7;_Du{ z+ngn@tAMRuhXNg`aZ>WzLAN;jP%l%{yo1Zbpl$ox!2~)SE8?xRFyF;u-o^d>{`Us5 z)#bhjGz0YQi?mPClUe>Sf{~RrRr%~p%hO0XRnNejT`22GN)lwvpA&0h&5<+koL6%7 zIC~d-_)(pFHo?0`g12i&Pvx$`vz;K%h#|Xt7d_@fc(DZeE6E3(PSIm#a77QCXnIHyFpUq%Tr0g+Um!~FSe4n~y@#N_2-4L)x)~nLfxH#V|)v1#)q-b*nZ|yE*W(g`*@sByZU*%Vn-* zE8_E|dF9Z7i(bQz%vBx(m^>phG-Z?!_Qc%U zOtV)*@36VpXFEt&bD9~Mx&UU(1nHc(73FspC56S9ACaoNsR^92H-zX<@ZoREp3J6p zukL7Gd1cPXkgMhdtOCfK3ljcZ!LDe8R->k@upbcg3{L;*)F;x&hbUdPxQr5cY#r^& zIASHUF8%~pUN0zNg;~AyT z)lkEGo&#QZu#ryL7wy|Y{F22f^`|r4jIlbhIYP^DG|KA+4E!Q zb&n$AkMEl@$RB%czCBMp9FAdF+ul!CykagRr<)mUO)gc}H)WhZ z4P-dTY{y6bQfF6I>@h&eyD3al2Km$2xBXAA*+QRky8H{3@qO)i-NiH|q-fMPh&t|R zGFBggE#FERt&<~|*rGKsX;<*)IxLe017*aY$8xT!}*KL6!@wXztR8 zjcg>0J3ic=9OY1j@>NnF@dcEXyxycajYQ3;+u(j@+OuH4D`1T0mcg4mkIUz$rVOVt zpP%oa2$clWAHt+E>mxAfdJ4%(F6)i=efz(J z(tz-}>jGk1&G}OhyJsPO2ix<=Q|dRo3*%GZ(~S9MBuaRKN zF5}zsd>jiUho?>)nM*!y7konQ(pAAEjj}b5q{6bbeiWvQ+Aslbs7lLEAz_ZJBa3C~-McYGoKhn7?h;BO#`|gd@k4)1iyDJw-gOgby>Ev{ zC;+&sVz5hj>DtsFZVcMXmp^BXa}V_Eqd{^Ur?EwJGzNJIQ@gZPF@vk-@;5Yg^J6>5LBE&Cc+fzUC|)BlKzIKV0IjE|rX_TN}L%lE$rr zCMw-Cc72khEx?%1Ls4>gGK=|JZu(yP8q-bPL* z3EWY)G5N-aIEe)D-{72GNwP@}ikk_m`{bo$LwIPE zMBDb%(v1%BStmV`&JjJ<+MsdK5UP6 z7t8MHfpPgz_j`))H$(`z6oT=l#cEHzsCN{GXQgh_pM`6{4)H8wwunVt#`a|#dq>&U zpWuW>a?z3{hXcTQ$X2Dl6$yOoN5?uyfx-t>hC+_k7Z_T|v(1Czfq=Wd>pIqJV}K-9M2pESfUN z0?D?+`9Q-QX;L8JG4y5{gg%5xe$l^%Ib{T#s-~L~9g<+xhfs*bbwdX@f3i3sI;8&$ z3(<7hJ&)wfogT{pKYj`%YlfHeODW@Nt{P6lP#?Sq_2so-Syq2e80pc`*qA5ZI zjUZYMb#8*>fM~m-?R2F(R2s}P_Mito+{lR9WK+Zlt;O}TgwGCrCgR`aPJ7)9mPU$0 zBFzzb9WjZ5uMM0640ffvA%i;gY$&pLPXdeDS%N@{abIpf_%8#8y^PV`ms$|h1BJE| zY3Diu^YHUf+MUF`?Sfltj$x-hL+Wal6+jQ}pUJb2C^Y?DzA-Zzzkx~lq)XS-05xHE z?3~qmeSp6y(eMdyrk#>ym_N z>)#z7x8S7VjUzs&;2w%mVprCHOf9(hSN+K@wwkAe>c8gkVG?)yHI$C|7aheriD%h6{XIp1Y>q<^k-~SK!mH)v%|H1E1_mllx&9S@W z)69%a-*!~+H*lkCVguGHXyA%b+S{rMWCVvQ0_lO4%6h%c8D^LDF(YYl|M4=^Y*t7WLtep6ozj`G#iEH)baaccWrsa 
zzfP1vjr|999WQyq-Z7Hoar6pahl*^;!ipM<{MtbdW}T=UF{wV^FSi)EK8}t?t#pUd zdH4cprr$9(Wtb;*05>pX;~##?Ylbfn?G7yzMjWaC4w4nAAYWR-=rV+!UuJTqq>nV< zBCt==lV%&ULP=_!2cy?qP9{P*CXwVBL^z-M;W~dWO;dM!>Za*jf-k3XM#=+V=ITh+ zFj5BPFp*&|SQ0R|!}5#+0~#DSNy7bpeg6D-=6+F?dIt?u+GpH=kA4ioa*onep*+3N zK{k}+ge&+Gzy#W%$+>4a7@f=r{CkYK_Th5sAQckY9>|1+pliw?e;e(S)=J-(Pxx=2 zb3fRhg3ncNJ*O$NJ4Kq$s+p#U65R)jPqiNBTdzjtZVDZfK+}j51OZ@~hlwfl8kA9L zFhZ(`u-JO-543Ah4S*zd!L!+h&YNc0bfa7=7a+r$J*IA;Trp)s;%4NO{IRHBrhQ*yiD#r-VoHUDxclnk3P z11+xInw-jCHf5CG%_OMU2mewZaFHVga|iC_>2B#B;gzD9FiLve zR4o39_kPqYQOCYWl=K)6>a9nog6?56^6YC3%-oe3B)31+tgbsCT%LO0nX#sjpA9LV zqXrxC+5SdQjWw1N5;M|YrjDNe^875N66~vq9SRN8AQ*~=r%8q25aic(*fVcBDTIpo znQqmzrWOa5M_oLT=JRSx({iC6FxD3Cqu-xqlnB#_mrWeFAeU~>O--mH6?$+FY<6TA ziKwRnB7-9XyLzf(rfpVHjm}8b?v~`zzkO2#NjVuDln=TgN2p%0DX;;5@kp=HO3DVM zD|CzQ<{XkdLIq5AU;2;n*E_xR*ZX*we#hn(o0idH=#g~dP9a*5_xt_D{{Da5>GOK` zpV!y>>&0~BCwEsqq0N1o^!qgN z#8FMEon*}fJUM%B21}g;y)_8=CGJ>yG|^tcS<*@Qr%J1X%araXhifNyO%bGh!lj-E zh@%s#{3Y}Vt%xL;NMPIDIf!t*;~=O-%hp!`CEe#VAYgsW59G1$;R*U!%y94XejBU$ z#WP>~Ok#(bMDSvC;oj`nor(FH0@ANXBB5#`%TPUeTltR9*N|*w0j0g#qvf7+QSjBm zUq)wfN6GoU;RzQoTBqp420QPIc;C3}rX=(JNW2DhXRn!UqTVvoF;94Cj!}Mll)uev zf==9SO%3DI&7aHgnUMc-4z@#5(3nSgzuY@^x!Z@gXw6?#rO(nGxy@PipbK|*+U=>J zb<_p+WG=g?f;rR37n;|%k8j_a=l-sQ>3C+drbx-ie*Qyicb8lu=zE7g)$xvSSq>&twz85GaXxU<@~B!qHPO)n)ke4VyHrCcc2UiA-A*S z{gE-fkjkO?8vYox@mLorSn)n}+bclE%j=wBFT>=ks;>`t!_>Wn&U+2zJvknstTknE zUnWZKHI*Q$o!hW<(7p+Kd(OEn+kb^}p6`nde&;^9+AH&~Zi>O!V{V4waK+Jw@NaEN zoAmvXEq!Ex3MFO|DZdd|l2!h?rVQ~2Bc%*J>2>D}-_@ET1HCk#Fxz9e7K*Wo@l`VU_1 z@}Ku~bj-)3jY-w2-cYAr*Nwp|⋘8IG80(Y%D5LKiJmfs`ZEJuw zhKeh`V@tUv_Qkl%(qv)Q-bNkup4g2!=`fGbEB;8MtXDgu6IPNV%v+#j1gMzrg+ z0Fjmn5?j6IDC-tI^HIw{gJ58WSU&RoWhW@969H82(?GoAV|apMM_7B|pi_|EmWr++ zNxx_!{`qFDD4*N=GC#EpWktf(3!g{)U*zl;s7S)TTkuyn}dlghZFQQW?00Z z>Ce%KY`v22{xWj97&sc=KPM~w1$o70n*Ppj`q{^~oyQq{PTR`PCM*q1$#^8g=PSp8Y zI^BlmdB^JtA~_}B{TAX%7nm zF7W%!!g^mOu<%T~0#|+-`h=g33IHG%xBpLe(K{6LWQddXxE8IMG~!OdH$QUD9gXsE 
z`Nl8alXxb|q&v!lySW7M=Vxubra_u9&aKYWNIHIAMwt`x4Hlhi8j%Nl3KG$oA+m#M znqoc5Q7|Y(smpz5~+~t+|J1JioE!v1i>~j&(hm zJb2aY{oaQP&KxCjTJ%*WIT}!a2F8ApD!ei! zEBnZ%j54uiyy7G6((74FXxL2DaNpWcEvy2_cu2NKXKM-L3R1aln5CE9=+j0cb+^x0 z;;U`qo|_+Ze@>1xf|;R0E}=XbTUmE!GT2!u^oEPT#RYTDDBRC?RI8?juu_P zp3_b?aOs?HO=ak7?cs8UJ=cb_#ppVa$!Ctf8?FmeWlKqeO7J%|dbUE8B6T=QAWzBu zh*f$@`fsZUFC;dm_1)|-8p3*{OvFqI7%4g*7QKokQg`043>pldy0tNS`~0SQ-c-h> zPc!WsTbq3g8K4QT^&w0&jTTLruK2p%BaumvcXatCaxG9avjCa~DT3EWB_p!u0Ko)k z<23DqG95&0h|osjK6sydE(BH87eRH3)_QN&>*!@zL}Ro9ZP%1h2I=)VXVN$8wqL)B z{3EHsQf8lY86*i+&#(0|#&fF7cl_ccl=U=5eGA$*wAqJ;TqR;AWt8i+zEuM32mKKC z!n`dIwvZF&Y_5yasKr$X$M_dyIh07A$RfrKIIxhWmYKVlS%8vjwt{U2jmYee$lDtOUHEzL9li@xE3RHli9}goPp()+ zGO0zF;q&vUYUaJ_7zQFOpktK8c5;mJ+oS9{{YNU0ioXiPtBd&0WOtVC>~_7`$n8Jx zyfLOo`{x&ZL_Jzdjds(0$>TO7>d7ow@}QZ0T37!`VV=WWkI``@VW7so?g%ziFd4rB z-NH3}4)m9RW^r!ws59vq<=6b27R*sZ>6SQ1qmfC70`caul0CmMi@0$NOX5F}V2YY( z$Mzr7-R_A~`mziW5A{5b)ZvP2Ny?yol=DiM<_sf==59R*TmL~gC@|d2eV#c(Ap@D= zm2eHiU}(3&?|}08G05%|Y@u}lP)lc+kz*;nBNH6h_Vu3<2o6%!%7tEqMp`gv*+BG{fL%^wvoIy#o3{)o`cl5*N6aLb)#n5*5`tkhzcXU)9nl9(+Z{JrPFTpUOm?y>E&jJla{j|`BVEZcZOc)`St#HxqsY^`7`~^A9Zmu zq(%h{P*S=M&7lq4cQ&<;j@*(hw?z+6PuOWT;e1_pkI*QCi7IQ&4h^N@`H3fhkaRDC z{7#4Vm*>w#EbS*!HmaLNcRNkaWR+$?&H|kXXi7r~_7J2JWstexQhIty|EKv+f9iE) zUh{R-4|VCbZ{cf(D=zt6R5a!9nsVPS_4Oz1S3Ffotfrr}`_cYJV^5fmGpsjC)xl(+ zJNcX z6+--8o6B~HNF=C%91iskrquh(`}}fSgb`}F2_j5ZVS%%IHZX$3Ij^bvKTl$M;HAhm1XrteWEk5HR< zk5t41oLn-(9`+(kyXxK2Np#y&q#LEWJP+iflMJzf=XQ!xq||a&fs~}!JKY5^N01JYKMkU=u>HsaWiAqU_6jK18Z}xq!4fsC(ogDcf7vV4JmGj=j4d;|U%|rj zbTO#mN+^Gt&J(6?It>6i_*XFc&;~Vf~8Zq*jy)n5Wm~oZ(0=!(97%IuLkE zG2irD9h`+zQmY$D_E<1Bn z!BWH`gDg@&T4-^yUR|oQU4&VKvOF`8s=e(eD5mOob3!&Hl)nvJIInKs`=tB1|4~KX zjSeJxDdC)l)j*Dt);tqdaFEeqL4J$qPyD^dBGO{=cA> z0lHrku6?4HS%!J~wtF&nUMi@<$tC}Si#~cb-9=wQ`Axmr-i-dFmxW_0#0ja8g}yEV z+Uw{J)!d#HIP>-lpr+%#6=MXkRCi!Y9<)0K_mU~S@zjQ{#2i%^>EBXw_SLj*6518U zu@8NvE{TTgaGril+Bj7+;DYQS%~jfK99V%V;1nypc)ahjCUc z%q)v1=5(9)W%mptBFNQLz|`pOSrar_+U)BYkVN7!xHx+rv_oC301|`A(muV@eDs-; 
zBw|H~xenA~3|&zhn&dFWI*D#dC|TWwPEQnPye~JdM5$ulr~`%(62DUh7g2P2`bZ^} z^qS9&_>~7E+-~dwY@jH6lnOGmx0FT z+I2+{H zlu>>)^ER2vPWt}Ihi;`pmZJQj!7y7dPH^_75@+%ANFOJ)cIlS#@PiC@=fKZ;4a(^L z@rrfp&7{;H(ecsRiVm8Lif>@eG;AxPv`@O8(AhIjAWE3UJl!{)s3U6@3vPb2)(~|| z*a}S`A=nUTiYFNVn_%3J`c_;02UCLc9hAR%x%aP{-<*A13+73hocdp2N=DyKO;aUH z{;nrz8U2+$LwJk? zdoXnor(5;){hlFMxE(5Df408+JrObMvZ>BID_5Xm7Fd+W8IAzy+@QbByp5{P+iSGy zK;|u4CR_v;F@^~wdiA4sh=D`~u@>a)<+DY$aVYEj#q||jvJP;6P3y^*{GgP^5H{IV_alhS!o`>6Td$Biwz$Ngu zysdegwfSpx?4>_U@jbOw;)sCL;K<9U$zo*uLw!>==$4+HJgup9@_T9lb1olRYPm?IUUs$LE0z@-EMeA_1N8~@jbH-t`mM|^s@I8&v7DzufQqo*|E3W@^Zsr zdVPI?nChM&nUW^YimbeEK31q_w$KEP@`^D~ut3+~%-Am~z9#%Bby-C?%tBEws@96b z4CXe#V;jT#-e7)*FfpgG^};{NW&ZfEr(8dQ7J8Pg=G;F146Q0Sd?;^g2LW-h8YeME z{_-f}B&}`=hCJoWSwNc)_K(|Qc#`%!m%P*94RDo}HL1=4-CU=(2=wPS$cSWKBDKuj z-{i1Yv8ZErg3)Od_NpJl^4rMDnW_rrkvTP;qv^3$xx0GOF!ci$HI~QL}_~mPHdh*pb`}JyqDW^Oi1JzL%vN;0!9Rj5}>v_v@ z&qSy(E+6V>dvf}ZTpMymK0DZsP}xP+o_-VC`^^)$S_H#M$l5P^f62-VwYetH-6&Z?cSUYG$>A=OCn6NM;;?u zS`E?WWbxbJX30RaU|shEYkoJTdpGK4gfX|b?#1Yr&e)G0@b%r?jN%>0Ym5BK2{0U{ z8e(bP1nQb=piuM7qQ4HRYo49jX%gKq+4DY4ExpOpKnZ5+gSZidwoCIP=Y-o^8YdQd z4bt7TKhX)UUCARt#!K&+d6 zq+S6ct;YrkuYAQ5x0N^-*Wl|Ccr`d{*yrHkp=8|Qri#*xj7+KPPElg~2sEwR;=cNp zrJm?E)MxwodHcK%pKsiCVlk3B2tImHd}$;b^cl=vRU30zExP#jX*~NKl1^HiyS$oX zI?|2|7M?4rtELP!?x#Gsnl6>SHtp38OW(8FoXXB)mq98TWbeEen!|ZLb3+0ZvbDyi za}+H*W6`Qanf<(Oh+h*v%xd{zzWyjH^jUR4}ppU2xd>kXhSgQ2s`H)7ofx!`eUeInqGE}VA1@&OxF_H()Gh9b|A zY92PQwa-$#MHAl1N}Nl;?M0uz%jaTu)w>Np&B9qX${B~PFBxvVmaH#XjXY6X1M_n* zYY*~F{}|u0h_7#-_s=&jO;aX>Nj05<{mHs2wTtM^QF&VYWi9u~bp~0neaD>V&s*W(P?IzvZ zdpT2Q28t1q3yZ4N!A;ghu6H%S**H83fsZWsZ;XsvN3F;cCzXyML#8cU5l$6ynCf4_bOyG^ebCn`O%xpOhr7*)y;yX1Rdy;f?7b(6ki+0Ra5Bf!uu$!JyIl$oA z)y94w3w0ot6_OV@VwhAOz6#Sj9^0Sni)_JOs%UQ6nNfpD6)#rs{P4~%-IO8zHscM0 zPj@rU>aQDfeqZ4l-wkq^yCkFi9&SwDIXzSXRp^{WieuS&X#eev>CztHza8vDAgn#3 z=F}LdQ}!z33;`txFK9zNpk+W+4f{@l01AztQ5Fg8gDQB=L73+t{zMSJR)}Ay7g;K# zeE}TJU>1)07mP@g@I{?#jk+ z`hLT_Oiz?*dg+g%w+dO=m2?G-Ht0}o#fd>dBwm4YmP^~aSuV4sYD_>noM@-jJoBTW 
zkqO9sO?^CxVA$hX2lMorU!Ldw`s2BEO;cZp_3>oL^~VVwkzUBa{EA?HH@Dd+yTmtZ z!q;`O=VqK`ky{#$a!Tv#nc-OSQ9H86m_5=H9rDnmaaX|L0^)Ovem9O#d?OVPJ|O*6 z_*_mOX9VK+qtvy0=xl7tIKP|sae9?fF3Zp`4&Y`jYs_}4s*`82uwa*e2F-IR7 zDbQ$+uk9-;n!N1VmD95{{tU6Vj&6E2Duy#@oi5`{_GJ8L^j2PPqxCr(v`!;}@54jH zidth+#CUMg59=14$a%UH!TUpmC~4HVnfl0*qcvZwW)fm_qo#EBQELTDGL4to+emg& z-sJOLcF)gJ$h*8f-~Qa*gm`}L{&qsF6z%J1Oz(}Uyklorl(e%v+Al&*ve(ho$>y{% z8&fows?P|U;K_o-K(5}})F<8{57>~?n&;v*Xy zk7xr5FHe;u!RpL;Ltexl^!IsO{#ualN-pb(&%i8~jMma{--2VwE_~Fz(6zczI=GX@ z<`|~mp5I%~itu+OeyjKQ$zDn?lX_RnJrM6+i~Wm6`7 zH75LqdWY86NljLUB}n1ITN+~jgLiq7!cqaJb&;VIzGxS4&Q9ZzeJ(jl8{f_ddQY^` z#&=Te&0+lD1mw2|nE^GoZcD!1AWX>x2`jXAQbG3p03}4Jexmr)l;2xJ;*zR}=WSUL zxl=Phhk28J#6bB8Olnce!u(+{d+|y`#1l!y4{kXmFCQRoM|bg$Z+t-gyKy)S$wP z-oosn{8v3nd_Fv#&C4=vd64-d$t(5sM0J7M{l4xAZKjh5Iz{R3c{86c{AFH?O`^R= zurZl7EFo~9)WJ{zbccAT2zF`NeF@e&b;#4$Krc4x30&yIEGA|rWZO=#0Tcv z&sl?KUv5NQX=3cdj+(m&J8Qfx*U2rXlUjuN3ytT%p6ete{(ze7=I70m`NLYKWlg6V z(PC@sE5j`@Cd6w)IvHJ{6>uBJeZQe{`MFMwbEgIxfqna6W-a@mK4H^gARl_Z(O68! 
zI5b=8Ua2$b$p=Y<>xFFz#ya?REsb%E;h9h4qY_9`Qe37f)3kI_lc}77v%W|R_tS#)clf4z0;KACaFu zw{sZ?wBCdB?vM(C5-BJ@hoH%qA;o)C1GHUSETpBI!oN|!&HOyfYL=sT6N<^u zj+JsO+8R?L(@liafk8QC1a+VnU^++Sq(vV8=_3a1Q7IOfA@AAK8RNt(R4vTlGy7+L za(^%=a6XjwbOiXlq5-(@5b<(0#qR}_zs;!i-ps$zY)u~sJExiQcGn3w1}DJ-X$}=~ zjISU!mgKGm<3VS)-IPd$d8)PHvC6?ftn%o#!5~zyH!~`L4+8zShaERO&bgX|GD!1; z?-0w>>4#BHlUy0r>>VJlZcW!}K?L5T^_GTUIKq0(i7mJ4@BzdhBt73}VIQG^4i5N# zmX(;4(jXa*LdI&LGt-2`P=}%`=1BD>w z^XnYv(>&alo4tfq0;m|}hrMC*4Rt4B=1RB&=U9v*);{#>#vvy6?V6%Ff86WexXF80 z+3>YZT&p1UefhJD%W!|!)600Bz~7~9h%UFCWT^Jrjl7LoHx+Ojp~Q=aNk*L_^UjZS zD*6FKH2P24?pE1H8$iT87w8VDFl&Q)*1XRVyKtVpRF^B;Ycs8VoQ%EwyYMm{S!~=> zk+Zb=jde8-@2UKWs?62){mk)w;Hc{%&_ufrKG}SwoYKX)e%&^wjz(WmMrD)283@9s1KO^V*GGNsnK4 zNY{e|;Yx^_Ma}aGP6#Mh;GE4qc`~bAs%58R;5_>@!D(HVDxA1H zNvffb)?Xd<&Q~;vIQCRURU`q((R@cIx%(uWGEhj$kYjIk9FqJ;8GexGUm zK}f5h*we3P?0Q0M^z;@2N6LgbdXBQKLph(E^!x@bc6vKrbV#c9jOd_~jAcnTMTF&K zSlYr=E^q>)8{)Hpr-G-_v5Hr8^oIBOG(F)=&k|-Z_VV9MRTsnluWlJWl)7cMi3#uH3m7v1ceNO`4;ql=K z2b4Q%fClmVyfqI}qE>(@ozls{k1+Z%$9 znP{L67XJ}~wb7Mir6GQs6P)BYRp5jvb$lxoa8|Uhp{{@|n88eB)KRNqRgsQVB2O^d zzYV)Dmb-L72X{M3!hU=Cm|t49K}9JUy2@PH=ZI8V`jA(zQ+x^Skp0!%mVXj#Z2T03FP}j#aOlS>5U2z=4hL!}Utip-8o@oi}_b*S<7Q!nOXZ0v1 z86|5qr$%q9U{3Gj+%J>1W>mc+*-|8D`*8%bO6~}Qe9#Al%dDaqXw5jX!4g$M$-SnS z_fb$s!N=wONfl4@gISLrj(v2FH0Q_0JW2`XcOySA@2a6)I14{6!-zy4Vk?y3gX_&C zp!_YvC|)ORVFip-OE{jHFb9nIHK|3%@d~ON8+n4`H8d#Cff5U#wJ7pTd6bw+`1@DA zEH{iqA{Q2PSKjH^>m)*6<2f%h(Pb-%eo9|MY_*59U1-p>`=($`nw|*V_|Yx8TJ9EQ ze|xb@*WVAiK+89M2U`-!UgR+ue&9g_SifB`y#Me6Db?V;6#3n4m8b$#Lm5wsbWi_35mGLuzZdkQlW*d4iI{b1Y zbxvD3Fla1yL<+z=%Rz1JX=JsmO4mRu3Qa6TAl)293JgLB{ z(ZpLV{RXgCy&*n{s3vFOLJU?$?mVf$B3XZ6RU6A0d6V{eSJOZLEU(jyDFwNoCbg)U z=jvBtK#?uS#>r)Bqu0;}4Np|h8H0LiUOn9ah?1mKLcW@b6jxW}xa~uKzMdDM0wS7M zspFrdFys!Djhw2ci1N4)|4EJCy+AJKp>{S*nIk7Adg@=@7Qb(K_J&$-3NU{falg+` zPyLe(LB#U@5mE;&nqF2mi6q%L{7J`veeQdtM#eETXRL&Q;lq+K^Bf46IzvXs2&JHh zaj9;+5j6~EGN^ovU9>s5yo6}VAio>!DV`;KWm?paymsY&pme~zX-4@wC-%y*CFM4`5_ZN1h4%rJTGa5bODTUYs 
z0=eBY={b3jt}^YxdGenz^F$Tz{+xDkaGp3hAj+QSAfduJyx*wfAJUO^eZh6Z`&_L& z*sPuoK?Wzkh!eEWcQd`+m#KRrX&z{l->FM*p0<20PvKS3(9^&Vn9~Q1a3os%j-?*@ zrUoewkkK9XIjyZN9aLB!qi#TEtcA!37&}6f^2rJe&Xr*PH1f|M-Fd)6dutK%HJRW_I{gFUp0>QuRvlDV0LGW)o=|z`bT~elzxw zF$YF)Y-IaHY<7uXoCTR?kZcD0HFivJZMjR-}7^ zMART{b>R~^dk{bm(IH|Sz(Z4G&IHpOg-z*Anl;x5gZ~$?B7=5RL*Yi_Si_LkTFpc`)Ka( z@9)YUB%W-l)Tocu%EM#CA0*Y3P{I+l%jlG(rrJT3nsG@|8bODPJ@q0GRmW!J)PSe> ze!;YQk_&a8nhU&?UhIXz(ClrhpY1%>VXG0e+;T}Cl5IPX0>@7m>U&vDvUv;WX< zPD!c`XkqQQye&9{z=1J_QD?7N%1PINq&7<&ooUH*V$4~Xb!^{U;V!V}z=xC2v0T9U z^RreJr27V*XG!7-4&#t%ZYkQ+C+qE#sloNF!)ya5!04JX#?vDN6T@|Sq^pG#7$Mg* z*t=;Mj$yX{3MMYokzfkWFi?IpxlH}OH#tiO`t6Ds(G@t(6G(ZcdGh^B=c(lyu4krU z8i5QXOsVN61^L54;sD+KBczsB`sx0md@K0cXq`_jvcyeNm7_IxO+wumibOJl;y~5d zGd8q*I_3WX~&a6h`7v?hF%5oK-?L1%%YDytx{)cRB43)8{}lNOB>4%gY|f zTA>-hU}>zQ6i!SroTTP(1?N%Sc;=ySW$$97OCRh{-pnGJGRogZ+S^S#YL;8 zU(STlb4gi6BKYp2*D~RiAhojs%%27lQi4(m>N6<32cgI10kn1W6bZZ5Iw9e_R4>|4 zhwqID(~Br+I9k#}i5Pfm4tw|Z)aVwuP6kR@Z$VUcs*!~AV93vKp(WxqS4!I=ko0Y^$GBU@{2su|FfVY7_WUh|5iCEf#I z@pw;rSqUj4a~JGS-g5(mU6R8&g?az;s-$*J0tX&){QMp)PuhBoz z%r}o}EbY!3I&$Ix1*4fEiG(g%c9#1>H%kd7E&JZ`=eX?rbUBUkFR#znRJu0GD2+(N z{#%8)V-Iob`>ld_7HQ0jXHmZ{p^huQ2S=FcG!ZNYOU}b}G*OpO{x+ZO=j}EOt0#xg zJA0BI;631KkpWDTCg(~ZDHT8YJ_hM7_9=;=%Ld@Xg26@X37fOI25~Do_c6f&UaD;^tnDHiAov2voTt|=q6Arkfa)BIW8 zMy++}BMsJI?Nunj`bHY8MU?QF-oXTDglqqt12@lqScKHq@^Qtokpa-OZdAtkjmYEg zv?vuZaQzE=`i>i1PZlew3q{u`=aXOZtwiAUwOVgxxm25 z1%h}JqNbfef4~qUWvv#)HIHlV!3i?1H{=61=|LCZy6W8#p&{_$r61vvg!dAf0!4U` z#-CpRKsBY_y@>^?a8R*DXCo!W~x;m zyhfa@0gdz}lr+pINYHX+r(VAWCeEjmPjI^D$ZJMgdrA{sORSASR^eO?IlF#z`z8(6 zxV@R(+w_sh)l0VQ@+gLnCDwz5muaH5h!aX(6PrPqqH*}tpf!w9UgNN9K^inkVWOqy zNL3Zm``v~!+FDU+=hiLB!Fq6u+Ls-pXJ$eoBwTt%YBrwY{PH+Y&w37L?W{*E!Wpc* zYPvQv>hX(>8o~VD&H2-O=;t^6A)kz1j8xdBMj6*x>jgO(&Q*ldx2TGYYJE(+m0XL%Vt<`pBT1_awJPSGiCKE*pE7_F)o~(Mq5;Q&@Eu(rrg( zyElEDG%bIYx#N##9h)7Em_yD(+N)GWtze236KF2B0Q3I}mt9V^Xu^@w^}ad-LFi%97(a|^oSaiaG(~f!26f|S%V5wI1X|Zw-A^z3u0Nt%6!a)* 
zdo@YX9pa>!gd)y^X}Q<14BuVNjbwz&gLUU}9&J-xbrg|i?IQO%K1;{zUOw-u5*Aum z+N89tSmP%Kw+qJP-tuNc?fzg7y`CPJorraJ16#^ac9&+*R)k+D$mS6KLP~BCYWSbO z%YB^H3tAg)%ix7q9pmc6)hAZj-Bn&aE4z!_jR50&Ya@6Qn$kwaZQ#$N5k0rsKOQ|L zNgGV)uLh((y4QB;=NDzaEH5<-MCL781%{wxEA=EI1^8YuiX)Q%O|2O} z?mT`sx$cmPEPY1gGVqo-`GL#Y`R32>Hr?6jJtW7;ox`;D|Ux3Ty6I_p+a8_4#41n1@cJz(aHL00|9D*$Z<+iSxeL_{GI> zx{jB=CP1P_mY&9p(N5d7VeyaywYO)1g#NsaTi3h_YqIWNAnJo*c0GFh+pw)N23m{Lt*$E{&FgnpLz>=ZWu)N)C>0`Y=xf1WCo4U3~+ZwtC0)v{a z>1CX>$)NoC2~mDGoATmC+W_>Pg>b8G=$iQMGPh?&I5c?RO(a3#3de0K|80^{JBbYxC%pWpsa~G{rS_q?*X@&WFf6+>4 zBMg53(Z&cy3A;)L`!Hna(_OHWlTRaBzuRc!(6Go8HN@f;Xu?|Tnj4wA8~l6k1+Yw0 zg*3eH_aBh7)Aof3r=?6Xa#*oX|F3Avk>}*p9kjin?EhzI%ot`rJ`6Kd^tsx2<>k-> zS_TZ-!^UV_#&G2knlde&`R3K0Zmjxjp3T-bBBE3D05K2B9)_et+sE@09_Z$gR2ek$(6{_AxFk}=tW-uL zI#_x>LII8FGSGP*r+KEi@=CtivPsu!LfcIpv=c^kf^%rAa8BTkuvfZ}K zS@xV(d0XnZvxsH%w#OMUJyyYFrb|gd->;j~SCQ;%q&UruG5jeiV|?T(Dl>jlKj=ur zMxz*rcW;9^v@Xpg_UvL2WJ$fee?%wPxu88!XhUcSchY{=JleEx-Zk6PoUK(iIN%t zmL65(2qbH0!7yJ5V?i$CgIZf2Tze*gw|gRkV~h$|*t(_&)Oi8FjhyTQGR@-8Y^Ft- zXSz%m{GB50+q-?HOxkZ*1nqc46xP2RFfs{Axp2ddiFM0}gYl_liBpX8=O;-dJup6n zt|n5MqI}~Rb~Q1WR9vWpxzMoO;&B>}dHDQ%HPZh1FEB{w0$1PNTsG`L5SvPlNXt@HOXe!S9xe7@x8(O}h zoX0FljY6O~a2TSq9_R0R(NtVbNA{c!=M)N)eH%3>hR`6*+I11gIDglgrsDcMTJ@+^ zkT;9ejr)L50HQ;7e+F*T!1%#BDti?`u5J_TMp=e;9GtgRK-jr@Y+kN*NMV?$*a=E{ zIh+7R>rEBVc}?7HeFOISc_*@RW8RJ8H)mynDc0rO4^hWiA^WC?5=rpS*}Q*__StbW z<#RgG`$!uwDP@3IKt+lkL&6Z&)u6PYhqjUNTHn?+Ef`fJ}u#D06?crm&8Ap|KV@+qPvW)#lZsf*7OxcAOFs%Iy$Q+^+AAj(oj~?oY2D zJf|4zwRO2RyOnvXgA4%?!47S-@OH)D* z$5BQt)tY#fo!0@BUs;r`n(xX?^YRQ=^R6@P)p`FTPyWm1d~8f-9VM;8%;IWKxo^)3 zb7sQMV8*=loAhedE+mYywqrMU`?wht{URl6{(duN ziS<3YZGSUNmw5qK{#R)q_a6DCKD&&Q5wp0|MW zw;7FglT-8ItnB5ivP+C8QiX?3$`I!m`Y0(z?et}!#4zE_8ooy>RqecT%uM>8P?T4s zMOa! 
zo4HY{%ekAaFb{4K@V>VuGH1`pzbk;kSqW?!%%AVO7XK3SA3YsyfMdF1mmiVHi;V)a zRFP$wx7Sl4q_kb_6Gh;Lxi|LYk7Xyntn0H;{-cG&-d25VSCL%(6)<-+X$oS*-`zQT zM|rG*vSJ99yVgcdCW`2Lmb)98ilbj*3Y;Ec3PhF}x;|)e(ToO|!E}5RWtiLXG3!a1 zwk@jNzI^~DNC$d*7~$)bk`-5=+`Ny`vs_ts9NAC}!Ydm&zrS7MCC^G+hi_9X z@Ivzx<=fugO(vFW%YCjV#WK>o!CB{1{6h^?Pzp`QICI6sDw1kCLMYQod)LR_6AM91 z5$12Efgr{^Pfth9w*KbK4RgE54l`-?&Bqjf9EQYePa{7yOP?ZizMl4jvnq*|>!?AA zCLn7Rpgv1ggRZT14h1jh33m44n-6XMXs1M|50i7 zq1vJtrm^omTEfFBP0yGjMHS4{d`Ve}!7Nku5Q*4G%~uNaagl=`$H@?%DuUz%#dw!4 zuXy>NFU55`y>5$11ypFG=V)N;4YDa>Jaj<=v`N{syyO&ySX;K%kR$eH_#O>O9=we5 zNF^tNda07Mji_|iPa0528g&X%?g2;z$b-rU0UU|v472DD2FViM99Yw>OE(^ybC4yZ z-;p_IFUkrhL?LfGfi2;ENmiXTt`0{d($u@GTD$%-y5v*#=Z6hS9~Y6PI4em^!A*`| zW)LSR$)&r5Qu+m-wXoj=v~H#S%};tQC0pOx`w}NTbU-rf&t;G{j&Erfn(Tmr%pIW` z22-F!6yFUKXas_d$>b7L70BZS_aE^9OA8Gs^q8c8fM|aV(%qRCK*q7~eU})!OvpqG z@54l4J0iFE@WcU;R!tEkZOiR$#FC3SxG&%F=Ld+`+-0OtI~1)%^Wrg1xJPw3*@w)y zGL(Y>SYLLJq7=QhPLe_=eVhm7XvJe_6YSSG2KHxN_tc^u@tX|;YgTTlr=bAyw=q0R zlOFoZ{r==4ftuV#Z;*G0qv3(g%=J|`4~&U-8s^QX0)R91M7CFY8o7^;IlzEsR zy*KL!ln2ZwajcC;N z7KxUGIX%p_DPtsK3UGsh9aIuPB)q4;YbHdlgke%RkTsI52}Wze0Ay?Ntq8xJEF&q0 z{%#FcUq4^}#b-a`6DVt6<=+MJ18;Z7S^Ez@dU^F`H2vB!iBbH?bTSsN90VKM8iqO> z?ZonPNTx>)d6s3qa>Ie8j!5dz*b(eE#){s_Dws(aHb2`h$P`VVpVn&kKi3Etf-VJc zETO4)IDzSESLZN!TpKf)*d^}g`45LF;tl{V1sQ17f~rhUQimaqh-3?C%(-mKGv9|? 
z@&#gS5A;aOag7;41Qqpn#T@gU8k@tA;S48^@1JEQc2;-plrtKr+)!@!zff936OFRZ zAc!{ta}M#EI~v+kTb^1IYIY($K{Yj?NGZxb+Coo8MTMZ0y}b&oUEbmT?n)Nc1c=7K z3o^mB>U~OTmNk96XyNZ@xJX)mE$sCXOnVC(w1n}fJHa(c{`o_rZ`rGAOv5EyhX9&?^@@-J1tD zdjK-&pTNg_e!k3T^_xFm)q%{Ozmv?-og^u=9R=@4Vlex$U`yslZe}FzzvWIl^AB#( zQ=Y0r8C8Kws-WT47^X?x-O`F5-kr`PdCx6*MQbeRA?!bpXs|At?!u!!vfSn7NFJcf zUr4T1QaM^@YVzabIxz<+7n9g1UGbtDP&fxeWWyNc1gVv?Xi4PS%u0VfRqNJOjy{2# zthcKU|08PJf{av;8p|ug8J5IQ0j|z_GL*ykx;bfu4}dX?g}_pE(E*YJe}PJ7%)UVDTBB)~ z8cokh#dAyUVj)5Dlwc{6ahIuN3G4P4=x+un?~1%x#J9n&I2#WaF<&=#Joa>;&?H(X zROG}-E)-;%qB*mPq0&LG6K0SY!0U~5r$AgWo*_jCd;CiksqAm_pHF3f z{ttdJq%{)-wD6{*8y@4#A$P;GqtEmsyc!Dg_q(^j`g|DS# z`BgB0ltA)x6w;?*e=fJ%a?8(W9P4eZbC(?gU^GjF@~MMlBn#`JpP=-M?guZY&&4i( z?^nN{oFD(e-^u)-mFK*_-4x@d%grDkeusU;o4!Nd(`AM z>N(Kt`1Q1`rJ(!#CZ?y$QRX_MV^?2vkR={M9+)BKZhoW0QV}Q-mng05J3q~KchS2p z>9>2~dn5ToNmbxZda5HdFH9^TGBEOHFL+Ua3E7x)(S!7Ys$@Y&TO2x!?NcA=+YE~J z7ek=7i1WLNTlBMUGhiqlmb=-}vAL`DXKhRH-VaG$=6azF0@nGQvo z_E914UUrwC&#m>j>uDa3Bo@O}=dKg=aTS36HujiHkkxb`e?ca!SV}FwO_cUTdm#o! zN65#7H>gUwv}8wy53=k1?!J(#GDN+8WiEXrM%KWn%Y=9%K9>l7+t=6sP_! 
zYFyr5EK2*$M#c6g)47~O$I>>18ONZ0ph!)Zlw!<^Qg=QP`7zm$%4s`XF7^Y>Cn{BKJ8Kx&&e;LX3Nb62jvajH^(R?19BeHJG0t zHMnwGtrU6OX;Y2cPd2G zEqsF2g2~zF+s)klel>S>|FFOQ{OSJ6`*6Q^^MtzExIFhnMeS794>D~{Lv$*TE}Py81^Pw&%J~a`pEBk95uM(pDN8MD_FT5+d~h>Z%dXrv7={vT-bKMW<#tycpXt{*R*sLhO z4^fggMB_I1T6I^E&l$(;%u)RxuV|0y8yy4YvH{jbC>Rwf5IE+`-0n0gn6^2!YmNKH z1>E=L@3^XkZ`eCTXq}KR418V^OMvzWt>DnRobSPTM_;J+1Z8n zidAPgWf3RN(zr6;k|h=CnhgF%@^V|J4^v)KZT0X5BBAO=B_gOS2pJ$eU9#v;qZY6; zFY|n4+9XF)IU5#>ss%s?UW?JqDMb8*-J}LSz492S zVjLxxUD1pxWw<|itR$Ku%pYRVm}7%Z+;6^okTTk8yFo?I&XH*f7~upoQ*_klTUBFw$IQBU_!)nOA!DF!I*iyvqy zl0wWWK4kW+clYt!igtO|%6Xt5ax{wxFdH@o+*c%VGP0=wQpt*hJt@2%V?+e?_1)Ni zYkM%!bUs0olDf~S^?W~1mwWy);y^LbKplwgdH@8@A0Yd6xp@wB`SWHy#bx@q2|0hf z%*%@w_U!*%ZZ9hlGG}v>5u2Mj;2ccU)%IJI+q7$|Y`TogKn5J?(jQ0YyM9EDbKb+d zsB94$l_zi8rVRAAk>HtJ@{~^h;?#4Q*AQqbRaiplUV7;l*>R(Ncyxabfz~+piF&{> zQ0knn07@+YpBaLW`N$KkqNCvo)WIHQ2DQ7t&Y;E5W!EXE1s7UWrFpRuPK(sOZ}x_o z=RpxEYU@}~pCReJwpL#4p3X7Hhd_Dy)-*+!lyQ9i!0x;u>C{rbN%zZ#V-P2*On#M? zXD?GpsAHTcEmh%6y%B!?9~q3|9QO=pA4hdHK<~@gN^__9=)SAMsdbts0oqr*^;`!q zfa&`!_Usr9OPpdP7QdV?a&FNI!gH>{5#r?h530$*{ADoR-KkgC>5iY#)gSoJr(=6# zChw5Q#GJu2S(vFc^*lQ*ivur@_pdyrx3piOO0abYw{QvPPujBfe}bbsD;L9|28xAs zP=Aog+Y~k53~kVldtOlBdOD9wv4op|7wNmE0<5dvoTu}y-cfm7#SkjHPJ74DUo@+0 zQ^X+(FV>SJ#Dfg|0n|j1Tr_2*UyYVFA|bUk0(A-qWuVFewUEYT(lRU$;>uKOO6vTMsuHr}Y%s?-t$k~7qK_KQ z1&EvVzS?1%!R1!Vln*z6DbnkyeBzte*UsKd>h@SyOiW6gLSWH|4;Q_1AFr5eyoDIWb)zv@(^V-e*>6&SW-1*l;4fL zJG4X&im0&dRdTmKB1gWKZRNqt$0edKPxW5YqA@d4L9)+Y`?^6ED@=}H*wOQfEkZ~ksq zi$HgV3N^Qq9=FQ&MkXE-;54Cj$QvmB9L(PpCiSbwFn^j+zl`s0BH`5@@t2pyv@b9I znishn;;`(8VBdZ3vlDB=2o||>`MAml9KFvO5NTWi7 zvO=gq@#cK>$)K}iTbn~CIp&+VoOwv;`9qwU0f8b+GU(uy_U40Ljmu1bDF^nBb8LCe z!M;jGn82y}_x{!5e3pLc`I)+0U`!Z%x9ypt9O_WsRX@smq`&NAD?* z-62c_9s$g+BbYXV>ugMn6|_1wW;a$YxqTtUy$!>Tw=1NL2$M2Q#TEZ=N_1p$6 zQcPq8jRKoSdPaV{u4vLZ_FVCAIdEO&=gwK#mk_SNfWXJ%OhI5pduv5l#=8eBd`=)&aXez|B zFFc3YRl-~y5vq5GV($kk+Ep+U%IN@SAJVZo2d{(@J>ujZDY{B)?xm_Ueax&It~pQC z>k5!RYmNR=Me#hY%So#yw%i{kQvUlS6_CzA|UJ*R{}EC9)q^kmDTDOW|4dP|l= 
z&XmLQ7^X3mFpHmu7_YdHRREOr5m0Dc!ikOtJsXr`gDlS0hD zf$GQ?FD2?9p$iOg@-94@rTjs^XaMt@Thu;8X8!vzEq-#^zV^3qx-I&ZL^^H>&Vea@ zciBLRX`LM7#D7||@?^2lKPt}BRwNrzp4Ahen5L`&+FNJ-?8HCdE+zcbAxu83X8&tT zAbCel;yGnt)+HO5s!?#_)1l8YN+((v9`8&G?jud2j5;nbXV20Nnp&~WQQGGSg|36% z;8hIva-5VfKZh`RBo8(E63TDfpL&jTjS4fg<$mjoR>HpOJ#&<-w^xC3u`x`52w?AL zAgjfA7!zB7lfUmY1auCQ9?sBw`&eSo<8amSJdxuGm!CgMG{R7TYfqD$BY!FIln*Rh zL3ou=0taU05$9M1Gkc6qVb=CEMHGjjJ!4KEM?+D<`3rrN@1~okkUp-KxVp-P;sR2E zX|w$UCB&|O0BCbDUDmY4ASC5w3k}N(v?wi<%^Gm9;_wF{@?lF;xdz5~cLP&$jHVK! z|4dmPz6{0+=l(!7BLJI@d{=DK29(fmUglfl%dW9bBcWq< ziDs^PY$+sO4h_mfyP+xL{BG>J?2x8419~#8SSw};fc7-m_^!uH?FgLq8Exb~{YmsQ zGz8|Bp5{Q&Nk9{9&}D;=@_kUt=kku`hWA-11of>q?Mk}t5ZbLDk};l-0bw3Q3p9_c z>N5ZrBygzWQHJ-G&Ll@2B=jTi8K|NZ#5>f1a7I8CvS%nao_YXrK#spvWyA<#czP)K z#23b}E$Rk8db-sO`YeUnAjUGfG&Nms$ND2v^Gw1)f*&_3O}xr0L?-^(HYI#Nj=;mL zh76DP>ta9&Jl{-zwS9h_p#pDP z;Sg4fE8^Ng0S;4hLiILiU%6@lFfgJYl)L~j`$@-8hKc}F)0+sEZ`Iq7NN-9r3?xGN zN|0y;qWgs|NG1Du<7bky?DW-C(w5~Or*XvKF^bu%C7Fg8)!YggOEIO)oS2G%9vAwe zWo1M~OUgr#{r>@wG}U|QcHFqb1lqfe4J8yuxZdF41}!5jLsatviq4(}p~oa2#!mbF zonacZ`u~6(d-!jcj*YKiq27WKQ!qyb`%&}j@n73oC zHi7HIo@~*SF@_*n*6qGP!_pYTrMoMMw`>GHSP#wmXR22_$oMMnzz%i_5Ur_)ijb>0 zALuz>NqD3{Guk9}Z9of>oo>;TG5TGZ^h>Q>sBkVGaDEUk5O21%WNW>jHbBW_7Rmo9 zfINS?jWC%_S(nLh9bU$alj#Ce+M^O4a7|2r$MAmWUmynQU2skR(4S{9i>n9Rog8id zFHuo5G(f)jy4I7_!Fmr(8RJ)@9jY}{(8~&tW-+04#sseVvsZC%>!ZGH@`9+Da|0Ql zoW5(y7=I#p=V@4c1`9epgX=}qoX6;_0MzYG#>$3W(*QoBemCU6;xB(#tmyw>;yU0ofl%q8+BEn#>~C}(%HTp85~rvFY#i=z%yfUgU!NGIE{~4d zj|L>z^&Z#`v4h!)8k9fTO}ZrleY?*iBaR3spmw5qyIpm-`X&i_suw&SunZ!={K6XfU>Fpujy>!-as zDeBr}P==%=o9g9(DQ%2-#gt*g`{-1SUhvg)3-gXd4bME~dpqk24+cZ*ePBt!Pf!`; ziK9s)PqaTTyGOn=3QVL1^*t5DXiRIK2$D-6lY&|JM2`N>yoAz+-T(64Ic5keXY zzK$?FJJXv*I9x;+P`-fg19QP$YeGXxo+B!we4z4J_KuvRiJAQguX*1eW0V&#M&&yh zta}(21X*KKIR(q5ForeyN+9rDx#j6Z9)t25s|)YAGz z@1Gvnlp9B)1W(RjjXCUf9iSBPwyZ!o&%L`8sjG0+oYr1=S}8GHTXEkrE{KYxA6ugM z*e%&7K3fT4?EcO)0B!@`v77ZcKU?Qlk4Esk;91$EiwHjK4J0Tc4?IgI+D?VM0><>s 
zF)l3MiezF@%#sysDK12V8)Uf-PTUu+pJEzGF1hQ&{i%2!vjXhnO(n(VtWi^G6vU#tMHBhqYK$!T7W5R(Norlx z6VTZBalqtS@cxV}_eN?ArA0qa2)mK>t53OWZnnz|wn;qYT@aAVp;0l);;yV|bDWqd z<9?Ukhi82nkYlpWZc4o8-R=Z?)pof%!9|pH-I)AJ%3qs(oXJ$)zl1oh7QwwKMUphS zf--F*8E8z1OAS|n9Fc;7jxT2aLQ@JJ^E__JK)R|fvDLFYk#>brXhI1&9AN}UPsbk{_}mBl;djsD1|9F%EZH za|z_UBtOlr-C(*5a@AI%F?2MFh1~_Ly+Af)ki`dQn#U`P9iSX2aVhLwIRrH+nJc86 zLp;i~#b&+iTe_A|)XhdvaEDPD1Bphde+zP2(wbC7R7R-+(842MNj=M3QM~7dEa0Pg z^Lb-eH~t$4*u`wNzvsQR<&K<96#&gcNUZmJGfB*9-1oXEgUsZAU9dBIFAm}mSGxgS zVcfM=c5RDcg&=>WRGR-ih4^*2;D);Dg%2{AA5 zPjGIMqq8Qk4oFaTT5$Uwj-scBds}W3yBBR!w4x7ilUlBDL_LuIG8p-Mh+TvR(Z){M z4YE7tx=Za?f*9|~7%Yf3@SWY0<-D(J71Lx%36McdrUxFA6(JtHiyLA&PqX;LBFA!r z!Kx~aGp;3cKXyxsM(N<#$R@$~->?7v_rDh7e^-nd_u`-BpMUH?t?Va@iJ72=1P5V^k90R*pv^azMS=MiFLD z^~bD`cyx*o%e9mAcvmP>P;Lm0B=Ue0n7%iRDseH25DOS{BXSj)f|Z`U1J#K|39;V& z@F*TW6k+Zo@R*9}Jc+Wj7da#y@6*S}qMmux#6&lxKI*(1?4}$CWHW*%N3*L^HAO>y z@uxAohbGSfKQH}5FF4XJ=rYi)eZ+%T$FSQ{HD#3Lrd1p9TrS`ey@{mJ81(cbvAXWd z7$ntpE`fAi%96vBk5rjS15>n<>FxU?f+)hUD;4c9D@wvN#J?Lz}POF0ifnh02j1-nY9tPjD_b<`-{ZRXZ zggomwjiA^I;|LSSjYwD)K^DuRp!~f&3$wQ9hz`WGVEfgFFu@5`0rPT;j;t2zLyg%H z67MIR4IqqF7z@`BK9^rdmtL~~l}3B*Bu()07#ZK{rYWO5tvl|U^pS=a$NM-?KO>Y6 zH)|%L#Dgf~A)W+eRc9zM2N)=IwZsLrfz(NYt%&j1l)*jvAobe_+D98ZLK=Pu8m9Asu7i%|UZEDfuX~qNd`hRi z)L`#!{A(}Lrf=Ip{Ai%I!6S^$j#U`V&X0``@>(6L&f1SGTy{E1&3d%%$28hIREr?4 z80!j7F@HUZ49JWOs+y4X8r+Ppo_Xf~Wd z1z5EPV#y7zqJ9X6?pFuo)Rf6lgRG`M|NP(2q@n4 z8lbes?(>0`r%CwcC)W$5l65Zt-5c|uwA>4f!tb^eG?Tq?1b|a3i}1hy-4O21k@^T} zHpC0E#KiA9CfW8jVkFFFhK3*u`lYAD*!e=TkcScv}BoS+qm9mxPK~zv(}VgE+?a=LZ<*U79%VY{(JfUVV8C zMceC2Id<$2cI1n=iNpfnqJd3vUp(YfJ^0x6L6*4v_=p4mf*b=oTw_O;#{o%SkFx7_?5xWjxN)GFm zolH{kRXA5y#7A(9noiiiexLwM#TWa!OB&z~j5&lesBXav2v33y>UO2x1YQrU72^_= z?d);ihn8fS_EN}o}ObYHa5@czAV#mXi^Zt zXc8)dVYW-fmyo7euXBL$jh+zT`@eXE)#1EPt+Y7x_e&vHwb%cxJc5qZx6Ct5lx?#1S{~B$W$`j3c{vUZ#$>l2*_w`B+_G zwLvo7g@1QHjfywg}D-Ie=y5Xjq@T`qQKF_>irY{wO2j}D{+24yW8T$p;y zBw!0DzXE4tyVz%EQlIfky3K0j%Vdve2=zcP;D*jSYe%7MR{^8Fjj|ijdY1Of;sVSu 
z?_U;s;xO&cT;Nas+TdQ)wbDRMecLMZg}AAki@|-lH87zhZx)8UtnB+>>o*O z`dz8;`V5FL+wi(%FnjqB%qa@g^emE=nR1@k#lK+Pp`6^_FoQ<>feqCp_O2tLO9hx`~mZptq-LJS%KZWv7_Ewrk2TAy!~b_Gh5Pw3aM-mGswFNdj`@FYCJZi5@1D zA4(`8Hs`LqSA5UQlRVP?WN#LTc-b(!LyX+>? z%C-unH(T#4NCE|3rC?14u@-}hZtyiyAV&vujUrs{zR^L3nrO31D7Qwk?PInd>`yn< zUMoa}9Xs~A)RRVbWz>N&hiC{NWvCKJN(RKeB>R)%zN;I*H2WNpPTH;`$s@Bjk&2Ga zfKJS42ES;@lgugdE7r-Kdj_&AyJIyq&VP_6Q*0NIa)XW@!AJm@NJexiA)f-PM> zeMl zHT2&Jx7Yqyx;I^fJG5xg=1z)Ou0%d;m2wGcN%!e4XHi$)d~}w`X&eLo@3=z;9%aR% z7n`n^$8;SMny1Z_T$|ICNlSaPp);L1>A_!J;(6NViJ{tN&?g)?n@CpZf#uDM=O-Pk zSoN_WMA6Tq=4ZW34B|kJ-DFxZ4IzCsTEVSP9Fap*9`Vd7NniXxFPX9tdm zF&qZyotorMizu2hOma<*-XgueoX`@UP>z2vsMRQuS)d3xLh1m5JVd@y0rF{jdUW;D zx8y9XPRJ@8-sj#U&R4X*n@}Ip{M{r>r*>_+<*&8h?a!NYJ0>arB$)I2^wOX|>4LfIHIx|&$A%!!{BuBB`{46VBVn$*K9 zuFxFeN^sJ3KMN2eIK$#q_)C3%1e$-_z))13728c{+Q=Z_#-g0B!oN2REe*Q@t3mO zAUDoDl>tn0hek-2sf9w(;xL*IB1+y$wDnXRsFH5mB_PoYm4 z!}+k!Qb*45=7S7OFhDd#jK6zV=E8lD&H3}Ln=(rH#>LN7GP4ZJ zXK;p8a?)#H#)y0^%cwc{o5wn*h(oCBH4=0>NG5SXD)j)&^{_ADto(y11ki<5sV<-v{@TnO~JBQoR}wrK$nj&J=CpEtN4^mu}vKh*o-gCPT(E zUS&d=loXkx-0SM50%fz3Ntk31F^{q0kkg0-L~15s9Z(%mDl$uYe*3Z^)9uJU-0iHQ z`s8XlyCs>^;pB7*C?6oVs%zsUq5+(7gN$*|H^l{%#-@nzhw1}l#EolUS?7~z8{l*K zzN8YHb%fE1+k>bwIN(g0sP|KIXBc@QFkbMHj+5a19u?h4B-{wSQ?$v#Y^a zn2V)`xjNeOT$nD;cV-05(n~Z)~%`Lm21uIR6OaI$NMr= znt?rea>iXp=P>bIPFuuw^!xC-ULhid9~F)eI!)=Dr( ze7S)!d3uH@b7Pc09Hl1>dSr8+l+zs_k99QBHzUwU(T7iEJhajcDbB>2`@N~%<_t@# z(=2A|Jn#nnb8}`nbGH*Twk)2sPG$)r1|E#|uTB_L>iCE2C!#=hz!_u#r3pw60C zvNGlS-4V)5w3S;Agz;kcoS&huhnf4#i)WU(?r8eVyM-y8ixh(l=8cv9cOZ{WP+xBC zA#WIh(e2cuPk$T|Q;Oa1D4_gadFz6dAu4O#^0A1Qepb`nv`_4m_Yu*By$g@YDnn-M z{RDMswz%Q|k!tKBjYVfnSPBGus;6_M+p76fY)oU%j-ljcc)5?MSZ4|3&o=OoR@VT~ z`XtN5C*jLaP=@|KIOk(Xfd57{p%Tg;=E&vmd3GIDBxH!lOyWM0-Dvk_D7ns>;~qe{ zA9398z4bKTg}y8dzornt@@seK7&XT+F|rPqQ32x#DcrvDm)(x_p)^Rtp-n2jj&#Jf zhM}e+N`kpRK9H}U_2TQ&i@9s5FS+i!fIVj--cPz=ZzY7+OEB$~G{wzPSr7TLALpjT zKwd}V(PKD6Ff#3JU4!#%NX(NAT%$$bOVsxH5z?Tk>lkB|EzbyJy!U>Y)Onk~8XQA* 
z7tLSEWSFhI;NQXgm0_h7LxgZr@d&;0a*t$ewv8!Shg9D-Wstv4!EskTRiQPr+p;iY zndQrkKnmWpSp@mrtPS~#3b->__2SY5*2YLW+Ls)CXR-|!oJk6dO%2F{ZE3e3fdv)h zWg??gO>4I$>+c{rtpY~sNgz1WNgVV@v7fVz?~ggh+1BD*`^DpH=n!Wmy#SZg3V`y2 zhi=L^ACZcAOjlrEc%JZ(RXB}OMxlS$vuG|5z@1l$qfE`WEG&>wS~^78)mjq^Z9JLU z#2L#{YgP=`CX!VrYu;X1^Fs#b6lHDW^GKb#T7()6Js0-_5q<$-CapQGzo5-4a;2*B z0rJIOIKomRY|>`MXo!(?XO#FY0sL+L`DdP7do#}(Fe@@fiT(n>P6P;dK8fG`^Yfee zfV=sh^#eBcf6~wEKd(EVM3$I2*k_CZLA!RMl7;l5hrm_L6~(V6Slsl7V?aP-qX-D3 z8BE74P)6yI?Da)3VJOGvhC6)bEQ7MV(>7ko-nwKutcXfB)LCcA)m-^!Z_el~RHRW2 zax0y6A%+pul6~kxB;@c_DA)e09oY!c7ZPB#pIdvq60xchwbHdUj&kFH=~W9}+5Oa( zAr?ec1Z{zfJTN(k8xIUH?&tQu$)uco|^QM-!&K2M*Y@^h0jeW0+-b-H#-^5LDKDTDlNaud#{ z$=eE{E?=CZwrMb}#>Pydy&9OOr^4Nij|8P$18cV?ABgFjGRpre&Z=8;2qJ;L#GvfB z-PK&Dc4*2Nf85m0D=4(zoyVd&^*Y;?z(4rXae|c^_Op@X2qeZLCauEw-lB|{H$@f7 z$Y@hq-A;IG>zFn$a$w(Qj2!gggQ8fE8jU*^FrHu5u8w?J+u>NjW#62ckgtgK|BmQp zP7`T&%Ty$aM7MhxCm2t^ew*p#WttS`GtWzkmxDh)kB!M(nsE;z0WPR)nhLZ9yPA16 z>^#-eT!JE&$O5qDM=*Q!ILv53my;UQ*pgf{tu4}i5}}$YLrMIR|HhbZg3tQA{Mgs6 zc%wwPla^7`y`LqXwTGsc#9AwA_z94g;b<%S;*6_k`ZQH<_%n$8BW>Vav zjmtd$AGKy)>vZ3qJXwwsWAQAl)x=6y^i1zOLS8LCJI2Ia#Y&9@$%Tkm25!vSvhs-TT5Yc;34du^ zQCc^o4RUW~h`)^zYP4RGSUn^4Jo`UAyT9waei!bIDj&or8C;ts*q?%Iu!+-3#>otw zL(GSq(4RpD(=2z0;ypB7%7kLKvtzoHqW-jyLcyK2V6Hn3Q$*7FN@_x#BaUN6S^*GfchiGJC|M}{bxeo00 zpe5_=V;;2&#W<<9TLCjroWa-G|2X;$(9g?wb=@87Mo+9{o$M%sydQJc2Z}C&5=fp= znAvyx`H+w7POcKWyO=hV2(vS!6I+D&)hK1aYc4abcKI|?1sFQZ7y*!y9*%#oH}_?E zwnt}N)O#;3#b{YA^=-9>mU~8CP5lg`6AM72$(er1)r$4XThn9uaSDhGhIZiZWD(#| z-~1`W3Q>Bl3LF?xgdBN}AF&AXyP2kG-na&*=vy9T)%nI`j9qZ+6xnw&bTI)ly4Bk% zl>5QSE_N!_)z=+v5$n%A){}*F4&?fDYumIUSIq2th4V? 
zMyO_QPX+Qp9l&JMq+?UF_!(Gx6qHL7_SH1K$QmhnbXcH$`12gVi443-+gA?zrP7}P9T`W-DRY5#9=DzOZwXM0U zXj9)jh#cE3Iv?y&QffnqTxQKdUsl2jxtot2_KWV@+Li#fNQ~(4l6y<~011PTEDbD< zMVJWC2bgU%^pf8RFMBty`s}W#iq{e%z!235Tw&mhavgObXX?G24p{>Dg;`bkT?z6q zo7)AQU4uQ;D!z%zAR$?j8N_yfWxvj!@;X1>sv)T8w$#D|>&@~9gKCNpzrUN2H*+1C z@`9tuHI;XaTx+mv$`DsoA|vA?6N*7x*Hoq)hjxQo5T9#JaKPB2T?N#<8H-SyP{otP(?D@9#RKu|uO|==dTr{sohdIJag()9X5bvq6ktFB~ z_9LSrZO}dk@xTB5`Tp;J{~JKumUL>BYli&ipJn;ypMR{Aq$y>z#*ahmGcf*Sj}gW; zM=D9#0%(-zHuT->-5^DD0Tm4SL+vp}QR~iho94NlFf1x4cOavD!Np{&rjAGuO730R z1E&Uu%*#1G7v&a?b+X)5+P}oPjq%%l8jmqT@S_IfqKd_QRwJ(($V5&9GYXRlsU!!_ zWWDRM+S#*+{HuR2bS1Dsn0e3$;NA(Z|Vu zQc*6hcPY%J3-h6=Yf?G2A;L6Ts}4huHRxrg`dr4DFt0A#)8m_Yu0D)TMgwypQ0YUG zb$=J_1rcQ}%LpUiJZLY9PW{!JQakZokg$ptGuIKqU`y5_B)NHoB&Dz0bE95G=#m8( zVn8LAT5`MKuGC?&zzvHQVg5SV=UFR1_CIa|a?vKp%92#}4&#td65_U97&uzIGSG6; zRd5@_jj8Al?XtYL%gdzHE1$rv84x$5H>Di0Ab94sXB;Hf;WJ)gQTs&xCqU;upYiHH z7Dw2nNBzvR|3?_Rix^kB)ltoF>k@D8&KyV-tQae@m%gw|0vcNF-NM5VjmWvytn$Vr zp*Cu3n+{*|M;frvjcNBH#)NJB7}(LmjOK4^Fsjc_>qPdS@FFP^`;WFBRPWd;HZ{@>k>ZtdT54@-<8GV-S~}o){V!Ox=OkU4Ns8a6rzU&T8weLig9gS z-c~wU@=F*uvbG*v3Pg$(utyF<+|2A~8H@L1;Q>c48K zX>=U#tRTONAcy(3Li$Mb?(&(+ST!MJJKSa~42Y(%Im34m#uS0iHdNViuPCpDW*t9jGN3We-;xf!1 zM6X6jGX#vUE7twqd7vSg`V~ie8A41(?;Uo-FvbI>pm;8$3@*O(DqMm+ckw2aG^#s5 zA*_oqMomn86-K?d&ZUjT1C$StrxeTbI%_GG+tp@Ymm3$<-yp{>5}Y4~gq^mCv7%0A zE{<_*qq-!Nje4RCG3wBVz-pHvp0qc7ey+F8LzxOH&P{f|PngKtkB(s(Gy^@j0?4oC z8Rd@`q6QhO-QBc|IB{))1zE@X&pse$&wC+ z-nZefc<;d)xEq&A=H0Q1@r_OojstQG#GHXrhyIv3M)|#)GE1@nS*x4O<_jljmT6EdN7!ovI%Gc4&O!un`^MyidoG!F z4%4NeO)J!vr{FxSK+)o{u%U5O+*?75G9 zI<0lC&$+HG^QhJiQOQX}ujLyQqbk5QCm7ey)v=hEB!n{OUXmh#O!B?J8BGWcvv#xyPD2;3kzwMiv6AwPxdA*eKDa-2} z%HYxkRi6#qaD29FevCu_iF%=kkPq?jTA84~7#n@+l?{Gt_@9@p#1%&5GY33z5RLX``hjL)^)G9 zkK3(NazX0S?wRFPz9uL;AGQq+WiKU)>BOLcMN{sdjz1i>Y8_xH1)vMw*N!KPgFYU^QJZcOl z8k}lio?Dal=+(`+k^>;xz@ym?$0#d}8RFq5T;fgQo@d2Sq4$qFG$z|fSd50j3^N}? 
z9n2Rx;Vr)0mp6;_wxCUWYVmXbKJm8Sp|&Gk5WPl^F_~azvWIfO6kGu#f{kgI_7KahM;E*+2d(w6_6+XX_uR>;@a3r8)`=^QG~yBYa*V%R*THlx|Nj2oo}tNQ>BN*! zmsBJ^s65n9H0>;>aSZsVhM4$zm4Pgv{BEA@Sq6=gKiz>5Ro{CY>)M({+J=t?CGC!Pz} z9TPpo-WhC6(TFkJm_t*tq$dloA@3>o+(R@sSYvs zv19Eln6;GoFgcht<46P&y0mJbE~MS8sq0Y&`PIzN%kr@q?;|&+`(XOrw+VL>{K+Gj zG@()hQy!!f_{3o)OrLxguG<SvB{2IC*AgH*t|!`uj;)|~r1!HH9|y7~^So`)RlB}Tn``|*iEVr;4wWzy zXeMoZaTO#vaXnem*j1N9$1EwM8@>i)(nk?RK^elQnzk-TnN4usq4pZ_gMcJ)jGxMy zZ8_iPr}_E){d0KlKHrrEsV&Y$1b|uUJ$n=N4raeU>7=8oB_$7ohDe9>5_y?I}7rPlUBZJ4Z9xH<_0qhB3yJ%8cmb|1(ZLH zTvKr7zR0)51#Z)3&(xe1iFe@}OvlM!Xlgh?NrN{V_SqZ4(2hMTbj#V(jq2hxNYxBz1G(wmg zjVH=4_SBrF{kJ)21y{IMS*`Y$8BYK;&&LK^SSwhAdMrh=kiQKUIHTT*>G_a3UcI z69jLjjp+#6p{cp%!SmGK%u1m}L8YKvjdA;yliHu+RNMdHt%Gm@{GKZ-KWpX90 z-wn73O%-|T`oxRCv9tbhYiKkYtXeV7KFA_=S8>i0T|yH!Do9Tjgd9neYz0oAu0ac` zbwy-ECjpdQ5OR!HQMy*SO$n-B<{RCpk64o^do`V+4ib~QA7Z+gvbU8@873%urFm7y ze7$v)y&mVbHH}LnO6%SrMH8U3{%1JosnE+=!1%Ran`VoI^d|(Y`cSPXP6ABLiu*?egw8*b+p@C`iY$?9KF;Xkm<`&4eplK99~KQ5 zPBCQ)2tXm=&6>u3&AWAcN86Fw@Ck zx(1&Ll*P|<`DXXUe^6PG3QTlo@_W5&$|%2@3AFoa+2=E&>nLZ;av#$F(qu-b4ACcy z-EfGKp2-T7%lvvw-umE@(=*vQC$mK`6fmqYoCwycU?xk<{y05fTJ!CZ>}Z-E5`H_y zgXoBgDef>XV*F|5k5rR;>zm5ivJ7%`{PwM~_ZHluz~2xtOY@sA|CdnO$DCS_T!&`e znxAH1!%49L;ROmWn7iDTyoeId|?8dTSql~Lxhq}#eP2fRU`{~Hfc zy59FP%D`ss6#0Bie`w3H2qt+(BdJS{Ak0v~Wz=y^(XlCl1k6{?Zj_K6U08BY%0p5$ z8hozAoH>L}N5?vfO&!dIVu|R}d#K~fIFEeIhciF3OMCP|S=ZB;;4xNIf-A8mpXjtt z7jDij2+;*vc2Osi;T#s~1#vA)G$oYOqcHWKJ0u%svCH%IXz3X(avOQYwDxjR5EmUC z0l*GavIkQ(C2M|teKm2ZB}-v<`C1~6vx7>J0^@nepTS^)gR2H*cG-jFPkNvW;o8>a zgIO_5irK{%p)nPU*kn^i`4jXh_?eMtgJz6*S{Vv+#UR7hleSw_h;Y^WE>mhf1^G?$ zs~(M>_a-!^#F3}aI6g+M$rzM{YEtN|2=3259v(tYu?+HiuVYMGL&g00d6JMTtA{(! 
z*t+oc>XdMeyI>lJ;q>~VMVP0X^RXj@bQ}!{juxrCJwrdK7x9vdp($eAdO2qoT8cTB zm%G}k;B;0jJfyhXiZ5=S(n?4R}vZ!O@XgHK0+*P z6C}ia4$_9|7Mu<@X9npdzK4ySfM!=adxAngDF#VQITaX-`|`S|`8qm+_Nl?{P7$7; zq2)g3DMmc_K54jZM#WsDmpPRYRxvx0Ihea5>fuRg8>{OeW*bu0p^_nyG4$b@sP^A0 zb?jUC?{-@X8?J{F%M2%4V{2f>b<~*!!PNKjFih*b6Y)^khb9KjprD&y59}D`uVW&) zNs26DjO)LE&Y$wre&~@z93}O4X(fV1C+<)4|Lgu={l+0^M6((65NIIl7wyyB!mqLn z`BT)Ui9=;0OljcAhX&<}2aXl%blELTaK{z&1TLTTO_re_6b2ZNQF_9uB1*vMre!`! zcRh9^VVb_PZKQEB^m$d$Z-rZERcgnV;c^%9SZCH3W#3qu zZ)5kgFekr7!yc0K=?d|*SZ^^JWF$g!*CR(MWF<x#S^exS1Fmq;nXr>BgJSH1CvVl4Bm)U3L*$gyCf*rUM$+Ej`0h%_nT1 z_4p(AzzhjJLm{dJ{ax6cb{)srfV5R0H}^R{Q;G%zAMIv_zI?sYNw*}QCNbQmJAS=y z>jY((;P{}~`6n#0hw8<1l#L0i9^Fxq*9pqTjJ*jKoiiv23X?8+Rw(G|aLykomT$Z9 z^_AKZ!e1uuJ=2x4lW3QsnnseH*|3}1rVR44F>4Zz2N{te37PKh8ja+_4J=yIBX)xb z`*Iy^K#D4m!+rZWW&{e45M|J?i)(uWk><{GW9GS;iOKy8CH14sEzoo4QK6x{Py$}W zu620KwV-Ch)t31{fo{qW1IE{Bp-sGaa9L*IWrnfp&ZGutG*D>Ddz`?uRWN<{xY?4^ zdjEY2v&`8=lVT8JLD41L@SMyg!&wV+yDyUiWH%H4Wth>66FsKyeB1k$J?A+GYm{s~ z$GFz1hmmi}v!?fQ&TwJgg#<2Ma~`c_FiE_s0wxG{slRakv>QL^dYx^pxgg944<&+O zVfo7I?e#u`{Vv&bjJ(U6=*}MJ>n7R$_=Tfa^c6Wqe#|M@r~3;LEjm zk%TfS&~#c*Rs&s@zG~g;u26KLL2?$9Bw%4R8BW7)r7}!@;5~=xr|-MqpmbMHe9Y2- zAewYY!`;M_Nh)jIy=c$$%tuWC5IE(`&@~s+R0>A8;gG875}X-HfoZPUbLTT)4B`=U z8b}DqE^LyhOR=tc7P^VYK!ST{?e?L{w%zgJlxQU;saHPDwh0TAh3O#=7e_7 z(A5@&r9mGuB;HW5&Frr@%0pGAoIZZ}vYK>~7{8qDnkZO_aR?Q4X2>kI^q|8kVf<;r zl{V;bX2L%;Nh2XOBjcIBN+vfu9$=vHkQ7h0st(WzF*@ZDJaG;OQYr7w8SOF8mEVHKXk|^eus`xQRBm zdR?A)Vx*8rB(&DCMRyq=#XJXk2J`0|GINdZpM(3K^eMQ!{cOAZ6?P>z4o)Tps#BIe zwV~7RY5+Gw2qK=6$6Vxu%a;?|#7F_!Hr;n~}a# zHO3sHE7ZrYouQrCL2l;FjGgOyby@Ir?lo=0S5lwhX(YX=VO63meXdf<&2>wNMmhtH zakeu}7GZ!ibeXO_JnzlezCTfSc|6}wb5L~Gn%XF~3`st1gg()}15pmxyR2qjlF23t zbW?FHHOuJ0%Ck#=<7s#5Mdf|vWg;gR1+p>cf{=&@l^N}O>zj(J^MNI=Q6Rz81FAw{ z!IRE+qc#tF`F8)A3Pb&cW{;XK3WJ&pp3Pv)#E_XM=PEHCe~9bB=1p={f~%x0W#g2k zu}2_5tiJ*Ic@5;QAuGfPXt`1F_f!qkman(napcYo`L$-ZUaMi&z2vA~AuHp;gYMat zZZ_Q$7eMY1OA*I5BrdNyNDJL=hdDOT%CRdwBTZ^w6W@^liiZUCtRT>_QL%4&G7APmjwJ9c7RQ>{rR}9;7Z- 
z=tGH|Qqpo4)8suKwkhLWWxCAKjhXj&R5a{P;#c7@?|e8$Nqecy`w~o3M)}pehv$0u zBv8LPv6gvkrLVdoOmAMy@~0oju$dqs=2Df0Z6tn1I$%0L-~0(J_irZk3xsv1RrX6 z)2F2j(H)VVNgG2p&#z9WvOHYd6qh`6lpHtW&Uvq2_SiN9dV1>SldVXOQ9ejd`J*z@ z1lI0Ve;H_u)n(+^wh*%A!~`zuK9jpRKaVZCIZMIDcb9jcb#>x#m=Y6I{guuhqio${ zD9NZ3%arWh*yb)>)!nJ5=E!i8K#;JW^{fF2Q>^8VzxY4#8jOAD6Q4ve}v&|{j)?IX-1s^$Tb(wvb zL@-r>a(B`B$3Aey&xF-dhX)?5U6PI1I-g!k9bWbl&QDSeRKyHw-_TENr=FCvm9IGM zQ@u?fxe(lXFL(~bWdjd-gLJb$k7!89k_$KI<-v^DSuQYHEi}AP=GB27R&iH}X2l99 z!Mkde_~W7at_D2}!MK8m?!>H#mBHLUM-wxF(tO+!b7BgLp~?J60O}d-9bEYeoVzln z^GR~!pD}Epi%f^HMiAUN)G|OoEwqR0g`m(r3KdX{af6!gROCu={ApgYoN+&M ziyd9@?e2v%bL}Q9fu)#2Hdl36ps8E2cyFn4aE^b{S44ej5Xdab&I5EFrBI7p5? z?A~x0pVv5d2j10)XdmQHV{iYu-(HA7n+#}{sO=!wNM+x%T#cTaQ zR^WuM+BK!q^YioFd<@;k{bTG`N)O)lv(xFbPw3*81GJ2a5%C@TV^&NLo`}`y&Md8W zxUCv*5koE)`G#7o?v(54{J6DFt!XgOBfd;}D15@7PdJOF#6i#UTsuH3aM}r{oFgLi zk%x1R1J8Rm0a9~*zDS!<0Qu7xe;b1@2a;9uHb!RUA@k?XPEgv9JxLg%=mi08Xy+Als(AOW6&& zUhD#~2Mqk1t|*ZOaJy%IR>+ZT2N)_DwK8*-vOTa)f>&&T5U8;?X(DbLBL6=!O}!by z9tqLgL$lTf#8bk-yXSBPAJCat;7c3!H2QD`RfOzilBanvIp3ve z*zDy)BK4GnjCBZ&y-7}e#`!TNlM6Txw1CXIX zCl5|Z?T=7WR_O}L%;`L2Ijw15kVwyhJ|*l-tUGI8&?0RHk*+DDJV<*oCYmk%Pz^~m zwV0#L!eG2(g(&marwA~c%}Luq;_q(1&PnigdSk;qWJ!t@8HRALd&{Z@IwM^cqA8&K z`aUB#jeTi@^#{Z$N00e5LnKJ`PUFh#6i2yvBnzg5G|i4N2bMm5dAWLBPH|@NGbQv* zJ*G97qbYQnSoTcrbt!dHQ|hZzp4&5hemX*0FlV9v)KCrk z@-e5&2y>Ud4S#9N4p2It<50*fLEP@lVM&Mth*W*QHd8+pg#`%PIujOOBuj47B_~Rf z5=OLuVC>=Al2?`8@cIa}Cpkm+`r;Rbe~hJ(J?Q@B8AlM9~KYnojdo-xrqHCxh^X|C~%B*E_=R)EvNdiEH(u(f+)udZS*fnK{ z)9!T6tt?){!XCN1?&V4A=piX;K{XmH&roieLkkE$56{r)%~^TB)6AulCUf3p*8PG0 z(-O#SR%Xd=hF#j5J(T779I9V-G{UMxlqXmmIhjSisJN9tZcs8-@WDiRr|Ux{)98x; zI}&>1*_5Epnd)Uz2DwFvFDqY4za3_*AZLsTE*R7H26-9J^MyH1TZQxJ5&HP>E><)1 zxU$N1NAlH1Y8NLN`;5+cp=Y(jV~EHzG;OQeiOIC3U&3#m=iA6yu|4Kh?A6qH+CnIW zWQW+I>wMA6Ao+#Ky3!V^M#MpRQl^FroG4S^#(_<;Wre{mSIt0-UIO_u33m-iPfF7D zv6*nIa^sLXflG_G(z(+>Mh~yG^E`|AJ#S5w`6FdxutBmvWt8q@F{x=TY%VBfh10e=! 
zOkwB+gC(5cMXPLN(AMB=wvtlIPr~U6-|1^IKD?)oYN;wL|Jv*W?UcMFy2hF>L$KpW zJ+b3QhI!cQNGd!+Q_2Gz3YtXUjb(3;0xQM3<_IljjGu=XX*4G+`0Fs-)=8(rn(sEz zEb-cOZ9?w7?>tns`E{i7I88KVOCm4x+7IpPTG%gPtf6s2&o(9>!gbVvsLdwER$hH)PVv3EqbRM2+sM)43Sds__3tFFd>FBT4XV zS2&%OR{d#;i8&&0+uZnN$Bt^%C~SasEF3MSl%!QoJBI2@5pOfz2X~{qY%)Z~f4v&O z%~N?0;+4=aPoiO!@p<_=mht^##o9Nw z*Qck~=WwULuYnuBs6zkg+@FvSk*!28lE!p`Cn}l4q=GBr*g=Bua{18j?#C(Fg?GT^ z8}n2{aw!U0u)&)SyOT91=xs8#;sEU-WS;WNW>Z$VAcoqF5vn78YZ>uw4il7p6)^WS z4%4VT{BLED!9g~O9Vr+-Ra2-lCiNSm0ZBsMh0O=+ zj6D=Xm}Qvp)+`iQFVhyxebK~T?a5hUi!Sqbs0QXaLSi4goIRv_9i+Ax4-V^Zxw{00 z{j6ud3^Tl!p5;BH9EMiLg&#b99mno5KLonF=rou3FEb2f(i1BakYD>Czp_LPP2Ixn zSofzV-&D9hpTo?W(0t~CE`<#nN8PB8o0o|)F3~Q^r<@zLm*V%}0H?{MfV*G0O zEfh9o^gqmM%mp}-jFL5|coIvA)vR>F;Ck%-AId6)t5c&7 zKyhQ{Ak8I^L{PB*xn9M>i{1wW;jjg7j%DXEy3_WWA;;{ZkD#L_0trP4E_og7X7+R+$!I=}4$fgW)xstl)`>5U*+qRKm zKYo{aAyvA|^dKn|4#I`+1V2VOVOCd#6KO7VXY0N;omjI&=@{WvLJY@7-7s9lrW<3@ zb3%CxGValSxKA_?6}$x*Ezv>4DdHh?&Y`_eTb`#gS^LMNN^)+@p9XFEX8aWl+By`o z!tX`fv7+^6ch<-sT8cHI2yfpRo0u~%Z`8nyNc5am+G|?5x;dxu3}S^hXri*LpEJPh zY?7T_QGRbwenLL!=ikjhec63vF{kMa1nB+E_qVXAZszv<*M0+)jwp@T@jmDbVcrE| zUM86!!QdO4wrTT~avQh%fV)E8H{KYqB z7H)LYF+W}K5UG>H7H*xFWrc@z<#%|K%Ex@}*v2%tfdY~7Cpd+w!&&}3vNO?dioqcb zIwCH|n&Dg)y&mSE=nyI1L>Io#X4lJtV=)_V!Ye6zYUl0vwV4HPGM=Dwl+=wDpaf@T zy*7UjFa5DK%gNbT5)MZQ9aNaXlY=>&7Bj+ZPv#=qzA1x*W7NX4dXkXm z6^Dr9RXtRaDqCHz6m)nKyJiDpwyD6o zd*bJ9AyAilKn5Z0wEWHf^;VykLE3DesSjSw=8BY_3ojD*iB1A;}0mW^n z67^W1Oh_PvtU^i7+qmbuFH?;d6JN2MMQhy~@X~b2nrCdxx(emqnlJef@EHN@;E(w z4;7g@g#ihsPCVl@0H!gGsw*i;yO)VSx|+c@jDv>v6@U3~j94ZKF!`Fdbuh8@{Im%- zHr<0Mi}P5>9m0zX&sR7>p6$!<p)B)5|=Q8h}NVh10BXUJDRt#z1_xJ*Waj{ zsy)CcQbroxZl?+sYP3$$>y9Cfb?)CPa$2+49Hp2!|Bg{RQ) zW%<+#*qAee*1**F;oczucOTx}=$_w4`>y@yZLmPt2Y$mfS0i%TQcI`CAaMAv;;F6; z6*Dxl&LQq=q7~GK$|z%r2!o+AY!cNclCc|{T^-IP&C$*)wL@B7h#o>(Pyr1(r?Q-WV-e;T>q^ulz-`AHCD0Su?m26pzhr+Rc zNL>L<(ZM^GXiwq{o!X|4P$XSH$GDaS6LOdJGd$V6s$St6PmFzCxZ$mb+$CG^{0VYR z86=)c+tC&2Q?UQ!0L_zv(oD}#QghiO39kbBI!BbVA@d`28EDKKLUM|-NT5qO#@TjN 
zIKw|gJ2&inmLFbC&rI)-2E&KPOt58B8aAQHin*>L%1l0#pYo_^;4U{tuOWs8bp~D{ zBa%~;Q4~}GO$Q)`o$vAnnhNRiDcdDM9Zbwq9Hz^Xa;_;`Gry8ZT*J-8PD8JT@K*-2 zBC~*5ESj{1CP3nEZkWt{QwI6nO!qm0p`*TvN9LS7Qb|5On4UTB8IL)6n>onu4anWT znWRlNuVZ9{LvCkynJ!M0uShXTN#1Vkv1RsI&X5Q<73zaIjk!Lz<=P<&uUHw3 zFDIr{?4bk~)&}Fx7UK@(_I$#1q&hyGx;&wTrwW16;Z_O<#?FsWc#T+4Nz@qyJ6?z= zjk?m7l0g(5!`KFR5g=#}sfiCX9zQ=(%`0YT!p=OQADKQrQ9AT8VGDzNsR#43Gpv6j z{QDgzU-MN~ie;$q0hN_SXVH|4M2%S>2wTj2?{#MWh6^U#5`HHeG(nlqqEbK!psJXSv{j z(qxJ_bs*yzW|)9Q2Elws}!IEf!Q^qU#lf70)iRC zfNhbDe7X(8)dh?@pNU^4Po!61bP)RXq;Qw;7Wzz^OEky$A`9PMFgw{0`x7_)=<~F1 z%^OWA3^DbGLj=j;{+VDZ_Wu2pAHeemT64j;>HXXJw}&`z{nsncU4+vG41XOpsPm7z z>LSEWPziP*0|GF{;tYgXN5zu42Oxh|kjK5~(%fSW8L>53-f-HU>n}tAv8x+&eW4?`$`^L!4zEM2E695;c&Rw1*I(Hdq7kWu-Qq*c9fqUPSV)Hl5B@ zB81Q+&?^OBag$KO!&R+-d40u;<(CLkfuF=06WeK4Yv%Du+mum0P+YIcag(knnUh2{ zy6yy~uY-xg>g_9Z;xh*OiVWE31kg{>dFxyI)Z!?<1a_eP|AGDWCV(?P3?Jd7C z)20vbqW$9)-=im~ZJ;7OW+^6pwDtVBWQZ97ei7xKSu~3oyquO*hmFWRkXYL)7&pLt z8u;`&`^=Ljj}6?%3btxx_&eBpl*6Rf{Ti6X&tuF);kmTE5d!o8$JPYNK^@GCq@v|9 z9lk}$v4dg_W?<&1I)J|zMgWGJ-tuKE(~TNJCC~BaNf>bmVF|5)fz5?djhzI zbOQ-J&=YQdh40>wx_x__heGSK@3C4bs3{iOa;D{5Ph>i$q6TKw(|FBkn@FL9X9;TP z7JV};b!WoG2+wjZtAdf_@*TPVc|d9%Z)3R6gPY;Oogvq>*o<>YI>{QG`!zN{{R(b^ zJ}03m7XlYFU(~>|LUS6dVnK&AFbTT*;-Yf?3W`JrL(kw5B$y za!M}nkl~DP_6FwH)|#7-N<7UWE2U=%QL#7pJ+Z0_WMTw7jFugtM0UM@pJezb9; z1}E)3{=WA#?4q^2``^EfIftxvAW!jmhbA;<;|rc4=DCjIHa+g|iW&RCxb09**=w6i z!k`c}Xx!%UxyUE1yM|zXox%KRFhf5+?P8rn^z7WHoeFNB%4==`z&*-u`h>F{cT?Gv zg$QHHcpVfN3Sp35cqr_XQPiF8tGFGH5&JeirD2=z~Q9RwQp{#ujXw z3+p~*W|CD855fEJz6pJ5Ml(>ABgPyM+#;=!z)U<5a|Pz+q)&n$I{|Q2E-oxP1hK*{ z^-xgJ{E^T5`p6e4;QVTQ@QLC_en>mz{Qtgs4lvt>_lPF<%3Yd(@ds0fZC`DM5{JB@S< z2PyN5vy5?%a(i6A8Hd-84MAsdZ>AZp;C~v69t^?q?>WD|h(+gftPf<2D<1cWBFjd- zB*@V)lXkgD{{hOWw&4ez)1p;oVpR`{0;G(&Kc8U~<{HK{|JDcZ<9WE@ManSN^422} z0{tP4>9#dtB-H~6qkSXN*G~O~iQf*C7@o+f`w)(q(MQMEPhb4y z#KEjt8XSt)lg0}vnMEf)p_+9cw4=tenYLqc&kin`3Rc7DRD{>Xpl2@JG95Q)*3>De zMqXjhDb>Fa3KfQ|YS27ym22Ek+y)c-6 
zD1iK#2)32r$EQOpTU%&Qfx`Pn!XeCGhsx#6gn@DdXaqX33)^>H{$_CGliBWI z%`(WZruVK*0VDNUuUV|L!6+SO8^Q&1xm*>; zAS6`T!mWhX$MCqzIV4kU8WUB(Et5abMn&}tC}maHNF^0yt#OBHrq*PX%=3a3Hl1PG z6;=8fa${CRx3Q!m;L%wXY^qqXB1h@3pgjJ#+%qmK_sqoP=!62Hwmh+=twmYTb9gLe zjT*n!q%WjeUI0*CQuz_CxpazBXsktZ%o(&Rcwi?6Z3u~}j#@hy&a~c3T}2PVwhq9^V=I-1ecyxvULNBhb1G^A_j!Yl7O z7O$(URMC+%TLI@6$V*AT|>;-<`rEU6#20P?eW zo=*t|ZC}Qr-M&aypYxe_&kAZX(@e_2q;&qmQ97=G@ht1F_d!;?lXHU%1_N49!16W@ zo%F|07O4z#sz}moecYID<7nsv#n7nd$Le~#LCzeKhYIOAFzd3@zMTuH0NkuO*h4?$ zYJ)2!5W^`K;N1-GQIeg@1qg87%Vup}qnu?w^|!KwO}7v8beci%wm8gNV+-(!NP^ih zZ_ZVrj6hC%<1*s78e^gnUO-_NL}!lG`Eu!jHeO4G9ZDS9-i39)jYoNq2a6iex(m-` z8-C^QrD;>2lgN+C$LQe1-{+c4zwzb@1K0n1X3Iwld+&pPY&nA%aI!JjiS??4^W5hg z_7?r@RZq%oU^)X(o>nk<4v_@E%f|dABoVo@r#(w3T|Gdt{S_K#xZ~ z6jA;(zsJ_SJ2=Jf^FczfXzZ5EQWg(&wT%9t001o)O|r08)9A)Y?j_l#gz}5nYK^sS z&=VP#?Po@=G0v-Pc(g0%AE+e?!keIDQ^Bd@4X^>H?b6Lz0rPZo?%7k6ANQGK)rH&B z8b~m5m}!@$pyEpb?i;7Hx>N0?1oN}`1^rFG2=0wL z5^a~0b7W3SI@MOou<|m+nXnX7`K63=hm0iuTusj*xRe-5g*>E(L^;3VHv?-mC`&tY zye?b3q-ce%{Sj0NT?HtmV& zASE&{9xxR*rLBVb+;c<~$48cXDJJK(15)@ zkr)%P;fRAK3HmolUoIGH9Sh!EQ$%^5*FISFT3WUM(2_d2=q&f-Y&I)knljA)5z&@q zk_kA&?QWGif$1;8%!)7b>Te#)syVw33VhN-)xje#dJI$im#q09XDueJBzc_Pu)5%e z3$|@a7?D}{&@R@gXHt=^^!ugi<-|Otd$gf>jaD>fO4p{@O>k|NApXQ4-BFM(=Fm4B z?iNAZC~oaNuVt7j9#{qM-GW*Kdz5QqWH=*7WEtl$n1!L%EMFGRS_d($t2h^Vau;0a z@D?#1FB91|ld@n5BmJaAL^U0!;Q~?*CO1ti`ecp67Ox% zikUz`o#QB8hFcxUL}u1ZZAMn1ynK6BWVhojZ>Y^VL~huYt8bjsHFk4W_H^wT&VEfE zp5d$*=_YHMo(O;)5=zOc>r@?MBJilk`Ld@GX*W2FXK9x3=sHY%L%GGAn3JZdufRO? 
zm#!{*6t{Dre)cxp^etmU*|18QrsBLe^DBAJyDEkwlw}olk}_p_mjn?WNJvtSlv6A@9AexzCf^wkgAmdJezT79F1VcF&sSdCFJ~4)zQ#Jd(yJ zXsnB#iGsGK5++2GUyp91oikK3x-?~I44Qa59&7lG`am7Xol+ome@yTD1GB!f^e-wd zC}MTYhxcjU6vjx3AlDaY;}{M2Nxu?%*0;~ZqP2xf?83E_T$(;>0BGB$4DzSJZIL6K zIeFdgf>%23AqdvJf;d;pXI*lbQdYr45p{On4mxGwalrRF7*p{CjZYlT$?Yg%{K|5N8#iv)?#;?5-P2nc zyweZ@lDX zgm^Y7tZD?pOh()0e?|pdYt{inf`+{a(gY`Fj(RJ2}h*p{s z%ttxQcE{PZdtj5!FB8IH16?A; z?P?Rd((P1bRmec&6UXhxr4F}~O}_Lrty;FikZ=iz#Y_r!eVA+w+W!$6=y|txn74;C zmG%7WCuT)TvD&H?U}5*1cBo!OvIEy?2Wn_1m}Rpk-!&D0|0j6Bqgx7j2lUvWeyF%% zGwIMunEf2v;SLz$_uj=}7|QC|2RDH3Nm~a!{acvJ`eP=yZux79a#;S&tX`G7k1|r% zT6zT(i|uXB4D5YV0XJfOn~zK&s7aqd2^+;Em+MO89hI)BI#X|NPZ8aSg#GZk zbWgi|oOSK}Sbh*CX=sQl&&-)sKDrn^ovr)K2Tl4vHl}bW8ICEBqj{(*mI#A8_*^ca&{lZ6W^n_k{jAMgXpBd9;Pd`N?1(f)Mo~Xe`VX~ROa=ci{5kD=xR$ z@QMvOMfb}Lm)t2SUH0~8xdK6BTpDd*JS>Sw*Ib0I0gC^_u}N3ZV9a~FJVrieol#W8 zw653OMU^{ue#OzpSn1%Y6;xwO_FJd_u{LEzuBq)DCO409PSCm7L&`$Vv`Pr~v* z1F%D2qI|gb!NtLzrj0di70M8PzOPMTIibm8?KhM*F&Oz28lz|O%$h{pSYb_c{ti>I z`)zzNYkA}#y;Gr|gK7h)5K&gd7p`xe0gXf2RXBIAq~leuEjLKg;C8q~EsLBc-l}C@ zS2PtTKHT*fE%O*YSt_5}Po6Rdb0){(tNIv*=_lqx zSo^j)MU{0$xb!XRVUnrjW*}u}t3bk9V)vGh|MqMYq!?G+8tF@hC4D!+1h6 zoKyxmG3L(<^WgUe8}4ni*c~pZ%1`gmO>1AU;5rSUnwh^tjpr1e1~%C@gwmV}5>?q~<1n>9IyT zMRRc-6ku2z8;T8SeSw1abCluQyp6Y)(Y;N1PzO=*9Bp>gO_8W<(GU$#!(W}_Ff)Nc zgvAA0B6zdbf9Q_1X@-xU`El!akIZ+_7z#>C;UFRVwZhD2-DOio`Po4GqoA*HX&7=U zB%5Hv;1h#fiSNuz38Jfk_}y&$bpB~0{lWE#O;24y%1m<4yz zA2ms0^~vb-ku7C~lydZrH2Ran_q`E(Yvx`Y@z)gXdH;~|!))Fk&%V#hDRbnNGJGX zoJ{DzNZG-lX3v;r?|$EUaZuA}iE!Nu=4-0WP*2&EWr5pAY#FPaXh!dqCe`$g!IUY? zSyl1`Bt|v|RJwY~A%r`GhGKfm7NY3c85{l`m&=_KE4&~EY~N%2ab5Gg#2vFq=O}-g zIfF1%k0Dese~~pMb&3c~FRd36)Lv|wn%UwchEH^*UMvhl(#LQ#bf{`%gOkv&3K7FCF? z$xtEcg3kd79digX4xW@@+EIl+E-=^Bk#ytzQ4sTjjtY+{bsh%6gFDCgiIj{^7P`p;`HnTF?ar1n3&d=4+7LbZ$h^#`^khND@ zo#}qGP0^C2`t{JiZo=Ox&QT|b#za$c6Gv#ZWv(&lnlj1=vyYpTh{={h@Oqve8XzY! 
zHGv+h@Dh2M;7k);Z8gs6xc9?$x?^Y6t46aSWYIhHDVvsGNxHE~gfUeIPBF%;dqC&m~>di#!HNe7c4rl2846(mF$^vk<~spb-bTOHf4~zs?2|X z{Ehj?I0LzkK`tnF-}Nm@L$b^Yqp+5xV#8g^h~#O57Ew;=_wF1{?L!D3F1`%yu8X%lR;eaCv#kdMoG<{( zshrDT!#+rzroAd$zG-)}B0IK4_g}9JiG#6Az=Lu+0ZXaH8m#Fj5AE*TJ-c5Z-e4uK zl*;YF%r3zSVoi><+O7v5JHs|PR$aUdVp0q;5+P>lVzzeK1h&Of6*gw8C|wy6lWag{~6XuV#E}+qbuFaSgg~#kMi5o_~~^wg@-!Q#)c`L|(U( zA^}VBL3$yybzrv(pI^6^U74llL16F~?U$+31-vH3OP6g0+yjoLxyNa6!}UjO(F&E+ zBig4}C1LG=)vepM47HR@8%F;C*2uycrvf^K{$Q2?3cDUyC9rz-Fhs2M`J%?B4c5o)gLq zD*WC@;$`1CceD9zd%ka%EQa*Z84H`2cZ*u0-i7-u7oWBjO%W%nliD*9(d!$5r0_K5 z{av_gL0He;F$eN;o4^&35v>w!{Ctc(cMzX`VVi3F$Dqyuf!HW0xvROGuk}UA!&c(G`t;nDn%DaKS04ZU~ zUofALMH6wsDMb2@XvUrP@PE2jW4pMCvcUqfyoWHU$*2bAJ;IDs@Om<1$0?uDov~*MRW*Fu&ptrxDjizuU2cMu9*psn_G%?S?uf#2C?j;!HnhGBB$s&Z!3uY zqX0AhAJF9ty;DQYr1vS$N@EVY8#QYoo-Av3(!+5FfTuH@{jg;HQwj4AZzB z+-ec~$xUy9vofgtUb8cx|Cv4kh1>J<0?4UdOD~eY5B_wTu7I zs&^t7QcyQBhnXq)(L8YJ#5ecNdgo^-8?{mujeC85-yg3dC|vJ2=@Txd3(n)5EuXkGxnAf6}XHp z+|@zcUF2l#FHfqjBxIV}$F1i3a$BSs9-FpshBBYHL-4s(YQrVY|%Y_StnNPm7d z^Qtp*$+S(UuOZU8qN6mQcp7O5&L|qmHIGTrscq7ev^+(h$A*1=4eru-7f})xK8ocL2LOrKTx!}}2ke5IEb$UKelMjXVbJ5w zAbxHSQIlUsw}Ge$mY?b)Cn*(iOnoFGWDc(-#LXpdC3czmDRk$G=X_H;fjf6&Ptnl7 zKQL;Akp|*;upW)iEPl^PZ!r;8zKsC0ojuMtmCToYM9^`0#y|Q;)`$BYcPBgFeuvki z2IN39(-+Qn9jO3}tx4a&0*4GmnLr<}$GESB7pklVW6dV3QU zK~iBnEcts(&;EB91|uVyAw`W&ffmgeX3WedhTL2UWvOB}Y{zZNlkzihTNeU385W#B z;weAtq##?s_}#>IM+2D(Pdn6v@1k7m^bz|9yyy=b3V~V;=JmDq<=QNdBvo_pcrPG7 zrxu^4d+qyII}Z2W$EY`WnJ<0tXI_VXO3PAS@$^a(J5#FjEIRs2Jt>jpfD@I4l2z$a zaCxE$yAt%3aHTk05dbhtr^TM31(r*KMNz& z!(O_piXN_-F4=c z*p?mKr55G{_e9H9d^}>w4Nd(@y+)W$WFnKcsWIjRCdj$LU&d?EZ(|*j=YKEN?U&F>1Un30}GTV2G-}0hM zPf}Nd_nZhR9{o#?(L@c7Zbi(30w{Cn#AaIcF^qrbKe&9wPfhHkJSQ;BWDils>O-Pcf2-mV#^xmIF^Au#=OB>Zutc-vh19w zgt_-w;G$>Yv0I7qUA@Qs>ACYEl2C*6+{$OY;vIUFwOGKFvkD|gp5ID&S=~1kS5kv% zbjlhnRGVh`vb*Z#dG86QMYc1%HNoxVCn(J#PHey>l;_smL3z69a=wc?T<{pG>}+yh zrZEOZz{R`07+gL~EF__B-J1}vd2KmN-#D4%kjf9xXY>EC&p3uJ|BKN5ug6T8wj!bV 
z3|V5SlUBZ!Awp{{8(m9*B>>{lsO$xx>O<9V4Q|-B7k1ojzv?%81AA!pspMb zThBIqNV_+tj2I2iZCflhG_4)O*r7d|1(mVra)$7S5=gH&&%=DG2fG>n%%=Ny_hmO# zuiSii0!Zjef>D<*x6B@LQw1``lTZ2W)Li6Qu0tk-x>^%0wQ4(NOPw|xikN^s1^I>6 zvt+gR(}x!bQFL<3TS+=u!nE9;o9CwjXz9W<*Ir41FgaSJ4k)euv8logK5- zM1_+n(rH7TnA0+M1HGHQkzfyF0|mgzNf(26-;~Wcmn~x#?EbKPezl*SAH5F4#fbt8 z!zHsX4rH|*ci&u7L^%yUP#S^M(Q52@_Dl*+1Pk(C4SJ@q9at43Y}D}E^&pYXTmfcy z&y!roKrBe{4Efju?|m-fvyXJzp?(d5B7>!cl2gb1cBr*>S$I*#IZR_M<47d1vDng% zBQ>9n%5EMaapn&%G&8Xds1&q1xiP!+RYS}}gJT%`w+7*$Q>q$(sm=mI_BlI6ivc{3kQdkN)Fqhl{m zpJQX6>qLQTX;n}g8oJ8`*uy5M9X&-8R2sf4z?@IJh;O>5UzPWxH%6Fnv-t%lvd^W{ zzO?J=o}})=FO2)wrL?8gzDc9ErwCCHx{tptq6>r?_O42%G$|dMIZSAGy$&#`b|v`5 zsnYKJI3j$GGJN^^`Z~#Ttx|F%G}zO z%J10F36fpici|E>k~?+E=jp?WZQA7ZRD2XGy=+o*6|d(!9FAoNJ-K4$+)%@kfL!S? zHM2s;8Qi7B&oB(IvnmGnhK3m<4l`k*&`_;?S$ zo7sat#@*MwP7{UnF;HIB2syY40e~Xhm_0ipEe51hoIlO;^YiWH<>mh8`Q>^3+@GJG z|Ab$|Uw{5QKmDnRh?C~>S==;u$!OhHEiV*kvs1n-8T5ym`I{bDztWN4Y|}d1<{H-w zvCsBE+f=~Cw2b$WV`KAi^yf7}`$a1wUQQv9>#azRluu`FRTa!>JG#7fZ9aNSerca= zRav7T2Q5DIq6Oz5*wwR)h{-M|Nm|^cwQufzN!U!XGo@9p=te{0GdOn$-O=Z#SKlt6 zG@AK#H4xpi8};wel1$G)RJQhYTv{hV#F_ebruNVq8n*QwK)D+-*>as*?`nX4H~Ycx zsWofI#uRFpqUTCT4kYjt@-leyN@pllGF@L`$~_W@#gYcPVwlFvZqQoYRScGZq6FMe#?LTiK=(Xt z%kLjp58iUXIyueXUtiw@`wQQux%gkf=|GRYwc{WA(szIQo9zFz6Fx5w{^h}`T>tHi z&!KIqEJP<)Ae%LHoy1mMl8G~PSRw$UYGR&we93^<%e3H0U_p=BA@nXGL|isGV~cd% z)0lrAm`SDiKQwNktRLQNzT48einmP}<$hfy?d7=aNltH{M1(6W7QE%Rbyn7wqAA1t zY$)STx8Pu7r}%w{Bfr)YuK;I2n7_+L@}zP)NcI|y5=xPB9lHs0ZHcIveGW4wwvs7N zsI;UP;pL9kc@iDOo@nM+XPAI8`~#LicgheJK!zXh>IQ8tai~>a$QANA5kW46Yc1S6 zvu{+XWVKEC$4_NSh&T+COvt10it*1mV+8tfhtyUElleVul~tROUDr#h05_EyennT~ z?SY_-lm-xwhkL1WyxqvS#xv4nNg2)3(lcYm3#wP7JD|hH?<^5W3(L4FRFW zv0vrJ6N5eyf{{55;;*psf(PC(`)cyRh({rS%^wu`2)6AZkljNq(@d8q00p!Ott3uX?s(e(+Jvv)VG_4sm@9KO6!;ekkExr4s5rta}| z54iMxA|kOO+!JCp(hD5lhxh%vzG8F$FiE|^vf#*xP42D6s#F#VyZ`?5JSrA$WZ~|v zW$Qf9TzG!nvk@{I7!p;G8*{h!yI2RS@8J)4pnfJE@|$dSc`$I^wct=dD>R}@232mR z>MM;K0w!bbCZO9i`+Yz{tTR72h;T$ 
z9sxK2;o?#({&ZcM;esSag-P)@uILs|rCT8n4>!P5N2i2hlhUlSJSXXcQgIpQ;T3SS zY#2c?k*^1LAA8Xqpt?JkB zNP`hAH8MQDd`JU0Ez94@3ox@SI(=fXD33^LL_))QJL>xh=Sj91$h2I25u~RvQ7X6? zYOd*eu(O(nx)jg=Vhqtl2iIgh>^|{IVv4bd@`ngs4y?%=_=7K$4_cR>%|$cPg7y#+ zm-fW0ZJk+P!0(gr*;_xpXzyRf;MRoo&&1hGnL(6VJ0+A%X-bD&c zNkLM3r)FxV&Sx)YhmNlj%FiZNkL+MqxcPhZVEdE>%ZunB6l0{tXaNEBZOau++olZi zs3mPZ^-B|KeYtBFr05vRwIulO@~8Q${7FiJvpTv2moh>$&rY!4kaOueJ248~gD$g& zJak0A=#7vhuEp*|pVFRDOMV&W;n=TE-)#6>dxb_*=+Q9K$}FQ8 zDYXlLgJ;rNYnE=4zE8VPx(8cNG_@HzYDO@kM%qX_ zNB||Zry;o>_dB&K27u?fo0Gja&`fvCpc!#w@Y`zashytQm`Zb2XA{xbf$lgGq>>D1 z)^R2^+7=2pzrTLIL*U!r;QT_uC(F&NQ^;wbsXnkQZYqy%Gvw*GMxAdrWY9&!F0y22 zY0o?}Qzk*1d^bg!1i^jpm|b&^hQ?@yv)X1FP7nu@o;F})E2ZEt3L4tqW*^vr;VEXmUYd~7QQ81 z;1)rpXDHL8W=A@dg=3y1H5~^x#HJ;;IpP&4=Sv<%VmNJWEBJzM8E1+xw<)d#rW>TA z!9jQRWd}=&LQ^TiNGv%k8F`WSKU1mj9OTz$vs~{pEBW?wos7bJN*BryA~{lbU2dWb zR5mEf|%mR*o7eIfW!-`>XdG{u=dm^{OY*eBee7|r2DST9_X z?r6KFWYy1@?r1Y5igZvooGLbp*~sp!l^$c zD_A={L1;=`^~!a8&9h+~TLZH=Z@Y1lpc zyU7%v`Em?=v?1A^3$4ttZ`M^f^+dYZo}f~3B8mkTrJL1&Fi zC$qN_8Dimr`|jGXIr&?eQ4c{}zKF8K)V4!xT9IBL6%t9-l&D+@cG4w9aC}PWag@Wn z;I+owa_%ZJrtF|CmKZZ-*d~vz0J6x=w#Az0F*+rzQm6yVgBpCMU=6Wu;mQvm~K=e3AaS3H&;p}K4(XhOp=QfzN- zG(KuT9y?;fL}iHcp4(#AbMK>=`jf^<^` z`PnQl;*jvraLZmkLF6A{qy+rm!7*m-2tGuSTw@sp^diXRK4J|CtoP6byZT?d_tA;w z%tWG(ZYgC*+LA4J#S93@VISdz*hg0=jP{X+ir=4?0`jm8tT%!`gGCAz9 zm0+wpJQ6piGFWVi)fi5j2fk&PKdhT;0JBSX@8Xg1xADDycXoWwQ-`%Up$8n*7`vq2 z+PO_my9l!=*|kk^)==ojaU#%*DF`iFULfd_pfYwv7h!%j$}7*$wBe`ydGDB44bBrM zmX2yj=8?nf2Q5SmKk zJRX@?4mpQln{bbGc^ZyS-)aOx4&8CAmZ7pnGvAbrNiz`B{7CF544#J{`eFLL_vqe8 z7rkn*qyu?AZY6_hQSKZm%^x62zZWq6AWIxD#@sCv4L{CrK0H105l&EDor z31u!hqRvMc8Lk9?mFfhhsDqie`t*r6tT*dj@tFIlM&BU-3T}J11-X<2sz4UWE%)>{ zt73DS%R*Mu2L~=G3Z>ShXR^YYaqucmV@HXL6o;OStl?~T- zGT0KOOD5#uQ-&@gUac}^mpH%EH?Zv;Z&CMtjh1a!NPE53b>4gyNjOh2f~lZ~wAJY; zy57_k%l#+rd{f*FrKgW-jY{L1jUgx z6Qa#v(ZnIvnM2&xfGmq2bys;GrX(CV>5(K18#!6;T*i_jwF1a|R4{`#Vu%>Yvh^ZNiM|0EYu zyzw`bSmn!z0$-?e1qV*h6ce*%KEdRhvM~e9QlNHpXP&P(2!${Q3eTKbGNU_e>maV5 
zgbX2m>)~rYNWzI3en%W5)^k%P4aAYCrwH?_iAh(j*Cj5U@mc~_1AR!LsWjkul|K?Q z@+sg7py3+*-s^(&tAQikOpjf5J#|Q4k|mM5UTx84k!8`WdzX+8e+uXiK&(6`b+4#R zPc6DqOD0fa$cOXbk?Z*Qn$6jS{08UG4$j=m+_~=0-yyT(6i+fE#2kt^n0X%I^jV+! z?iCjjPI)*a%LwhNEpq62)KsT6`lFnrCP)NA>(0#j+rvM!d}&s!c~%>!E1^%77A17= zxmt(M$xU0;R9)@ELKn7K@nLssmTLp#Y+AHpBUFR32s38dkI*9J!9jlRfc$JyWbZS$ ziJHr(_x^r6QtdGav8G)!GY)}dUt zC{B-?hC}9<<9)O$<6f<3$DbtQFmls@COGlq?M{iH9_D_)VaH7aQca0~L~D`IZoxal zlG-+lHHc=rbc_h2J$fwlIG7`*gwrWN1KJ4_o**48>Dl;2*e*su z5g#|W*TR%I-9mceLt*yJ=#`}`!9@SmYEC_o;#l?*Ki+=XTn*MI-4p((T}BCLfl{-~ za=bH$;d%P!E8g{k&POTh@-pZhhwhdMfe>#sb0WWlDjvzay<>Hhs86ALzumJ`c1YR|rhPUX7dCMCl#WK9n98!$h=<{$ig(aS@P9c|yc z@zwNWYovO89&fL{dwp^DKewejch;;lDSd-Uo%Uqb2Hk}^Y)q|LIO%dZRn?Tu8KPdN z#Y}#)L=|ac_NC3a)cL8RW_rzsx&ua+guJc_BSzHbK>?_vi16n#%WaEIzVQ5vPpMbT zei@P?mq};4%M-*TxmExfEZOJnTN~!&3zl38EMUnF5s(H|{RX39w`B#!pJuoxSCB?8 zSD(GlfD{sB5FxtekU;Dc3_-yQt@cV5JZ?esX44S(ni8Tq-nSvqV#$CLb{F^ z4MD+V7|^`bQZnb(ha1xG>CelrC2@L3Y6PQE0}ZNlC16GR-@=_+E!UNSV=~`Sm?-eyy$h8cbOU8naS{ zhZsIfY{u@63X3kEKKTZPyuqqGJikEgY!R!GI5=PWTw5xV7N5ghaNfX$@jA{SrRCN2 zN7xK$)CwcXAyhZXvS+hF!R0K1jGE4N&mW$CXs&z4k#n&l2i`(lsACRia5+qu<`Iri zelIA)o#aOkU1u}t49sWU`vYSNmzG)flrScWo{pX&V#ep@Ogs=_{UOdzj`Qr(QxLqL zbuY!g3Wwda;TIlQ<{a+&8pxQkj{LNKl!I4bchDikzS`%$^lXNU(@-CmJ3=|tQvaDL zch67N2nKbzI|(t%@OO5}9iMZ}L)W>4^Rsc`BYn!7ozZAtHm8Z|?FchKi7y?iVfb8R z-c`XggP-+P=TG!P?U6mJ?jx!R$T);5NGahG@a@;2`J#T%FvE#%&YtqHPc z5j=RudnWw0>A4Ey(P!4W%zLxTjit{02PjQA;kpGK{fbhwZ6+Egnlj3R6V$Vn&ztuu zD*aQ3A%@m08g-ZTCeGSC0xpBZ)QuXFCvscBcz_C$h6bbwmx2Hf%6iHJ&U9Fj=n;xA zKN~Ci!BQc$A7Ab2d-TDW_kIUwMRi*IC#*M|cTbW$9S_=aptU#;_ofA8k`B6@>!Qtx z9!S-D(2$$%7-y{gUch87m#dT@>gJefmGer;$B z3l;w`f0S6Cpclk1K!-KR#mzj9PoQ?eL9Ee576OdHJ7C&7~`uv~m$p zmxh-+K>vMU&6O43n7^iPqI>M^zKfI+6SrAbfrbG+$hZWhV%fc<8kbB~%BH;NUyXI0 zHNkeC0&^1FHA4(7;m5si&XFh%CnJ9-(QN1(G=qE!GQB)a6-ZPh+mkai9*zALog@)! z{a~=lFkRIE9qfT?6eCQ#nXRlSU(u};ZckKPz*MVja?R(W6Cq+R;-nt!mn8@? 
z`0{jKDyWEqXGt^d!a?fLo1pp9=OVV@buYpE{2`IUroUr8H7@ka@g8_&1{ld%jNN%H ziO7%2|H!B{S{BpS51#X9RUNv}~l>X8-dEZ5rMpRAB)-0^|4$P_o zCXrB}FD1O2iC*azrzEMc63+VuR+5+9vV7+qoVj|(5*Q&A4&j@INd)DeHW1Ko{OwnupxKBa}OJjvnv}rr7|D~tTo3>Wle_4D#KS#)( z(&L^MT6H?uv?RIz&sP^LC!(r@P;IZINVaH3TdSH&My*}?N)A(uXn>ReaZ<;(C95tUIXG8S$o!P4MR^0n@o*3}+9a22PQY#t&YNH`R()W8_Uf%+{sRP(w-D zj-C3Pm|R!lyt+SYQ*~iu2FFgD-lUOSjx8N*0(DD~oDX+I!0K#* zQ|0z7!#qoU#v!7O6LR{(%a#5NEyj+lnPfy{a@UjWSsCTmtWQ1ZzAgdXaB|>JH>jFp zBtvZ4$jR$2@k9bJ)>b22SA(-ar7&!%6!uYe5oNc|6(~wEVLDU`#ja18HEmC%UHJ3Z zngJ(F`0K(k>duIE*Lu4_JBSTW64JCP-Jc1u3lRb*=KS6T`k_AX=ZE|F@c!fW;qLxJ z`1_lDGrz-6?mnlD+!yU>xk0IKyQ3MW&Vnnn;|9(3(@CECbaMjdKy+Pwx+x#to`PTV zvQxx;*XhIs4M3G)8`M|fJYDi|HNC!O6l3C)J72719}+WJ8YN99InI<+8zq?{$}?j6 z3Fzit+RrvjIvGN-;8+v?sz>HdY=N#Lm{maW{Q+8U=PiIvyWlA0>}{vg9+EvxT~Bm< z7|U}mfgD^CPO|Nq3dGH@^Kr{p=g@4F+IIKOpDrok*7#V{7_MwdSUJO~^S7ypbARRW zHHrajIIQ<|>1UjVlP-g;z5{ct=dV+jQx{NNS6ctbZM#3sF82?A*~g0k6LAFN%I~e_ z*FH#pI`)i0W~#hTzhVW1wf*gJ8C*y;vMzbmgRjCM8c9!87KX?gpbu1JtNOu6HdcmL z%7y#XW@~TLS#fOAuc)Dcc<#ue@6X}-`}Ou`2+wJULLxaQ&MXRzM@VP><$Nl0kqd_I zokGx@r0}|gFN+@hiiQ7doLY}h-BA0f=gh{*1QnGqdyn2pzVv=mtSRE$_wW~j{lP-U z;KI^N#^k2pK!GQXnG^HFCVlRU5Q8->Bj;R>{V+{D#-C;&tLS(m&7g$>UN})IDa;0> zmQ5J7whAc8Y9D=^wLXtS>Lf%#{9;`1@g}v3MRh>gCNEMA%FMQ+-4`lrml2~bFrnSmMevB>ds{-b5g1bl%s!N z-yu8hk)~xR&oU%3Q6bW**N~X&M20g{8>_-;y2)ee(bixG#uy)Gw6CnXTsx^v%#jEh z@!*@&=Xu65%2JO*b-=`7#(b!z8i9TgfQ7FGzIbpNu!{UvEF`vM#u9jMfo+O{A^UX za+G+s%07p^>BA1i(`4mv$IJw6&)rvOmf%g3;Hi3!L_e+7G<&ojBAs5Ke%swU_C-2cX)e<&QI*pPxzGA;BU@5J3{!9IUh)a7o23)VTq|yRAzewEMWu zZ-|lo4SA8l`>yL+Z(4m7=EO<`7Dv~%q{+%xJ?-=>V<WA0M z@i2LFCK3QCsZKD;dW=_0S#i$epbQ#iR66OoGI zvJbFCOo5v~vmK^kJDCGbgxuOHpifKWqvk*Vfl2395l13V0c{J}M#CO6jNik32ZiL4JzDB;GPZdsN%k}-C;o}!acRCfn_%=2z7%$)aDXkIw%Y$Rm7a= zQ^}^ns!fkB7u|pGoNe}-2Pe(LE#HCO)uV?*CtG#XFjj-~(4I$$`ue6@$U8>TAhzyF zzgX;BFYnPZY29XyN|*H=P3*MBXr~Q8l}Y-Wa_&@JgY*z6V=jDzo<&pT1B&7ZNO-e# zc-5Hr8?l%@ZqEs2zg_jJDx!i|j3V+NC?+N#lQd5EPe5|GSH~vF`GmM*Poz;QNjBN7 
zl6dj*tVla-hEvKk2vbEiT#KZjC|!Gu6MSor;$*whjf1%A(@j^BMkl!Np7No5)jf)` zNvBv5X1?khhO6zWm$&B37X=|oP{?@hi8z@EvrjyLTa4c;#?Q~3BzgMWlHcvSbq9sm zwPQf`)`wdY)7(6i3pg451=WiSrUXo35vJW^iNkl8p`#80K@E=9TBWXzeqcg6#Ksn4 z9O>MY%R1_96~yhX^b1e2)jO7~2#S{0>ZoVq(^^7Ue9Kxb=@BZyJXrB$Z$2{LU$kE&C#MU7bcM#(csP z8Db7lBqwcFF6&cotAI|0!zH^g>~;lSo<5B>wa~fONHCb>MGsKf?u;!rWs|0m+-%qB zSGUDk&lP2;mBFX&D{eE%S+?d+)7^(Q{4vm9;iKnq3XdOXt4(_~FpBNpni3!y)5l&!qVPJ(LE3RwY5Ean7C6QhbZbc1BrUbhZA(ZMFi%hW4$KO*``8*1 z&b!8RbkcI+vSxZoC659~v|KEn^Ol z+mmaA&xyK@2%<1F#Z2i%dLeaDJ@FLcCz`0|%jlJ^Ogoq@Df(4vmYL29On~0CpmgR) zGtv{9Jgfjp68Cl(j-Sm(E>0rp5D@qzux!P(wj1ZGuiFxbugwb|Hd znY3^UGr%R%DZHJE=M?-_IJ1nC_UE6`-A11`GaR0x+SlFU7zqp{3C`4?BL$Op4N$z0 z%PW~amxGI(hF5{ku{xycR2$y+BuSUS6m>B740!H@RI&=7$sepd;my?i_5^7mZ~qgb zDZ_-aYdiF}k!VEhKHMzQsGyM1QToptur#z8$mEJ|t3ZY;z3<1F)n>O4cePLiCq7Od zT68G(Dmz6sRlSe;2j?)us=v8+vg1aG^oS0yZVwaV zCuu3h#TNMW=5djV5Tg@+u04!`kK24P9qG^{peMjscA)S1Mj8SfhS z;Yt=QwFT{DjVn5=W>SHG(oqMs;#)M=LMIu`0?bcT30F<6__5iym+qxy2p(OE0!1z9 z6KkflYi(PD@*uyRsoiAutde+@NW)7{oPfse+Y&kW8dIJx`Oem?tGH2KAsirY(Ln3d zy&1eYKB*6F%Ca#T0rg50w)|{|CG2#~H7mJV(ny9ZX+q2)B1_oPTcglgQG$v8>8pyB zrSE&6HG>ocewVzbq|4rE3<(Lo19anTlCr4)+~AF;$!CSvQ;q#|cr(LUw{O<_KD?g3 zY79EN?+rDsh11kxb~kzCO?Kv0Q(@KyETDVrF^^?J%-UvQVlJdb<=rQjeI|g?wN1tH zH#+H983vwSoW5M{t(x)=L`v(Wz8C^9Of*GPj-e*PNBvUD{U}d_3ag=_Ef# zi30U2x~OaJCd{V+&>GRf<2`zwklY}k-k*et|Kd5tT4($my66>GmgIl%;f zhfC^aYR%L_&fC)nyi0-*AvrA%U7{6Rbhf6=&(Y!DteBUfAOzJIN#dFq(`T2ckbJ~+ zn%yh|T@(@q9Sk{|j}MV6C|{qLL6S+4DwjhoHX$zs&<%r7jYSs^c^-${OmIF%9EBQe z*q6PrUApdRJx*GYXv2^`6|=mXL`2AL{{1)$+-^PpexA*?sv zjKkZ)CqRwUqq|J)PnGv+gTl>gyX3&9Nl+=9nw!3GcW&RMN_jittkvX=2*tyY3?>s6 zdK~7?s@I4^dQ0&!#(TndT(c*@3=fgw_fpr)D8FWuKN(7RBPeDnJ%r%>_BLwcy1^;r z9)>$*zFnRX6Im;Iw{1m_PCbepA&3Y4UV6`)JVjr$W}%VNkG=PoItAqF)JLzWL#K^T z?)jY8%r|A2SodKnzS=sr#fnM3$p>*@*#wh>%M5DM)JRtewD6JFuchUl4{!x|%@Tv= zyk@GIP<|ew3=98zRG^8Il1QJ!j#SwdYV(7X4$Fu=p3PYdRtCS$K>y0_R{%)bOuQcRzo|E-S%cG^s7w|64(R`TC3nRF_bc^$F?;8evT1MimxGhLuqC> 
znmyWX(Q;?%6o3TB9&8eV!^|^`F_tT#)Zr%gIvJ1a^>6~xR5jd$RhQre#0&x$MEDJw z@@$416n%9ippj_v?TCV94{1f~{5%2=xhBk?bbJ790~CFq6+nT+pU)yZ)Qj#2C^8P< zrRR~SOw)|WgO_oBtsMgcXV@L$VIK#%^TyXQ{Y$j_qGk*h{bcd;zA)dEkwOotvuGLE zBl@2{m$vED&LX1hS#X}XaKZ*8Bz!7B4(*Uf98*in(ZW|;Od!1Qikg^2GIk_n-oWID zX!8sr@?k@Fqsm+#1#;ZZwIy z$KHdbDeG0Wb^YF!W;-+#Q@BoBZd&SQDg(Z*2wlkAPU~-CWx=33J-PAo=TkNnR&Dy* z#-4dX@1Y44PwjwUlR#MK`FZNvy-D7Rz=#&=lHoThjzSFw;0L(ZY*~PfdIcmE*0V-`J(0*0Msu2)16|+64FR1Y9 zgHk^^O*t{FtHikffmOS)AN=4hL?V2oOH#wX*KMEVMY~+Xt8FUwX!CK4#Kjom8a_VE zm`!$VC%THFvB-v_=-m^?YRfTZE=h|foboN|=W~vxKnj~S&B_m)A4-L5WJfSMkrY0= zjsEUKm;JKlK>vy``O>yr$Znzxi6yt04I7GXGb2^LkqX`P!;Up8C%M9shiDG0rHT{i zi!Q5wP|tq>;zTLoPp&Xsd|!YJH|=VkTp03bSUn3yhHs&x|i+I>oj4W9D3W+^i1l3LWzXkNA3XS z7eo02B{bC&O9TvfAMaxPHQL+FzgjU0`((%W%w44~u);gJ%O@6EJ7 zeVXnm8gu%@(0KCaSGPV~Nm(JjrcW7!2bg`2+(AeuuvJ%ivOzXq1(awLx_4-aGieb2 zN?*f!^n~B+ulk&0lE*FTTyc4QFEgMdxjX^=J^>2)h~Y(#FPzINWwbj7x<`627mp>V zt-wld5|ehDz_@JD&!(qV`uTwSGR=3D%W2Sg1wH{3xrO5f-4;D+V1m9-A26ps2y=?y z!n^JQ(jF^y>1`EjXxybtm?GL9*FLrQYe`Ecra!B}M_4{8r9p#0iv zY~btdw=gRw-Z6AW5wS{AHY!iqpb}d``O^$Na*O-HMf~J`aaD4O8mdFYu8?As$=2MC z9M<3@im2tfocWiMtjN`Fs|lw@<98R__%6xR<~XIT!-?$I{HU)3d?&lJ=+DJv2klze znVkvFF?TytoFt4>1GD_OJb31b5)gN0xa2X2!gpszWYmH};A`srK0^7~{L-&-xS8bi zoX&V;owa`dtiuCw5?V;%Z;+=rp=eyPX27YGOEnp9`7L^t>R?uJQSg+4E5c``GMJ=` zQGxP&y?6S5%T?H!>HtwXNMA(~U#2jHwO3)zto}=CX_q>=ZlIhR{v}wbThhc~*0gyZ zv5c~!WZ360D>T+ylJ21Xs_sMCnrOxEIrMHIjFc1rE)Ml>U%z2P$t*uYlTkG^eANYW$eLqBn?l zTs=UlhrFCeq!BK8$+I6h|G2FJxxXdXwpBm+tM6s>I8lOA;k=vPCwXq&X2ygMUtadz z-mlZ$S&Z0`ms7Fp*M~cvlPqN@iMQ$;BrdFGd`r!T&+{|xC5c?!?OERTFV6h|k5you zB6cgWF?*Gy5I4l$pi(SZ^SekEM4-W9mJ)$ULo}bNMjeRN?G{{|;>ll(EOD z?tXTLD%>Qh4;dnJw`iwLLbnZ08_76IIRA_?_?b^*HV^xDgma$I-zCTA=0qgy`c7f& zESc>INEz7@9?Tkp&Pz;1_E;FADP9Vb-qBR|NG9Gbq<@r}O<<<(aTh0L&QWgN<0O%U z;oZ$RS5?GL>mCl02YX1Tl7-JFJMbKpaE9kd1B<6diyY?e!q>5A3My^*;2gKX2jm$L zo%PgJ!aVrgqj4WRM~0fw6%^Jj^w<)6<_?`{iZC&{E5_fyl^fL4+Lmlmqh_9G6%{Q} 
zNVwqrK`S)5<+J>;u0lyTP9)$TG3PR(FX$Lq%UgXj?s=LwE1^6IGW!FV6*f);=>%3L9fUrabVRJCh*-_P+PC33{A$dz&FH)cX9ZgfiA)%hfA^p-<3fRGvf5PMiZC%0_dZ`f!|q7b!~OCGj;EjoFlD$G zl5=>sF}-jO^rvxefZ6pi57}XE)7x;Kj5&d+CUS-X%==y8MyEU7p)ona zlZOU1D(#q8M=KA)x^UuKGzFZQQ&gGLww|~83_B%`$7q*OUJucR%FUTQ?P4kto}&D0 ze!21E9*b@7lS%WVw4Vgo6#`F2^c7lsJy8HA37Z_Llug;5;Z_=#*Qjqdq=0Y?^WU)vS&4<~3K*oSvVUU~Yb) zXuZ5byK{S|%bs6L>n!=y51>2#xCfMcsuV@7k|}|bI77lq&&(ON?iay22Py_4v@wG*v$M3cpu4?ka+}`rn|$Jwv6-Zwt;rbHW}IgAkSpw& zq`%yy=N^Al5f3 z8P+7x?=y_lb8cf#&A59V?HzZqyXa;b$v2BL-;$sq#@!))kr>o-@vt~cTn1XQ=SJU1 zoEcQ435E`r6EY0EwI+P048K5qQvmrHg_zFfw)QM~2Gq~jyhSIfB@Jyx^9qobOM=#7 z+})DlA;ksd$H8aae*4PQ^Qw!)lNXYBA8k?|20L6UUf9}d!OGpoNKYfWdZ`(y zx%4XYE>EX8(5S*x1WHK53;gjyz45DKcQO!DS;z)4dQaQ5O=ie5c690LV3t2MlZA@S zJi|r)KVMxdw1_+`f&>_OJ|t&IZ)r}qfHi{MpLae0Taaa0Ys4VgGtx&o&GlviPLmv_ zI+$u|Nu)1Hb}@vZPxk4l#$%dns`UDBdQaRtN+y*fmo|h@{h$Q$=>8=0-a||m-j#!( zqZGSiV5g(5zjR*5*+f$U!`Wq+Kh1Dw*%?3k_9r269lwQ#Z6jTsG+c3WrH&IMb#OSA+ArY5P0s8-6ijqy-$N zPW0}vb42aZ!IcmHS>czo7MJtQOWjlfZHIn|iv4njdUl&$q6=9ZWlTNdEbXFP>?uVJ z%Fo8P>vp_HL$WPvDEfG0_|;LPyl{k*U7p*Hr5aGyxSC6jlywj>vzXONRTf1~-yg%c;-cTiJMuK>@eNU&pF3`j2cDHZ zk2cprG3Y`9THAu{H@WE364%65SwgzG<5}O|^n=H1N8q)<3!ktR`mD{SlT8t30BXIr zer5uixGH;egN(hKiXZz~?+O;9&i7uMa6y<8oLv9p1g8Uw5gNRQKCFB1@hgZUhV|fQ zOJAq|8YlH2ORp0ajvS^?bua-$@kRf~)uXw6R)_l&lBJH9WPHxC6UB>(!%A*|tBNkC z%WeoVpQPMigE3v3aFhB>46$+ z!1+0KSS@E~XHsI*BTz9$Jky0h8NmZEkBhy{$ReWRa{(xR_(P+_cqcWDxNmq03p=$m zj9@U5AEM*lbxH4<&*rn+8o=9|)XrX>--lVA?3ky`B%-9?3pLJSQSv8rMSeZiN zh3e^{&o^rMdYY#-Ht>gS|3qN!MXy6)Y|P+4AcRW>Gh%os8YZfh`TBBm|d zO}lr+4sDj0jdEc9C}T`Fe4y**gLKcXR})B$-jP(GOFF1$kaN*P${LWxH-5*tE1dbY zbsjD#c$ZX)Gi{-UMD<0OCPf#w>!vhQ*?57;Y!GjR_?Y>2cqqM2U-+6kNkWSYh+Zm- z39>53Es zNR(PE;9Sa_T)tub*gdbVeBh(bJ8B(NlnZ{HNR~d;{chD2phpY81zI(aKuiZ1CHF2o zO7;ZjMS~vV^w*^gSH^iHztOF#lFO-UEbnJ&Bi-LQ&Yy6hIcGhkKYDsM`xs+J86^7d z!WK1iEf(wT!S35jqZ4zcic+Eu<%v1xV|6uGJCg{6@W{grX;pHL6sIdAH>3>n6WmoU zKR(&#f*Yddikt*Wv*?eWeMPXdph1u7rzNVdPS*5hGl=vMk2*oL~PdX 
z`4DVcu=@5kQI13Rit3Vx_!#Hcf^$wSp^sX0H)2hOWuM+}?@v$TZ}XZr26d&hVvLT$ zS(hO)26aU8zRR}JTx+$B>Nls>1h*?{u$I0oF4FL^R%SnPH$Zy{!~vd|q<2lG)Z-{@7hz6AyM+<`3zw2! zdumB6FnCQEisd~Ga>G4MEy|}T*V^G+klBTNsW`LruA#wBVO~3Nm!xT|Nxk_Z#`5K< zHhJ%wx(qtj!u1tiPFT}3O(wuh?s$Cx<`k?AOWa27b>k=o`=0Z7m!Hh&gFxp9`B{OX zTyD;68Ug(w1(eg8pKz6_ZF_Qg@ZEW$sSE!}s`pAJJJY8cLc%v?kUx#nzwQH74gAqr zx|&*wxe$d3J`GCegzG@cY`)a)!2I68%$e}j_}Y(ma}4u<_HVi5m$WBz?VM1UdS7>D zmk2h%87W$F6Sgm(WlAorxOt@Of;j(#@if|D(!6I%j4YL?L0V4FUO(#78Nzgak6z@y z^BuRS%+LDc23_sis|Vz|nM59yd{63NaUtQ%HnD8r33x9d*gD#OpL0Rf751 zyi$#ITqI8nW2)N+s?VNEkqqm$HUvZAFPFTQT^4FoAx+0RY7BaclUIp_>m4cV@~xR+ z^tBl0qav3xvwp0|kGZa@5fi5g2v|VYu2Pij4VuRGYEUB6LE&UfdEi_Bg8JZadmg%% zx9o1O7%z1H0YpnJ$)HvfBH5jq>ug5di{jo4{^k>H#xtIS%lY_N$VTf5=m{|!CsYPC zsb$lwZV~DymACqy2H(?tS{{!p$HbKVbTDslM9}DOI9_X#JV+UCl2eq!QXi(CKsgRl zj_$U5iJs7=PI0?LWP}2jYxymj3judcg;5*O**MONT(V__Zj>X{lgN>bjtD*px3%i^>H*ba9CZL9o9L36vkWP9 zCRf>$eP+N>?g+Eb60&X=)3Y-l7&w>EsVI`xdFWYnZR*T_e^WE<EVieCW&x921)HiNXNTG71(?off=0eg|8z25uidw-8N6M}zgmhI`)8{FF z*|Gupq3?6z@v>e6H3N2Wv+7J%LT23kt*8Oq;AL_+`)x!=>H7D(8%=QU#v#0<*%r`M z5`3D%IY>x_zr0SifakgrRD!QCZ1elK`DLh=VR7`HPS1Yb3kC5-Y*p~%OUI|Sp-peI zYFEwMbiDcx1C2!_(Ayxe^PG|eD4k}_YeLtc{A|3w(=Dp!(c13n_~= zX%AK1BGYH&;WZm{PXX@Eu@wF1eTTo^MLg7f?c~+7EO4(y&3hv+5N+ z-*XaFbug#LYbka&sOu{Ikjs zmxdee&(E_Hq=Q4T-NnNm`x&6L2iiiRRpQiz+ll!gX!Ng#x&MjH4ECxE5(Py%fq~dI zyzjg@BxCeOCcKCdd@PX_J?l$88Qi@vcLohRi`$dyX%omz?`KIcC_Q`pV*Jw555lu%^LtKm5N&g9O0;gputN< zqbf6YXWW`QFuxc~YGh!nb+i3k{0$OM!~KXgc`|30YSIe^w4U+%!3+kiV$V6!B_=0$lYs1+bClH5khD|| zANgnS+uD07ts4UH{mxH_2Z2vh600{qNWrOZ(`zXK`aJ>t`66kvpC4NL#>rn8&VNOn z_QY}(XhxZYO)cOwke|gIhH_)*OUZh?Ive)c89Jf2_nki5(s`B zf8Nbj>b(f^C-}!710-Ktvpwze=W@xI5?}^*l0ni=C+9sU(G3?<8Rlmrt^EZFwx`UJ zhv%p~YesU5;d!@E;YwLfHYllSt&Gz9WlFc7g7vvQWY|Sn!Ok#QvkSM9kzk$Yi=MM4 z*TJN-)8{+TO>esP%7>7KvL9mzN1@ekKP|Le5tVz09< z9(EDQEQ-=wGGV}o?}D@pdbC|=G?(qq>Pz3HIZVl;qDAj?26Mh zs|!K|{zZs0qe&~r(cB^<@%!Dsd-ptgW8S;>31Qe$74Oh4Kxj~NgwGScuWxeJzNe@t zc*6MOCj!Oq!`bP@{Wc5+{qj{Kfm>FnzKRlY`^t#yOQ<9PavQSk6$An2^R)6bs-0D 
z^ZKmD;#q)I9OpQxT-Y`}9YRflOQ20WfcXhcUq|ge_hCQ)v9e&hH3T|{@$)@isqafC6@{SeO*}s?!Dg3x|OBSIVT+im%ZMB z{?hD|SZov7u6NY9-)FfkEAoe?Y|hpDT}!F>@=7@bJ3?0CQMMxZ7@|`kEz)N=+bW!U zdhPJxT>Q?4&wf~5qs@cGW6v#HK}+bJX5{0sJ5irGv&)9 z$vVRMd4dz?=G4I6UDv-(!|l@xaCLjmWkBd{DaiXe&FfY+Ph>-Jt6)!x zc#Sj{QBjvkMaX9T=CUc{ToOl&_zC_bk=7i96jg;l>ZoXmEz@uT zo()QU1!ex}4+}ySC@JvKl;@C8dQBU-Gn{ZouH&4V6ugX&y*GonOj|S80YN^h9Wl%E z?V0ybiKYw_{LSGSYGatTr{}z}r>W6-rLq;qn!~l^IIFz0IcW*>#!D!1FFlWi&*f|W zKcv4CjOkGT7U`mIT)R4yCm*`!^Cq9F9ETwdE3p6I7VQ$8*`T;<$}s8C+$=T9{otMz z-IMR~N3-0C8p*{wG$NrmU_|06yVO^bt|_BDG3P8Tuxab9n}DH4L{Ne_j2rOX=|s|m zN_Tt_XZQh9o3Xw>PTV^EwqCHmPs`b(S)&xBHLU_WU1HkI9WgbCPj)IQaC?L>l`7PV zI(pkC*wW5X*r)ucrcLdX#+X*Evn@(mBYl z=IKf6r>DET1*a~AZZECTcHWlzeqEOo+Jrf1>eZlt=}Cav`xIyD)*(D9;~efGe1V?P z!71P1EC-#jQPLnWf}07RG4qpi-q~)ZR655g@6XbHQ0_gr%wfrAMy;c?`&=BD{A9mh z4TPVpsF2}=I<_WFlvP->JY#bG2u<^3=!r*%v`@P^u|1Q?2vY@AyeiR;LQmaj+HV`r zv$ADPmk&7F1Hf>CKtjsx>Dx^*tc()Vj;0^%Jj1cu9RJ9bq)=5y%1%Q?HJKd^K>0{J z_|qzIE@)PWFYy;NA-@f8L#889mD~jVpr*;9B(a$D;-Vl_g!t3=5TmcN5EY#iRrIJt zu>k>|4nyK1nJLToW}$sigc(8(F{nRxWZBi2kC8sG=-oU-h~X!A$vO!)d|nVjw`mDt zc+uZozi`t0V~jkZj8h(b4$O(CtDaA?d!dZLx8nga3CQHciaKPEZ4Wi5K8#bN80-|1 zVV*LI5ogy5j-vRp>ZjIzp1L=%1PxjjUZh2o&XplE9;d9++O6VUmJgk(eoPW){5 zikw$;ay}r+FkH{i1UHlD6+GBUEG0e|;P{d&RwF9F#hftD6}u&O=%_70kGDVfEV8gk z1SGqocoJiHEy=b!!4t-S=_I?z{SnL`X4T20-EtF-Pvm#pCc<;|o>z!e8d~h~5m8^U zbRkG@(fHnFFtf@_#p3UgHx5hF;rL-cHc7d1>Tuqs2-86K!^!cwHk)KtZBw&FDKjVK z;Qg=`2v^$9$ME_&bdty-b< zDTrb;Ho)sv?4UGjTDE4J)~tG-JuFX&PE_X(QLqVr!zzenP6FP}hJ?M+s?d(t*<~5Sl@nb$b?<{5Wi5*_wWHj>;a?MB6Gb=LE{UM9QWUdAq4> z>W~gYdWuRaTeU3r9$bFGh0**F<&$kti72f2e0!?!2c;M6-m!mRbAnh{VuD#%8IDrw zk%%k;hP2wbDSsY$HM^QteVTL9G)(}B7;eveAj>obThe4O%=q{0iu}#K9@?{fea^BQ zkb6W)v(xFdl&6o|W2B!eQpjq>jO=g}13wm~chRDXBz{jU3MH@#oFS^rsq%*G+e@zK zBK!xRz9ZjW@M|e}p=??(Y}T4(UjZGl`q}HtKGg=1pD`H56B$l6hd1T8FW}sK;ydcj zsq*%@<8gb&JQ67_C2x_;i2z(vgo!3&Z#v%(M@@MIFUS9XZLiWrM*pOv5&3 z1I=LONXfBCgTa%^aWH2s~H4zpmP*R z$~Q(YC?(IywM`k~;(O+A=G><07L4JhzQ|9a(WGzb&?ixRfXRl#5gQ}M5)hl#Z5Wo 
z2WGNR+VaRS1UE$TBz)K97rkwY*8Kc#2C;vJj_p~Iw;?_Ni*!imgH>C_J-aRsv0|?DZiKE;lS17%l6*$iTj(g>onyvuiBZHutHn^R* zHS^%eHKjG5yZ$=1A7^OQ%f2_t97j(;vEcD`N={MIf!;02GIY(kCGp!By~RLgPK6rL* zHXPn{+qTrtqCuhRT>R|!RGsrVqTv&jY$s{?T_gZFNhCbTAPluH-|5oGQ;WPaG_wDc3 zzy05SP~q3`c~RkaFQfQ7^OEh})tNk+G!P}?AP2%?!;8l%>8wE1QDM)`=FEc*ci9D* zWdpiM=%&0#LFNg?2S8?@a8ExfJS@?yBxnG#%Z+K*W~H{YIln$9dX+K(*~I?S*F>E= zJRnijgkH~<@ilikWt()M&MKd#1BfSwshIeVFA_$I9meiC_e+GnTSgK9OU^qvi-OtkX%VG8g3>*1RyeLWyyH6r1+;G zadmpL@jN`g9NxX1OEz4AfD1P7K}qLD?rwpWfjN;HFA5T#f zb?&xtA02+_j+uVapvWC&YcTnUK~m*Z2GgXAUIVlL1FDF3WWcD4|20~W>xP_85a&w)R>Ci8TjdTN`_;tR2Im z4>_~jXQd%aj&->a)zExda0V z*%W~?KwF5r3txQG=bC0GTS&G`&FCyTlTwRP&AkM2=dEPB^-l9;IeT>NYy_7dWyZ{> zwPaHSiD!te$hSDH6)pLe0l&IEV@cCMM=e}OA$a3$Kvr(**d>gMHQ#Xs-E3-1e8d)J zMV@<4ahL%$S#lxvgPee*A2tEyeN?yDue4qJJ&R`NNF7$VFo8cYGk0mbML^5;D17*W zMe)P`0S5lKM-{czb&&Yv`I>wXzq3sl<`EGq2lRNFqSXaCUrHZ=y?MpFNb}dioa(`6?p9 z=NtTym#Os8b@5B)Ca1Hxa?i7iQM;rjRn$7V#8Y;8NhYSekJdZ_-D9^;KK-FZOC17W ztU_N1vffE=h%-CBasejg7jE=EidmwyzEb>dgcHr?^a7R+c`|fMcbn}3$`SJB~sLw zPEgPOIyR`Na+WuOMJra-=Er@>+Z+R(RcL|DP1k4SsI=>8e?YQmqdmOr&>SYH^r~Pg zmS7lEHrlt7UQ)G%qg1z@0qwYKdu*s&VE*jG3_w-H(~aFNd=cT=3>$Uk?3MVRUiSMK zj!Q>7T51ett_UNVk}g+fx z6pqi`COTH9dBr_xY>qM?8jl2@rx*b<#0GPwF1_H{?=DYu>?r%L3or6)gKA1B$XzA-o7frJ5_HW|o-h98f61Q{UxL}1_F8<01D4<7RE*4_flxAvs#bC!CX zT~9R~_Vhhsxf>g~mB7EjyQ?^>> zK({eR2_;4l-YnRa&N_V`JM5YMEL(S}(y4N^_ZM-(=3$G5tFF(PvGC`9fU`%Rh zgE{F-ufW`>*Wi!FWH?OSJbrrFBiW^LXS*OTE0|{196qz_IJEt*vG?P&Yvc6U z&_;~#@-q>|ED64s1K%NaW)|Hve5 z9{8E}A^=?&Fvg?oNEP9HE=YQ8Q(1+y*#?`1wXYB)ohSf>@ze%tc#cu!YVIoB{51w$ z5!Tgd%Dr|w%&C0kZFc0_Dx79*P_D#T$Ig=}#T?rcN--5UGfY!{S)bsnc$PL2K@8@O zyhst{oU$DzwH@Z89I>ZXBu6#`)C}eJK`MisJo`MHTT=GskwxDTBu!v)P4uNhs0QY| zJ8Sl4g@9(J?aB_2YPq=UE7V=jF#Zt6sewK|M{hFr{AZZuW22Fo1jcGjmA44lreH}5 zl&JUS!!FG>;CfveZ^peFzUtN7NYyc*Dyt(+O+u$Qe}0Qj; zyoYi*r(nM^8*8I-^SDe*Y~1IE>Pb1n8As|<26Bf2$G$&ST+B~u2nsbD6&3j-kd_}))xmLHw zoXv3Bv=@iSHz$mryBP6n)A#1XxR3GqU&Hef$ffn~Gv8!Sj(W?$#!%*aGhCjhhNR=P 
zFtE+MCv0oFU2B$`+V%)H+3w6+u)+x|p~OqyIU;*an#Hp9-zVKX@5AAAVH{+a1@o|G z0U{RN!)S7EPadB28UoV|#WRPR6&2BYR|JYn{%LuOTn!^rIX}8XxVY}mykQ7$Q3+&# zX#`Kz&xCtP?e39T$qfMpH)uZlK@=DTpdjaQK9br+otd4_bqb6PHP32Lzx9?15Tii{ zGk9wZ6_CSYO!3FrwN$3&*`rNoIJ-10-kGKhlbCFSzYJr(^pR8Gq&o6gYk10b#dZK7 zw>ue#6!K>Vx%m}$Xn<>`PSHM(pZ7BmQ}<;jcEZseUSMWh%V~jMi5K|u6lB~3Zy#@O z_dC|zhNZu;wfBB+56q6!+|HV_H|)}wGML$*C zia*3gKbke`cnjO~7VoD;v`_3}ocqF4Mhqc2<5h=wzc!X_S3`<6Gvt9C*Wc`PJwW;U zroshqzmI=zOPzinq%u3_XKzTJogc6PTlP57nEcM?Yx|?7*S{(5oKMg@nssjKmLLC$ zL}~Eh2(!~Hw+99A9!WE+TfY3^V6E+#$m`+rxmfh)D_Qyp>u=HjQjoA7@4*4M`MVi% zf{fI*`i$|cJEpr^&wRkClGL+l&JOzw_09EZgF&gpxNA|*4Z1Ue&i6pqv}q#0Vgoey zq#Aa^P3qk3D$I1@=WLGbB&iOe%q0DSQ#Gkh#;z&98QA8|;h1R5@XPHszQm!BmzUiW z)tz)^P5fM-KJrql8Mcp!s&VH-uu=fSLRU|UD8eaAiEuqsL>-I zN@_DDlcf=-T9z|A#uVqy$$FoO_!b z@)J8oq+dD6iFGd~ZHYF}y1_ER!}II2OS55}T|?v!6p`Nb-tS;ECm6TKMj71cR)q({ z-fy<`_V(+h77?_*FPL)X_K{ny34ypNt*f{YYe7 z_H89EZ^6@62$yeW|iT65~GWQ*H zI5?d^C6Krn_`x|_^azrn{Qg<5Y5L$1etI1+9P#8K^vx$954_wR?%A2WMuxyfVsNYF zM~5JG2m5mCEUpLVHV$&Qy^b&AeXg`qY4~(cra`PdV*N_DBo|bRykA>8ZVu15x`yO; z-RuLNx7;|2G>@$f=5m!0jZ4>hkpLr68!YMf6mosWJDVZ=>n|&u_$#=c=C%LpRhaJ2 ztN0ay^p(RTy`#7PcA@u=L^~(<$v#IWdVc$*ySM&H-2KpX-VVZgty_EVIz7DIw0P3H z?^t%ftmk1F$Z($KmQiM2H<~$e2fic7Z4gC$uRV(G*j~SbUDz7VC5h8pI?F`a^~lov z4Mv9a7UbT6J-DQ4qs^f+v@3!yp#Ik`!`Ojel~!^Lrzeo!;6+waaEHa^;r;1Bh&yez z*W26J?`HFN7dm~6H^E4bkvlAoABJyAI(@G}KACb2MW_k?nRflqkL`>pTq~XinqbU( zpF1oDmDq3=NE=x5lj!-s{O4=kU|-Akv#q{p3H|TzAlNv2W^K&SEx+(Sp`(8}KZu(Y z(Yh|X%k$G1`aGMP2*2EoGV|vq=kLze#~-Z^YO6F3M=S^86yezh!mAV_ny`ibgv}#F zxKO86VS*yNn>-6=bC;Jk5Lk!7aDmCfe}XY=9RD`zH)=hkcIoh))8yazxczG|ZgMRO zskZPgIrPk0>$z7a^E6gOx$6@zrab%v-|5MknG1d2GtSAT^*nE4!c{_?*oJE#93ecm zh6nl%rxk#{S*c +#include +#include +#include +#include +#include "faidx.h" +#include "khash.h" + +typedef struct { + uint64_t len:32, line_len:16, line_blen:16; + uint64_t offset; +} faidx1_t; +KHASH_MAP_INIT_STR(s, faidx1_t) + +#ifdef HAVE_RAZF +#include "razf.h" +#else +extern off_t ftello(FILE *stream); +extern int fseeko(FILE 
*stream, off_t offset, int whence); +#define RAZF FILE +#define razf_read(fp, buf, size) fread(buf, 1, size, fp) +#define razf_open(fn, mode) fopen(fn, mode) +#define razf_close(fp) fclose(fp) +#define razf_seek(fp, offset, whence) fseeko(fp, offset, whence) +#define razf_tell(fp) ftello(fp) +#endif + +struct __faidx_t { + RAZF *rz; + int n, m; + char **name; + khash_t(s) *hash; +}; + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +static inline void fai_insert_index(faidx_t *idx, const char *name, int len, int line_len, int line_blen, uint64_t offset) +{ + khint_t k; + int ret; + faidx1_t t; + if (idx->n == idx->m) { + idx->m = idx->m? idx->m<<1 : 16; + idx->name = (char**)realloc(idx->name, sizeof(void*) * idx->m); + } + idx->name[idx->n] = strdup(name); + k = kh_put(s, idx->hash, idx->name[idx->n], &ret); + t.len = len; t.line_len = line_len; t.line_blen = line_blen; t.offset = offset; + kh_value(idx->hash, k) = t; + ++idx->n; +} + +faidx_t *fai_build_core(RAZF *rz) +{ + char c, *name; + int l_name, m_name, ret; + int len, line_len, line_blen, state; + int l1, l2; + faidx_t *idx; + uint64_t offset; + + idx = (faidx_t*)calloc(1, sizeof(faidx_t)); + idx->hash = kh_init(s); + name = 0; l_name = m_name = 0; + len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0; + while (razf_read(rz, &c, 1)) { + if (c == '>') { // fasta header + if (len >= 0) + fai_insert_index(idx, name, len, line_len, line_blen, offset); + l_name = 0; + while ((ret = razf_read(rz, &c, 1)) != 0 && !isspace(c)) { + if (m_name < l_name + 2) { + m_name = l_name + 2; + kroundup32(m_name); + name = (char*)realloc(name, m_name); + } + name[l_name++] = c; + } + name[l_name] = '\0'; + assert(ret); + if (c != '\n') while (razf_read(rz, &c, 1) && c != '\n'); + state = 1; len = 0; + offset = razf_tell(rz); + } else { + if (state == 3) { + fprintf(stderr, "[fai_build_core] inlined empty line is not allowed 
in sequence '%s'. Abort!\n", name); + exit(1); + } + if (state == 2) state = 3; + l1 = l2 = 0; + do { + ++l1; + if (isgraph(c)) ++l2; + } while ((ret = razf_read(rz, &c, 1)) && c != '\n'); + if (state == 3 && l2) { + fprintf(stderr, "[fai_build_core] different line length in sequence '%s'. Abort!\n", name); + exit(1); + } + ++l1; len += l2; + if (l2 >= 0x10000) { + fprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence '%s'. Abort!\n", name); + exit(1); + } + if (state == 1) line_len = l1, line_blen = l2, state = 0; + else if (state == 0) { + if (l1 != line_len || l2 != line_blen) state = 2; + } + } + } + fai_insert_index(idx, name, len, line_len, line_blen, offset); + free(name); + return idx; +} + +void fai_save(const faidx_t *fai, FILE *fp) +{ + khint_t k; + int i; + for (i = 0; i < fai->n; ++i) { + faidx1_t x; + k = kh_get(s, fai->hash, fai->name[i]); + x = kh_value(fai->hash, k); + fprintf(fp, "%s\t%d\t%lld\t%d\t%d\n", fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len); + } +} + +faidx_t *fai_read(FILE *fp) +{ + faidx_t *fai; + char *buf, *p; + int len, line_len, line_blen; + long long offset; + fai = (faidx_t*)calloc(1, sizeof(faidx_t)); + fai->hash = kh_init(s); + buf = (char*)calloc(0x10000, 1); + while (!feof(fp) && fgets(buf, 0x10000, fp)) { + for (p = buf; *p && isgraph(*p); ++p); + *p = 0; ++p; + sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len); + fai_insert_index(fai, buf, len, line_len, line_blen, offset); + } + free(buf); + return fai; +} + +void fai_destroy(faidx_t *fai) +{ + int i; + for (i = 0; i < fai->n; ++i) free(fai->name[i]); + free(fai->name); + kh_destroy(s, fai->hash); + if (fai->rz) razf_close(fai->rz); + free(fai); +} + +void fai_build(const char *fn) +{ + char *str; + RAZF *rz; + FILE *fp; + faidx_t *fai; + str = (char*)calloc(strlen(fn) + 5, 1); + sprintf(str, "%s.fai", fn); + rz = razf_open(fn, "r"); + assert(rz); + fai = fai_build_core(rz); + razf_close(rz); + fp = 
fopen(str, "w"); + assert(fp); + fai_save(fai, fp); + fclose(fp); + free(str); + fai_destroy(fai); +} + +faidx_t *fai_load(const char *fn) +{ + char *str; + FILE *fp; + faidx_t *fai; + str = (char*)calloc(strlen(fn) + 5, 1); + sprintf(str, "%s.fai", fn); + fp = fopen(str, "r"); + if (fp == 0) { + fprintf(stderr, "[fai_load] build FASTA index.\n"); + fai_build(fn); + fp = fopen(str, "r"); + if (fp == 0) { + free(str); + return 0; + } + } + fai = fai_read(fp); + fclose(fp); + fai->rz = razf_open(fn, "r"); + if (fai->rz == 0) return 0; + assert(fai->rz); + free(str); + return fai; +} + +char *fai_fetch(const faidx_t *fai, const char *str, int *len) +{ + char *s, *p, c; + int i, l, k; + khiter_t iter; + faidx1_t val; + khash_t(s) *h; + int beg, end; + + beg = end = -1; + h = fai->hash; + l = strlen(str); + p = s = (char*)malloc(l+1); + /* squeeze out "," */ + for (i = k = 0; i != l; ++i) + if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i]; + s[k] = 0; + for (i = 0; i != k; ++i) if (s[i] == ':') break; + s[i] = 0; + iter = kh_get(s, h, s); /* get the ref_id */ + if (iter == kh_end(h)) { + *len = 0; + free(s); return 0; + } + val = kh_value(h, iter); + if (i == k) { /* dump the whole sequence */ + beg = 0; end = val.len; + } else { + for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; + beg = atoi(p); + if (i < k) { + p = s + i + 1; + end = atoi(p); + } else end = val.len; + } + if (beg > 0) --beg; + if (beg >= val.len) beg = val.len; + if (end >= val.len) end = val.len; + if (beg > end) beg = end; + free(s); + + // now retrieve the sequence + l = 0; + s = (char*)malloc(end - beg + 2); + razf_seek(fai->rz, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET); + while (razf_read(fai->rz, &c, 1) == 1 && l < end - beg) + if (isgraph(c)) s[l++] = c; + s[l] = '\0'; + *len = l; + return s; +} + +int faidx_main(int argc, char *argv[]) +{ + if (argc == 1) { + fprintf(stderr, "Usage: faidx [ [...]]\n"); + return 1; + } else { + if (argc == 2) 
fai_build(argv[1]); + else { + int i, j, k, l; + char *s; + faidx_t *fai; + fai = fai_load(argv[1]); + assert(fai); + for (i = 2; i != argc; ++i) { + printf(">%s\n", argv[i]); + s = fai_fetch(fai, argv[i], &l); + for (j = 0; j < l; j += 60) { + for (k = 0; k < 60 && k < l - j; ++k) + putchar(s[j + k]); + putchar('\n'); + } + free(s); + } + fai_destroy(fai); + } + } + return 0; +} + +#ifdef FAIDX_MAIN +int main(int argc, char *argv[]) { return faidx_main(argc, argv); } +#endif diff --git a/faidx.h b/faidx.h new file mode 100644 index 0000000..98c60e4 --- /dev/null +++ b/faidx.h @@ -0,0 +1,81 @@ +/* The MIT License + + Copyright (c) 2008 Genome Research Ltd (GRL). + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* Contact: Heng Li */ + +#ifndef FAIDX_H +#define FAIDX_H + +/*! + @header + + Index FASTA files and extract subsequence. + + @copyright The Wellcome Trust Sanger Institute. 
+ */ + +struct __faidx_t; +typedef struct __faidx_t faidx_t; + +#ifdef __cplusplus +extern "C" { +#endif + + /*! + @abstract Build index for a FASTA or razip compressed FASTA file. + @param fn FASTA file name + @discussion File "fn.fai" will be generated. + */ + void fai_build(const char *fn); + + /*! + @abstract Distroy a faidx_t struct. + @param fai Pointer to the struct to be destroyed + */ + void fai_destroy(faidx_t *fai); + + /*! + @abstract Load index from "fn.fai". + @param fn File name of the FASTA file + */ + faidx_t *fai_load(const char *fn); + + /*! + @abstract Fetch the sequence in a region. + @param fai Pointer to the faidx_t struct + @param reg Region in the format "chr2:20,000-30,000" + @param len Length of the region + @return Pointer to the sequence; null on failure + + @discussion The returned sequence is allocated by malloc family + and should be destroyed by end users by calling free() on it. + */ + char *fai_fetch(const faidx_t *fai, const char *reg, int *len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/glf.h b/glf.h new file mode 100644 index 0000000..d9d23c6 --- /dev/null +++ b/glf.h @@ -0,0 +1,11 @@ +#ifndef GLF_H_ +#define GLF_H_ + +typedef struct { + unsigned char ref_base:4, dummy:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */ + unsigned char max_mapQ; /** maximum mapping quality */ + unsigned char lk[10]; /** log likelihood ratio, capped at 255 */ + unsigned min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */ +} glf1_t; + +#endif diff --git a/khash.h b/khash.h new file mode 100644 index 0000000..1d583ef --- /dev/null +++ b/khash.h @@ -0,0 +1,486 @@ +/* The MIT License + + Copyright (c) 2008 Genome Research Ltd (GRL). 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* Contact: Heng Li */ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + if (!ret) kh_del(32, h, k); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2008-09-19 (0.2.3): + + * Corrected the example + * Improved interfaces + + 2008-09-11 (0.2.2): + + * Improved speed a little in kh_put() + + 2008-09-10 (0.2.1): + + * Added kh_clear() + * Fixed a compiling error + + 2008-09-02 (0.2.0): + + * Changed to token concatenation which increases flexibility. + + 2008-08-31 (0.1.2): + + * Fixed a bug in kh_get(), which has not been tested previously. 
+ + 2008-08-31 (0.1.1): + + * Added destructor +*/ + + +#ifndef __AC_KHASH_H +#define __AC_KHASH_H + +/*! + @header + + Generic hash table library. + + @copyright Heng Li + */ + +#define AC_VERSION_KHASH_H "0.2.2" + +#include +#include +#include + +typedef uint32_t khint_t; +typedef khint_t khiter_t; + +#define __ac_HASH_PRIME_SIZE 32 +static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] = +{ + 0ul, 3ul, 11ul, 23ul, 53ul, + 97ul, 193ul, 389ul, 769ul, 1543ul, + 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, + 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, + 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, + 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, + 3221225473ul, 4294967291ul +}; + +#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) +#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) +#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) +#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1))) +#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1))) +#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) +#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1)) + +static const double __ac_HASH_UPPER = 0.77; + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + typedef struct { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + uint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; \ + static inline kh_##name##_t *kh_init_##name() { \ + return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ + } \ + static inline void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + free(h->keys); free(h->flags); \ + free(h->vals); \ + free(h); \ + } \ + } \ + static inline void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + static 
inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khint_t inc, k, i, last; \ + k = __hash_func(key); i = k % h->n_buckets; \ + inc = 1 + k % (h->n_buckets - 1); last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \ + else i += inc; \ + if (i == last) return h->n_buckets; \ + } \ + return __ac_iseither(h->flags, i)? h->n_buckets : i; \ + } else return 0; \ + } \ + static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { \ + uint32_t *new_flags = 0; \ + khint_t j = 1; \ + { \ + khint_t t = __ac_HASH_PRIME_SIZE - 1; \ + while (__ac_prime_list[t] > new_n_buckets) --t; \ + new_n_buckets = __ac_prime_list[t+1]; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \ + else { \ + new_flags = (uint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(uint32_t)); \ + memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t)); \ + if (h->n_buckets < new_n_buckets) { \ + h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) \ + h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + } \ + } \ + if (j) { \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isdel_true(h->flags, j); \ + while (1) { \ + khint_t inc, k, i; \ + k = __hash_func(key); \ + i = k % new_n_buckets; \ + inc = 1 + k % (new_n_buckets - 1); \ + while (!__ac_isempty(new_flags, i)) { \ + if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \ + else i += inc; \ + } \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = 
tmp; } \ + __ac_set_isdel_true(h->flags, i); \ + } else { \ + h->keys[i] = key; \ + if (kh_is_map) h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { \ + h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) \ + h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + free(h->flags); \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + } \ + static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { \ + if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \ + else kh_resize_##name(h, h->n_buckets + 1); \ + } \ + { \ + khint_t inc, k, i, site, last; \ + x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \ + if (__ac_isempty(h->flags, i)) x = i; \ + else { \ + inc = 1 + k % (h->n_buckets - 1); last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \ + else i += inc; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, x)) { \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; \ + return x; \ + } \ + static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \ + { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +/* --- BEGIN OF HASH 
FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [uint32_t] + @return The hash value [khint_t] + */ +#define kh_int_hash_func(key) (uint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [uint64_t] + @return The hash value [khint_t] + */ +#define kh_int64_hash_func(key) (uint32_t)((key)>>33^(key)^(key)<<11) +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +static inline khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = *s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other necessary macros... */ + +/*! + @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name() + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Reset a hash table without deallocating memory. 
+ @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! @function + @abstract Insert a key to the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @param r Extra return code: 0 if the key is present in the hash table; + 1 if the bucket is empty (never used); 2 if the element in + the bucket has been deleted [int*] + @return Iterator to the inserted element [khint_t] + */ +#define kh_put(name, h, k, r) kh_put_##name(h, k, r) + +/*! @function + @abstract Retrieve a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t] + */ +#define kh_get(name, h, k) kh_get_##name(h, k) + +/*! @function + @abstract Remove a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Iterator to the element to be deleted [khint_t] + */ +#define kh_del(name, h, k) kh_del_##name(h, k) + + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x]) + +/*! 
@function + @abstract Get value given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->vals[x]) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) ((h)->vals[x]) + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The end iterator [khint_t] + */ +#define kh_end(h) ((h)->n_buckets) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->size) + +/*! @function + @abstract Get the number of buckets in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of buckets in the hash table [khint_t] + */ +#define kh_n_buckets(h) ((h)->n_buckets) + +/* More conenient interfaces */ + +/*! @function + @abstract Instantiate a hash set containing integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! 
@function + @abstract Instantiate a hash set containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash set containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + +#endif /* __AC_KHASH_H */ diff --git a/kseq.h b/kseq.h new file mode 100644 index 0000000..25f31a3 --- /dev/null +++ b/kseq.h @@ -0,0 +1,207 @@ +/* The MIT License + + Copyright (c) 2008 Genome Research Ltd (GRL). + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software.
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* Contact: Heng Li */ + +#ifndef AC_KSEQ_H +#define AC_KSEQ_H + +#include +#include +#include + +#define __KS_TYPE(type_t) \ + typedef struct __kstream_t { \ + char *buf; \ + int begin, end, is_eof; \ + type_t f; \ + } kstream_t; + +#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end) +#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0) + +#define __KS_BASIC(type_t, __bufsize) \ + static inline kstream_t *ks_init(type_t f) \ + { \ + kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \ + ks->f = f; \ + ks->buf = (char*)malloc(__bufsize); \ + return ks; \ + } \ + static inline void ks_destroy(kstream_t *ks) \ + { \ + if (ks) { \ + free(ks->buf); \ + free(ks); \ + } \ + } + +#define __KS_GETC(__read, __bufsize) \ + static inline int ks_getc(kstream_t *ks) \ + { \ + if (ks->is_eof && ks->begin >= ks->end) return -1; \ + if (ks->begin >= ks->end) { \ + ks->begin = 0; \ + ks->end = __read(ks->f, ks->buf, __bufsize); \ + if (ks->end < __bufsize) ks->is_eof = 1; \ + if (ks->end == 0) return -1; \ + } \ + return (int)ks->buf[ks->begin++]; \ + } + +typedef struct __kstring_t { + size_t l, m; + char *s; +} kstring_t; + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#define __KS_GETUNTIL(__read, __bufsize) \ + static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \ + { \ + if (dret) *dret = 0; \ + str->l = 0; \ + if (ks->begin >= ks->end && ks->is_eof) 
return -1; \ + for (;;) { \ + int i; \ + if (ks->begin >= ks->end) { \ + if (!ks->is_eof) { \ + ks->begin = 0; \ + ks->end = __read(ks->f, ks->buf, __bufsize); \ + if (ks->end < __bufsize) ks->is_eof = 1; \ + if (ks->end == 0) break; \ + } else break; \ + } \ + if (delimiter) { \ + for (i = ks->begin; i < ks->end; ++i) \ + if (ks->buf[i] == delimiter) break; \ + } else { \ + for (i = ks->begin; i < ks->end; ++i) \ + if (isspace(ks->buf[i])) break; \ + } \ + if (str->m - str->l < i - ks->begin + 1) { \ + str->m = str->l + (i - ks->begin) + 1; \ + kroundup32(str->m); \ + str->s = (char*)realloc(str->s, str->m); \ + } \ + memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \ + str->l = str->l + (i - ks->begin); \ + ks->begin = i + 1; \ + if (i < ks->end) { \ + if (dret) *dret = ks->buf[i]; \ + break; \ + } \ + } \ + str->s[str->l] = '\0'; \ + return str->l; \ + } + +#define KSTREAM_INIT(type_t, __read, __bufsize) \ + __KS_TYPE(type_t) \ + __KS_BASIC(type_t, __bufsize) \ + __KS_GETC(__read, __bufsize) \ + __KS_GETUNTIL(__read, __bufsize) + +#define __KSEQ_BASIC(type_t) \ + static inline kseq_t *kseq_init(type_t fd) \ + { \ + kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \ + s->f = ks_init(fd); \ + return s; \ + } \ + static inline void kseq_rewind(kseq_t *ks) \ + { \ + ks->last_char = 0; \ + ks->f->is_eof = ks->f->begin = ks->f->end = 0; \ + } \ + static inline void kseq_destroy(kseq_t *ks) \ + { \ + if (!ks) return; \ + free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \ + ks_destroy(ks->f); \ + free(ks); \ + } + +/* Return value: + >=0 length of the sequence (normal) + -1 end-of-file + -2 truncated quality string + */ +#define __KSEQ_READ \ + static int kseq_read(kseq_t *seq) \ + { \ + int c; \ + kstream_t *ks = seq->f; \ + if (seq->last_char == 0) { /* then jump to the next header line */ \ + while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \ + if (c == -1) return -1; /* end of file */ \ + seq->last_char = c; \ + } /* the 
first header char has been read */ \ + seq->comment.l = seq->seq.l = seq->qual.l = 0; \ + if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \ + if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \ + while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \ + if (isgraph(c)) { /* printable non-space character */ \ + if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \ + seq->seq.m = seq->seq.l + 2; \ + kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \ + seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \ + } \ + seq->seq.s[seq->seq.l++] = (char)c; \ + } \ + } \ + if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \ + seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \ + if (c != '+') return seq->seq.l; /* FASTA */ \ + if (seq->qual.m < seq->seq.m) { /* allocate enough memory */ \ + seq->qual.m = seq->seq.m; \ + seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \ + } \ + while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \ + if (c == -1) return -2; /* we should not stop here */ \ + while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \ + if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \ + seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \ + seq->last_char = 0; /* we have not come to the next header line */ \ + if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \ + return seq->seq.l; \ + } + +#define __KSEQ_TYPE(type_t) \ + typedef struct { \ + kstring_t name, comment, seq, qual; \ + int last_char; \ + kstream_t *f; \ + } kseq_t; + +#define KSEQ_INIT(type_t, __read) \ + KSTREAM_INIT(type_t, __read, 4096) \ + __KSEQ_TYPE(type_t) \ + __KSEQ_BASIC(type_t) \ + __KSEQ_READ + +#endif diff --git a/ksort.h b/ksort.h new file mode 100644 index 0000000..16a03fd --- /dev/null +++ b/ksort.h @@ -0,0 +1,271 @@ +/* The MIT License + + Copyright (c) 2008 Genome Research Ltd (GRL). 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* Contact: Heng Li */ + +/* + 2008-11-16 (0.1.4): + + * Fixed a bug in introsort() that happens in rare cases. + + 2008-11-05 (0.1.3): + + * Fixed a bug in introsort() for complex comparisons. + + * Fixed a bug in mergesort(). The previous version is not stable. + + 2008-09-15 (0.1.2): + + * Accelerated introsort. On my Mac (not on another Linux machine), + my implementation is as fast as std::sort on random input. + + * Added combsort and in introsort, switch to combsort if the + recursion is too deep. 
+ + 2008-09-13 (0.1.1): + + * Added k-small algorithm + + 2008-09-05 (0.1.0): + + * Initial version + +*/ + +#ifndef AC_KSORT_H +#define AC_KSORT_H + +#include +#include + +typedef struct { + void *left, *right; + int depth; +} ks_isort_stack_t; + +#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; } + +#define KSORT_INIT(name, type_t, __sort_lt) \ + void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) \ + { \ + type_t *a2[2], *a, *b; \ + int curr, shift; \ + \ + a2[0] = array; \ + a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \ + for (curr = 0, shift = 0; (1ul<> 1) - 1; i != (size_t)(-1); --i) \ + ks_heapadjust_##name(i, lsize, l); \ + } \ + void ks_heapsort_##name(size_t lsize, type_t l[]) \ + { \ + size_t i; \ + for (i = lsize - 1; i > 0; --i) { \ + type_t tmp; \ + tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \ + } \ + } \ + inline void __ks_insertsort_##name(type_t *s, type_t *t) \ + { \ + type_t *i, *j, swap_tmp; \ + for (i = s + 1; i < t; ++i) \ + for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \ + swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \ + } \ + } \ + void ks_combsort_##name(size_t n, type_t a[]) \ + { \ + const double shrink_factor = 1.2473309501039786540366528676643; \ + int do_swap; \ + size_t gap = n; \ + type_t tmp, *i, *j; \ + do { \ + if (gap > 2) { \ + gap = (size_t)(gap / shrink_factor); \ + if (gap == 9 || gap == 10) gap = 11; \ + } \ + do_swap = 0; \ + for (i = a; i < a + n - gap; ++i) { \ + j = i + gap; \ + if (__sort_lt(*j, *i)) { \ + tmp = *i; *i = *j; *j = tmp; \ + do_swap = 1; \ + } \ + } \ + } while (do_swap || gap > 2); \ + if (gap != 1) __ks_insertsort_##name(a, a + n); \ + } \ + void ks_introsort_##name(size_t n, type_t a[]) \ + { \ + int d; \ + ks_isort_stack_t *top, *stack; \ + type_t rp, swap_tmp; \ + type_t *s, *t, *i, *j, *k; \ + \ + if (n < 1) return; \ + else if (n == 2) { \ + if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \ 
+ return; \ + } \ + for (d = 2; 1ul<>1) + 1; \ + if (__sort_lt(*k, *i)) { \ + if (__sort_lt(*k, *j)) k = j; \ + } else k = __sort_lt(*j, *i)? i : j; \ + rp = *k; \ + if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \ + for (;;) { \ + do ++i; while (__sort_lt(*i, rp)); \ + do --j; while (i <= j && __sort_lt(rp, *j)); \ + if (j <= i) break; \ + swap_tmp = *i; *i = *j; *j = swap_tmp; \ + } \ + swap_tmp = *i; *i = *t; *t = swap_tmp; \ + if (i-s > t-i) { \ + if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \ + s = t-i > 16? i+1 : t; \ + } else { \ + if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \ + t = i-s > 16? i-1 : s; \ + } \ + } else { \ + if (top == stack) { \ + free(stack); \ + __ks_insertsort_##name(a, a+n); \ + return; \ + } else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \ + } \ + } \ + } \ + /* This function is adapted from: http://ndevilla.free.fr/median/ */ \ + /* 0 <= kk < n */ \ + type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \ + { \ + type_t *low, *high, *k, *ll, *hh, *mid; \ + low = arr; high = arr + n - 1; k = arr + kk; \ + for (;;) { \ + if (high <= low) return *k; \ + if (high == low + 1) { \ + if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ + return *k; \ + } \ + mid = low + (high - low) / 2; \ + if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \ + if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \ + if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \ + KSORT_SWAP(type_t, *mid, *(low+1)); \ + ll = low + 1; hh = high; \ + for (;;) { \ + do ++ll; while (__sort_lt(*ll, *low)); \ + do --hh; while (__sort_lt(*low, *hh)); \ + if (hh < ll) break; \ + KSORT_SWAP(type_t, *ll, *hh); \ + } \ + KSORT_SWAP(type_t, *low, *hh); \ + if (hh <= k) low = ll; \ + if (hh >= k) high = hh - 1; \ + } \ + } + +#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t) +#define ks_introsort(name, n, a) 
ks_introsort_##name(n, a) +#define ks_combsort(name, n, a) ks_combsort_##name(n, a) +#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a) +#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a) +#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a) +#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k) + +#define ks_lt_generic(a, b) ((a) < (b)) +#define ks_lt_str(a, b) (strcmp((a), (b)) < 0) + +typedef const char *ksstr_t; + +#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic) +#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str) + +#endif diff --git a/misc/Makefile b/misc/Makefile new file mode 100644 index 0000000..8a38f54 --- /dev/null +++ b/misc/Makefile @@ -0,0 +1,52 @@ +CC= gcc +CXX= g++ +CFLAGS= -g -Wall -O2 -m64 #-arch ppc +CXXFLAGS= $(CFLAGS) +DFLAGS= #-D_FILE_OFFSET_BITS=64 +OBJS= +PROG= faidx md5sum-lite md5fa maq2sam-short maq2sam-long +INCLUDES= +LIBS= -lm -lz +SUBDIRS= . + +.SUFFIXES:.c .o + +.c.o: + $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ + +all:$(PROG) + +lib-recur all-recur clean-recur cleanlocal-recur install-recur: + @target=`echo $@ | sed s/-recur//`; \ + wdir=`pwd`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + cd $$subdir; \ + $(MAKE) CC="$(CC)" CXX="$(CXX)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \ + INCLUDES="$(INCLUDES)" $$target || exit 1; \ + cd $$wdir; \ + done; + +lib: + +faidx:../faidx.c ../faidx.h + $(CC) $(CFLAGS) -DFAIDX_MAIN -o $@ ../faidx.c + +md5fa:md5.o md5fa.o md5.h ../kseq.h + $(CC) $(CFLAGS) -o $@ md5.o md5fa.o -lz + +md5sum-lite:md5.c md5.h + $(CC) $(CFLAGS) -DMD5SUM_MAIN -o $@ md5.c + +maq2sam-short:maq2sam.c + $(CC) $(CFLAGS) -o $@ maq2sam.c -lz + +maq2sam-long:maq2sam.c + $(CC) $(CFLAGS) -DMAQ_LONGREADS -o $@ maq2sam.c -lz + +md5fa.o:md5.h md5fa.c + $(CC) $(CFLAGS) -c -I.. 
-o $@ md5fa.c + +cleanlocal: + rm -fr gmon.out *.o a.out *.dSYM $(PROG) *~ *.a + +clean:cleanlocal-recur diff --git a/misc/export2sam.pl b/misc/export2sam.pl new file mode 100755 index 0000000..ae82123 --- /dev/null +++ b/misc/export2sam.pl @@ -0,0 +1,107 @@ +#!/usr/bin/perl -w + +# Contact: lh3 +# Version: 0.1.0 + +use strict; +use warnings; +use Getopt::Std; + +&export2sam; +exit; + +sub export2sam { + my ($fh1, $fh2, $is_paired); + $is_paired = (@ARGV >= 2); + die("export2sam.pl []\n") if (@ARGV == 0); + open($fh1, $ARGV[0]) || die; + if ($is_paired) { + open($fh2, $ARGV[1]) || die; + } + # conversion table + my @conv_table; + for (-64..64) { + $conv_table[$_+64] = chr(int(33 + 10*log(1+10**($_/10.0))/log(10)+.499)); + } + # core loop + while (<$fh1>) { + my (@s1, @s2); + &export2sam_aux($_, \@s1, \@conv_table, $is_paired); + if ($is_paired) { + $_ = <$fh2>; + &export2sam_aux($_, \@s2, \@conv_table, $is_paired); + if (@s1 && @s2) { # then set mate coordinate + my $isize = 0; + if ($s1[2] ne '*' && $s1[2] eq $s2[2]) { # then calculate $isize + my $x1 = ($s1[1] & 0x10)? $s1[3] + length($s1[9]) : $s1[3]; + my $x2 = ($s2[1] & 0x10)? $s2[3] + length($s2[9]) : $s2[3]; + $isize = $x2 - $x1; + } + # update mate coordinate + if ($s2[2] ne '*') { + @s1[6..8] = (($s2[2] eq $s1[2])? "=" : $s2[2], $s2[3], $isize); + $s1[1] |= 0x20 if ($s2[1] & 0x10); + } else { + $s1[1] |= 0x8; + } + if ($s1[2] ne '*') { + @s2[6..8] = (($s1[2] eq $s2[2])? "=" : $s1[2], $s1[3], -$isize); + $s2[1] |= 0x20 if ($s1[1] & 0x10); + } else { + $s2[1] |= 0x8; + } + } + } + print join("\t", @s1), "\n" if (@s1); + print join("\t", @s2), "\n" if (@s2 && $is_paired); + } + close($fh1); + close($fh2) if ($is_paired); +} + +sub export2sam_aux { + my ($line, $s, $ct, $is_paired) = @_; + chomp($line); + my @t = split("\t", $line); + @$s = (); + return if ($t[21] ne 'Y'); + # read name + $s->[0] = $t[1]? 
"$t[0]_$t[1]:$t[2]:$t[3]:$t[4]:$t[5]" : "$t[0]:$t[2]:$t[3]:$t[4]:$t[5]"; + # initial flag (will be updated later) + $s->[1] = 0; + $s->[1] |= 1 | 1<<(5 + $t[7]) if ($is_paired); + # read & quality + $s->[9] = $t[8]; $s->[10] = $t[9]; + if ($t[13] eq 'R') { # then reverse the sequence and quality + $s->[9] = reverse($t[8]); + $s->[9] =~ tr/ACGTacgt/TGCAtgca/; + $s->[10] = reverse($t[9]); + } + $s->[10] =~ s/(.)/$ct->[ord($1)]/eg; # change coding + # cigar + $s->[5] = length($s->[9]) . "M"; + # coor + my $has_coor = 0; + $s->[2] = "*"; + if ($t[10] eq 'NM') { + $s->[1] |= 0x8; # unmapped + } elsif ($t[10] =~ /(\d+):(\d+):(\d+)/) { + $s->[1] |= 0x8; # TODO: should I set BAM_FUNMAP in this case? + push(@$s, "H0:i:$1", "H1:i:$2", "H2:i:$3") + } else { + $s->[2] = $t[10]; + $has_coor = 1; + } + $s->[3] = $has_coor? $t[12] : 0; + $s->[1] |= 0x10 if ($has_coor && $t[13] eq 'R'); + # mapQ (TODO: should I choose the larger between $t[15] and $t[16]?) + $s->[4] = 0; + $s->[4] = $t[15] if ($t[15] ne ''); + $s->[4] = $t[16] if ($t[16] ne '' && $s->[4] < $t[16]); + # mate coordinate + $s->[6] = '*'; $s->[7] = $s->[8] = 0; + # aux + push(@$s, "BC:Z:$t[6]") if ($t[6]); + push(@$s, "MD:Z:$t[14]") if ($has_coor); + push(@$s, "SM:i:$t[15]") if ($is_paired && $has_coor); +} diff --git a/misc/maq2sam.c b/misc/maq2sam.c new file mode 100644 index 0000000..e30aa92 --- /dev/null +++ b/misc/maq2sam.c @@ -0,0 +1,168 @@ +#include +#include +#include +#include +#include +#include + +//#define MAQ_LONGREADS + +#ifdef MAQ_LONGREADS +# define MAX_READLEN 128 +#else +# define MAX_READLEN 64 +#endif + +#define MAX_NAMELEN 36 +#define MAQMAP_FORMAT_OLD 0 +#define MAQMAP_FORMAT_NEW -1 + +#define PAIRFLAG_FF 0x01 +#define PAIRFLAG_FR 0x02 +#define PAIRFLAG_RF 0x04 +#define PAIRFLAG_RR 0x08 +#define PAIRFLAG_PAIRED 0x10 +#define PAIRFLAG_DIFFCHR 0x20 +#define PAIRFLAG_NOMATCH 0x40 +#define PAIRFLAG_SW 0x80 + +typedef struct +{ + uint8_t seq[MAX_READLEN]; /* the last base is the single-end mapping 
quality. */ + uint8_t size, map_qual, info1, info2, c[2], flag, alt_qual; + uint32_t seqid, pos; + int dist; + char name[MAX_NAMELEN]; +} maqmap1_t; + +typedef struct +{ + int format, n_ref; + char **ref_name; + uint64_t n_mapped_reads; + maqmap1_t *mapped_reads; +} maqmap_t; + +maqmap_t *maq_new_maqmap() +{ + maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t)); + mm->format = MAQMAP_FORMAT_NEW; + return mm; +} +void maq_delete_maqmap(maqmap_t *mm) +{ + int i; + if (mm == 0) return; + for (i = 0; i < mm->n_ref; ++i) + free(mm->ref_name[i]); + free(mm->ref_name); + free(mm->mapped_reads); + free(mm); +} +maqmap_t *maqmap_read_header(gzFile fp) +{ + maqmap_t *mm; + int k, len; + mm = maq_new_maqmap(); + gzread(fp, &mm->format, sizeof(int)); + if (mm->format != MAQMAP_FORMAT_NEW) { + if (mm->format > 0) { + fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n"); + exit(3); + } + assert(mm->format == MAQMAP_FORMAT_NEW); + } + gzread(fp, &mm->n_ref, sizeof(int)); + mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*)); + for (k = 0; k != mm->n_ref; ++k) { + gzread(fp, &len, sizeof(int)); + mm->ref_name[k] = (char*)malloc(len * sizeof(char)); + gzread(fp, mm->ref_name[k], len); + } + /* read number of mapped reads */ + gzread(fp, &mm->n_mapped_reads, sizeof(uint64_t)); + return mm; +} + +void maq2tam_core(gzFile fp) +{ + maqmap_t *mm; + maqmap1_t mm1, *m1; + int ret; + m1 = &mm1; + mm = maqmap_read_header(fp); + while ((ret = gzread(fp, m1, sizeof(maqmap1_t))) == sizeof(maqmap1_t)) { + int j, flag = 0; + if (m1->flag) flag |= 1; + if ((m1->flag&PAIRFLAG_PAIRED) || ((m1->flag&PAIRFLAG_SW) && m1->flag != 192)) flag |= 2; + if (m1->flag == 192) flag |= 4; + if (m1->flag == 64) flag |= 8; + if (m1->pos&1) flag |= 0x10; + if ((flag&1) && m1->dist != 0) { + int c; + if (m1->dist > 0) { + if (m1->flag&(PAIRFLAG_FF|PAIRFLAG_RF)) c = 0; + else if (m1->flag&(PAIRFLAG_FR|PAIRFLAG_RR)) c = 1; + else c = m1->pos&1; + } else 
{ + if (m1->flag&(PAIRFLAG_FF|PAIRFLAG_FR)) c = 0; + else if (m1->flag&(PAIRFLAG_RF|PAIRFLAG_RR)) c = 1; + else c = m1->pos&1; + } + flag |= c; + } + if (flag) { + int l = strlen(m1->name); + if (m1->name[l-2] == '/') { + flag |= (m1->name[l-1] == '1')? 0x40 : 0x80; + m1->name[l-2] = '\0'; + } + } + printf("%s\t%d\t", m1->name, flag); + printf("%s\t%d\t", mm->ref_name[m1->seqid], (m1->pos>>1)+1); + if (m1->flag == 130) { + int c = (int8_t)m1->seq[MAX_READLEN-1]; + printf("%d\t", m1->alt_qual); + if (c == 0) printf("%dM\t", m1->size); + else { + if (c > 0) printf("%dM%dI%dM\t", m1->map_qual, c, m1->size - m1->map_qual - c); + else printf("%dM%dD%dM\t", m1->map_qual, -c, m1->size - m1->map_qual); + } + } else { + if (flag&4) printf("0\t*\t"); + else printf("%d\t%dM\t", m1->map_qual, m1->size); + } + printf("*\t0\t%d\t", m1->dist); + for (j = 0; j != m1->size; ++j) { + if (m1->seq[j] == 0) putchar('N'); + else putchar("ACGT"[m1->seq[j]>>6&3]); + } + putchar('\t'); + for (j = 0; j != m1->size; ++j) + putchar((m1->seq[j]&0x3f) + 33); + putchar('\t'); + if (flag&4) { + printf("MF:i:%d\n", m1->flag); + } else { + printf("MF:i:%d\t", m1->flag); + if (m1->flag) printf("Aq:i:%d\t", m1->alt_qual); + printf("NM:i:%d\tUQ:i:%d\tH0:i:%d\tH1:i:%d\n", m1->info1&0xf, m1->info2, m1->c[0], m1->c[1]); + } + } + if (ret > 0) + fprintf(stderr, "Truncated! Continue anyway.\n"); + maq_delete_maqmap(mm); +} + +int main(int argc, char *argv[]) +{ + gzFile fp; + if (argc == 1) { + fprintf(stderr, "Usage: maq2tam \n"); + return 1; + } + fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r"); + maq2tam_core(fp); + gzclose(fp); + return 0; +} diff --git a/misc/md5.c b/misc/md5.c new file mode 100644 index 0000000..ccead0e --- /dev/null +++ b/misc/md5.c @@ -0,0 +1,307 @@ +/* + ********************************************************************** + ** md5.c ** + ** RSA Data Security, Inc. 
MD5 Message Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 1/91 SRD,AJ,BSK,JT Reference C Version ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. MD5 Message Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. 
** + ********************************************************************** + */ + +#include "md5.h" + +/* forward declaration */ +static void Transform (); + +static unsigned char PADDING[64] = { + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* F, G and H are basic MD5 functions: selection, majority, parity */ +#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) +#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define I(x, y, z) ((y) ^ ((x) | (~z))) + +/* ROTATE_LEFT rotates x left n bits */ +#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) + +/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */ +/* Rotation is separate from addition to prevent recomputation */ +#define FF(a, b, c, d, x, s, ac) \ + {(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define GG(a, b, c, d, x, s, ac) \ + {(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define HH(a, b, c, d, x, s, ac) \ + {(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } +#define II(a, b, c, d, x, s, ac) \ + {(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \ + (a) = ROTATE_LEFT ((a), (s)); \ + (a) += (b); \ + } + +void MD5Init (mdContext) +MD5_CTX *mdContext; +{ + mdContext->i[0] = mdContext->i[1] = (UINT4)0; + + /* Load magic initialization constants. 
+ */ + mdContext->buf[0] = (UINT4)0x67452301; + mdContext->buf[1] = (UINT4)0xefcdab89; + mdContext->buf[2] = (UINT4)0x98badcfe; + mdContext->buf[3] = (UINT4)0x10325476; +} + +void MD5Update (mdContext, inBuf, inLen) +MD5_CTX *mdContext; +unsigned char *inBuf; +unsigned int inLen; +{ + UINT4 in[16]; + int mdi; + unsigned int i, ii; + + /* compute number of bytes mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* update number of bits */ + if ((mdContext->i[0] + ((UINT4)inLen << 3)) < mdContext->i[0]) + mdContext->i[1]++; + mdContext->i[0] += ((UINT4)inLen << 3); + mdContext->i[1] += ((UINT4)inLen >> 29); + + while (inLen--) { + /* add new character to buffer, increment mdi */ + mdContext->in[mdi++] = *inBuf++; + + /* transform if necessary */ + if (mdi == 0x40) { + for (i = 0, ii = 0; i < 16; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii+3]) << 24) | + (((UINT4)mdContext->in[ii+2]) << 16) | + (((UINT4)mdContext->in[ii+1]) << 8) | + ((UINT4)mdContext->in[ii]); + Transform (mdContext->buf, in); + mdi = 0; + } + } +} + +void MD5Final (mdContext) +MD5_CTX *mdContext; +{ + UINT4 in[16]; + int mdi; + unsigned int i, ii; + unsigned int padLen; + + /* save number of bits */ + in[14] = mdContext->i[0]; + in[15] = mdContext->i[1]; + + /* compute number of bytes mod 64 */ + mdi = (int)((mdContext->i[0] >> 3) & 0x3F); + + /* pad out to 56 mod 64 */ + padLen = (mdi < 56) ? 
(56 - mdi) : (120 - mdi); + MD5Update (mdContext, PADDING, padLen); + + /* append length in bits and transform */ + for (i = 0, ii = 0; i < 14; i++, ii += 4) + in[i] = (((UINT4)mdContext->in[ii+3]) << 24) | + (((UINT4)mdContext->in[ii+2]) << 16) | + (((UINT4)mdContext->in[ii+1]) << 8) | + ((UINT4)mdContext->in[ii]); + Transform (mdContext->buf, in); + + /* store buffer in digest */ + for (i = 0, ii = 0; i < 4; i++, ii += 4) { + mdContext->digest[ii] = (unsigned char)(mdContext->buf[i] & 0xFF); + mdContext->digest[ii+1] = + (unsigned char)((mdContext->buf[i] >> 8) & 0xFF); + mdContext->digest[ii+2] = + (unsigned char)((mdContext->buf[i] >> 16) & 0xFF); + mdContext->digest[ii+3] = + (unsigned char)((mdContext->buf[i] >> 24) & 0xFF); + } +} + +/* Basic MD5 step. Transform buf based on in. + */ +static void Transform (buf, in) +UINT4 *buf; +UINT4 *in; +{ + UINT4 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; + + /* Round 1 */ +#define S11 7 +#define S12 12 +#define S13 17 +#define S14 22 + FF ( a, b, c, d, in[ 0], S11, 3614090360u); /* 1 */ + FF ( d, a, b, c, in[ 1], S12, 3905402710u); /* 2 */ + FF ( c, d, a, b, in[ 2], S13, 606105819u); /* 3 */ + FF ( b, c, d, a, in[ 3], S14, 3250441966u); /* 4 */ + FF ( a, b, c, d, in[ 4], S11, 4118548399u); /* 5 */ + FF ( d, a, b, c, in[ 5], S12, 1200080426u); /* 6 */ + FF ( c, d, a, b, in[ 6], S13, 2821735955u); /* 7 */ + FF ( b, c, d, a, in[ 7], S14, 4249261313u); /* 8 */ + FF ( a, b, c, d, in[ 8], S11, 1770035416u); /* 9 */ + FF ( d, a, b, c, in[ 9], S12, 2336552879u); /* 10 */ + FF ( c, d, a, b, in[10], S13, 4294925233u); /* 11 */ + FF ( b, c, d, a, in[11], S14, 2304563134u); /* 12 */ + FF ( a, b, c, d, in[12], S11, 1804603682u); /* 13 */ + FF ( d, a, b, c, in[13], S12, 4254626195u); /* 14 */ + FF ( c, d, a, b, in[14], S13, 2792965006u); /* 15 */ + FF ( b, c, d, a, in[15], S14, 1236535329u); /* 16 */ + + /* Round 2 */ +#define S21 5 +#define S22 9 +#define S23 14 +#define S24 20 + GG ( a, b, c, d, in[ 1], S21, 4129170786u); /* 
17 */ + GG ( d, a, b, c, in[ 6], S22, 3225465664u); /* 18 */ + GG ( c, d, a, b, in[11], S23, 643717713u); /* 19 */ + GG ( b, c, d, a, in[ 0], S24, 3921069994u); /* 20 */ + GG ( a, b, c, d, in[ 5], S21, 3593408605u); /* 21 */ + GG ( d, a, b, c, in[10], S22, 38016083u); /* 22 */ + GG ( c, d, a, b, in[15], S23, 3634488961u); /* 23 */ + GG ( b, c, d, a, in[ 4], S24, 3889429448u); /* 24 */ + GG ( a, b, c, d, in[ 9], S21, 568446438u); /* 25 */ + GG ( d, a, b, c, in[14], S22, 3275163606u); /* 26 */ + GG ( c, d, a, b, in[ 3], S23, 4107603335u); /* 27 */ + GG ( b, c, d, a, in[ 8], S24, 1163531501u); /* 28 */ + GG ( a, b, c, d, in[13], S21, 2850285829u); /* 29 */ + GG ( d, a, b, c, in[ 2], S22, 4243563512u); /* 30 */ + GG ( c, d, a, b, in[ 7], S23, 1735328473u); /* 31 */ + GG ( b, c, d, a, in[12], S24, 2368359562u); /* 32 */ + + /* Round 3 */ +#define S31 4 +#define S32 11 +#define S33 16 +#define S34 23 + HH ( a, b, c, d, in[ 5], S31, 4294588738u); /* 33 */ + HH ( d, a, b, c, in[ 8], S32, 2272392833u); /* 34 */ + HH ( c, d, a, b, in[11], S33, 1839030562u); /* 35 */ + HH ( b, c, d, a, in[14], S34, 4259657740u); /* 36 */ + HH ( a, b, c, d, in[ 1], S31, 2763975236u); /* 37 */ + HH ( d, a, b, c, in[ 4], S32, 1272893353u); /* 38 */ + HH ( c, d, a, b, in[ 7], S33, 4139469664u); /* 39 */ + HH ( b, c, d, a, in[10], S34, 3200236656u); /* 40 */ + HH ( a, b, c, d, in[13], S31, 681279174u); /* 41 */ + HH ( d, a, b, c, in[ 0], S32, 3936430074u); /* 42 */ + HH ( c, d, a, b, in[ 3], S33, 3572445317u); /* 43 */ + HH ( b, c, d, a, in[ 6], S34, 76029189u); /* 44 */ + HH ( a, b, c, d, in[ 9], S31, 3654602809u); /* 45 */ + HH ( d, a, b, c, in[12], S32, 3873151461u); /* 46 */ + HH ( c, d, a, b, in[15], S33, 530742520u); /* 47 */ + HH ( b, c, d, a, in[ 2], S34, 3299628645u); /* 48 */ + + /* Round 4 */ +#define S41 6 +#define S42 10 +#define S43 15 +#define S44 21 + II ( a, b, c, d, in[ 0], S41, 4096336452u); /* 49 */ + II ( d, a, b, c, in[ 7], S42, 1126891415u); /* 50 */ + II ( c, d, a, b, 
in[14], S43, 2878612391u); /* 51 */ + II ( b, c, d, a, in[ 5], S44, 4237533241u); /* 52 */ + II ( a, b, c, d, in[12], S41, 1700485571u); /* 53 */ + II ( d, a, b, c, in[ 3], S42, 2399980690u); /* 54 */ + II ( c, d, a, b, in[10], S43, 4293915773u); /* 55 */ + II ( b, c, d, a, in[ 1], S44, 2240044497u); /* 56 */ + II ( a, b, c, d, in[ 8], S41, 1873313359u); /* 57 */ + II ( d, a, b, c, in[15], S42, 4264355552u); /* 58 */ + II ( c, d, a, b, in[ 6], S43, 2734768916u); /* 59 */ + II ( b, c, d, a, in[13], S44, 1309151649u); /* 60 */ + II ( a, b, c, d, in[ 4], S41, 4149444226u); /* 61 */ + II ( d, a, b, c, in[11], S42, 3174756917u); /* 62 */ + II ( c, d, a, b, in[ 2], S43, 718787259u); /* 63 */ + II ( b, c, d, a, in[ 9], S44, 3951481745u); /* 64 */ + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* lh3: the following code is added by me */ + +#ifdef MD5SUM_MAIN +#include +#include +#include +#define HEX_STR "0123456789abcdef" + +static void md5_one(const char *fn) +{ + unsigned char buf[4096]; + MD5_CTX md5; + int l; + FILE *fp; + + fp = strcmp(fn, "-")? fopen(fn, "r") : stdin; + if (fp == 0) { + fprintf(stderr, "md5sum: %s: No such file or directory\n", fn); + exit(1); + } + MD5Init(&md5); + while ((l = fread(buf, 1, 4096, fp)) > 0) + MD5Update(&md5, buf, l); + MD5Final(&md5); + if (fp != stdin) fclose(fp); + for (l = 0; l < 16; ++l) + printf("%c%c", HEX_STR[md5.digest[l]>>4&0xf], HEX_STR[md5.digest[l]&0xf]); + printf(" %s\n", fn); +} +int main(int argc, char *argv[]) +{ + int i; + if (argc == 1) md5_one("-"); + else for (i = 1; i < argc; ++i) md5_one(argv[i]); + return 0; +} +#endif diff --git a/misc/md5.h b/misc/md5.h new file mode 100644 index 0000000..678ac27 --- /dev/null +++ b/misc/md5.h @@ -0,0 +1,68 @@ +/* + ********************************************************************** + ** md5.h -- Header file for implementation of MD5 ** + ** RSA Data Security, Inc. 
MD5 Message Digest Algorithm ** + ** Created: 2/17/90 RLR ** + ** Revised: 12/27/90 SRD,AJ,BSK,JT Reference C version ** + ** Revised (for MD5): RLR 4/27/91 ** + ** -- G modified to have y&~z instead of y&z ** + ** -- FF, GG, HH modified to add in last register done ** + ** -- Access pattern: round 2 works mod 5, round 3 works mod 3 ** + ** -- distinct additive constant for each step ** + ** -- round 4 added, working mod 7 ** + ********************************************************************** + */ + +/* + ********************************************************************** + ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** + ** ** + ** License to copy and use this software is granted provided that ** + ** it is identified as the "RSA Data Security, Inc. MD5 Message ** + ** Digest Algorithm" in all material mentioning or referencing this ** + ** software or this function. ** + ** ** + ** License is also granted to make and use derivative works ** + ** provided that such works are identified as "derived from the RSA ** + ** Data Security, Inc. MD5 Message Digest Algorithm" in all ** + ** material mentioning or referencing the derived work. ** + ** ** + ** RSA Data Security, Inc. makes no representations concerning ** + ** either the merchantability of this software or the suitability ** + ** of this software for any particular purpose. It is provided "as ** + ** is" without express or implied warranty of any kind. ** + ** ** + ** These notices must be retained in any copies of any part of this ** + ** documentation and/or software. 
** + ********************************************************************** + */ + +#ifndef MD5_H +#define MD5_H + +#include <stdint.h> + +/* typedef a 32 bit type */ +typedef uint32_t UINT4; + +/* Data structure for MD5 (Message Digest) computation */ +typedef struct { + UINT4 i[2]; /* number of _bits_ handled mod 2^64 */ + UINT4 buf[4]; /* scratch buffer */ + unsigned char in[64]; /* input buffer */ + unsigned char digest[16]; /* actual digest after MD5Final call */ +} MD5_CTX; + +#ifdef __cplusplus +extern "C" { +#endif + + void MD5Init(MD5_CTX *mdContext); + void MD5Update(MD5_CTX *mdContext, unsigned char *inBuf, unsigned int inLen); + void MD5Final(MD5_CTX *mdContext); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/misc/md5fa.c b/misc/md5fa.c new file mode 100644 index 0000000..c41db2d --- /dev/null +++ b/misc/md5fa.c @@ -0,0 +1,58 @@ +#include <stdio.h> +#include <zlib.h> +#include "md5.h" +#include "kseq.h" + +#define HEX_STR "0123456789abcdef" + +KSEQ_INIT(gzFile, gzread) + +/* + * Print MD5 digests for the sequence records read from file fn ("-" reads stdin). + * Each sequence is reduced to its alphabetic characters, upper-cased, and its + * digest is printed together with the file name and the record name. + * Two summary lines follow: ">ordered" is the MD5 of all normalized sequences + * fed in file order; ">unordered" is the XOR of the per-record digests, so it + * does not depend on record order. Exits with status 1 if fn cannot be opened. + */ +static void md5_one(const char *fn) +{ + MD5_CTX md5_one, md5_all; + int l, i, k; + gzFile fp; + kseq_t *seq; + unsigned char unordered[16]; + + for (l = 0; l < 16; ++l) unordered[l] = 0; + fp = strcmp(fn, "-")?
gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); + if (fp == 0) { + fprintf(stderr, "md5fa: %s: No such file or directory\n", fn); + exit(1); + } + + MD5Init(&md5_all); + seq = kseq_init(fp); + while ((l = kseq_read(seq)) >= 0) { + /* compact the sequence in place; <ctype.h> functions need unsigned char values */ + for (i = k = 0; i < seq->seq.l; ++i) { + if (islower((unsigned char)seq->seq.s[i])) seq->seq.s[k++] = toupper((unsigned char)seq->seq.s[i]); + else if (isupper((unsigned char)seq->seq.s[i])) seq->seq.s[k++] = seq->seq.s[i]; + } + MD5Init(&md5_one); + MD5Update(&md5_one, (unsigned char*)seq->seq.s, k); + MD5Final(&md5_one); + for (l = 0; l < 16; ++l) { + printf("%c%c", HEX_STR[md5_one.digest[l]>>4&0xf], HEX_STR[md5_one.digest[l]&0xf]); + unordered[l] ^= md5_one.digest[l]; + } + printf(" %s %s\n", fn, seq->name.s); + MD5Update(&md5_all, (unsigned char*)seq->seq.s, k); + } + MD5Final(&md5_all); + kseq_destroy(seq); + gzclose(fp); /* release the zlib stream and its descriptor */ + for (l = 0; l < 16; ++l) + printf("%c%c", HEX_STR[md5_all.digest[l]>>4&0xf], HEX_STR[md5_all.digest[l]&0xf]); + printf(" %s >ordered\n", fn); + for (l = 0; l < 16; ++l) + printf("%c%c", HEX_STR[unordered[l]>>4&0xf], HEX_STR[unordered[l]&0xf]); + printf(" %s >unordered\n", fn); +} + +int main(int argc, char *argv[]) +{ + int i; + if (argc == 1) md5_one("-"); + else for (i = 1; i < argc; ++i) md5_one(argv[i]); + return 0; +} diff --git a/razf.c b/razf.c new file mode 100644 index 0000000..6611f0b --- /dev/null +++ b/razf.c @@ -0,0 +1,647 @@ +/* + * RAZF : Random Access compressed(Z) File + * Version: 1.0 + * Release Date: 2008-10-27 + * + * Copyright 2008, Jue Ruan , Heng Li + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * To compile razf.c, zlib-1.2.3(or greater) is required. 
+ */ + +#include +#include +#include "razf.h" + +static inline uint32_t byte_swap_4(uint32_t v){ + v = ((v & 0x0000FFFFU) << 16) | (v >> 16); + return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); +} + +static inline uint64_t byte_swap_8(uint64_t v){ + v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); + v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); + return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); +} + +static inline int is_big_endian(){ + int x = 0x01; + char *c = (char*)&x; + return (c[0] != 0x01); +} + +static void add_zindex(RAZF *rz, int64_t in, int64_t out){ + if(rz->index->size == rz->index->cap){ + rz->index->cap = rz->index->cap * 1.5 + 2; + rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap); + rz->index->bin_offsets = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1)); + } + if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out; + rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE]; + rz->index->size ++; +} + +static void save_zindex(RAZF *rz, int fd){ + int32_t i, v32; + int is_be; + is_be = is_big_endian(); + if(is_be) write(fd, &rz->index->size, sizeof(int)); + else { + v32 = byte_swap_4((uint32_t)rz->index->size); + write(fd, &v32, sizeof(uint32_t)); + } + v32 = rz->index->size / RZ_BIN_SIZE + 1; + if(!is_be){ + for(i=0;iindex->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); + for(i=0;iindex->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); + } + write(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); + write(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size); +} + +static void load_zindex(RAZF *rz, int fd){ + int32_t i, v32; + int is_be; + if(!rz->load_index) return; + if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex)); + is_be 
= is_big_endian(); + read(fd, &rz->index->size, sizeof(int)); + if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size); + rz->index->cap = rz->index->size; + v32 = rz->index->size / RZ_BIN_SIZE + 1; + rz->index->bin_offsets = malloc(sizeof(int64_t) * v32); + read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); + rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size); + read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size); + if(!is_be){ + for(i=0;iindex->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); + for(i=0;iindex->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); + } +} + +static RAZF* razf_open_w(int fd){ + RAZF *rz; + rz = calloc(1, sizeof(RAZF)); + rz->mode = 'w'; + rz->filedes = fd; + rz->stream = calloc(sizeof(z_stream), 1); + rz->inbuf = malloc(RZ_BUFFER_SIZE); + rz->outbuf = malloc(RZ_BUFFER_SIZE); + rz->index = calloc(sizeof(ZBlockIndex), 1); + deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); + rz->stream->avail_out = RZ_BUFFER_SIZE; + rz->stream->next_out = rz->outbuf; + rz->header = calloc(sizeof(gz_header), 1); + rz->header->os = 0x03; //Unix + rz->header->text = 0; + rz->header->time = 0; + rz->header->extra = malloc(7); + strncpy((char*)rz->header->extra, "RAZF", 4); + rz->header->extra[4] = 1; // obsolete field + // block size = RZ_BLOCK_SIZE, Big-Endian + rz->header->extra[5] = RZ_BLOCK_SIZE >> 8; + rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF; + rz->header->extra_len = 7; + rz->header->name = rz->header->comment = 0; + rz->header->hcrc = 0; + deflateSetHeader(rz->stream, rz->header); + rz->block_pos = rz->block_off = 0; + return rz; +} + +static void _razf_write(RAZF* rz, const void *data, int size){ + int tout; + rz->stream->avail_in = size; + rz->stream->next_in = (void*)data; + while(1){ + tout = rz->stream->avail_out; + deflate(rz->stream, Z_NO_FLUSH); + rz->out += tout - 
rz->stream->avail_out; + if(rz->stream->avail_out) break; + write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); + rz->stream->avail_out = RZ_BUFFER_SIZE; + rz->stream->next_out = rz->outbuf; + if(rz->stream->avail_in == 0) break; + }; + rz->in += size - rz->stream->avail_in; + rz->block_off += size - rz->stream->avail_in; +} + +static void razf_flush(RAZF *rz){ + uint32_t tout; + if(rz->buf_len){ + _razf_write(rz, rz->inbuf, rz->buf_len); + rz->buf_off = rz->buf_len = 0; + } + if(rz->stream->avail_out){ + write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); + rz->stream->avail_out = RZ_BUFFER_SIZE; + rz->stream->next_out = rz->outbuf; + } + while(1){ + tout = rz->stream->avail_out; + deflate(rz->stream, Z_FULL_FLUSH); + rz->out += tout - rz->stream->avail_out; + if(rz->stream->avail_out == 0){ + write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); + rz->stream->avail_out = RZ_BUFFER_SIZE; + rz->stream->next_out = rz->outbuf; + } else break; + } + rz->block_pos = rz->out; + rz->block_off = 0; +} + +static void razf_end_flush(RAZF *rz){ + uint32_t tout; + if(rz->buf_len){ + _razf_write(rz, rz->inbuf, rz->buf_len); + rz->buf_off = rz->buf_len = 0; + } + while(1){ + tout = rz->stream->avail_out; + deflate(rz->stream, Z_FINISH); + rz->out += tout - rz->stream->avail_out; + if(rz->stream->avail_out < RZ_BUFFER_SIZE){ + write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); + rz->stream->avail_out = RZ_BUFFER_SIZE; + rz->stream->next_out = rz->outbuf; + } else break; + } +} + +static void _razf_buffered_write(RAZF *rz, const void *data, int size){ + int i, n; + while(1){ + if(rz->buf_len == RZ_BUFFER_SIZE){ + _razf_write(rz, rz->inbuf, rz->buf_len); + rz->buf_len = 0; + } + if(size + rz->buf_len < RZ_BUFFER_SIZE){ + for(i=0;iinbuf + rz->buf_len)[i] = ((char*)data)[i]; + rz->buf_len += size; + return; + } else { + n = RZ_BUFFER_SIZE - rz->buf_len; + for(i=0;iinbuf + rz->buf_len)[i] = 
((char*)data)[i]; + size -= n; + data += n; + rz->buf_len += n; + } + } +} + +int razf_write(RAZF* rz, const void *data, int size){ + int ori_size, n; + int64_t next_block; + ori_size = size; + next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE; + while(rz->in + rz->buf_len + size >= next_block){ + n = next_block - rz->in - rz->buf_len; + _razf_buffered_write(rz, data, n); + data += n; + size -= n; + razf_flush(rz); + add_zindex(rz, rz->in, rz->out); + next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE; + } + _razf_buffered_write(rz, data, size); + return ori_size; +} + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xE0 /* bits 5..7: reserved */ + +static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){ + int method, flags, n, len; + if(size < 2) return 0; + if(data[0] != 0x1f || data[1] != 0x8b) return 0; + if(size < 4) return 0; + method = data[2]; + flags = data[3]; + if(method != Z_DEFLATED || (flags & RESERVED)) return 0; + n = 4 + 6; // Skip 6 bytes + *extra_off = n + 2; + *extra_len = 0; + if(flags & EXTRA_FIELD){ + if(size < n + 2) return 0; + len = ((int)data[n + 1] << 8) | data[n]; + n += 2; + *extra_off = n; + while(len){ + if(n >= size) return 0; + n ++; + len --; + } + *extra_len = n - (*extra_off); + } + if(flags & ORIG_NAME) while(n < size && data[n++]); + if(flags & COMMENT) while(n < size && data[n++]); + if(flags & HEAD_CRC){ + if(n + 2 > size) return 0; + n += 2; + } + return n; +} + +static RAZF* razf_open_r(int fd, int _load_index){ + RAZF *rz; + int ext_off, ext_len; + int n, is_be, ret; + int64_t end; + unsigned char c[] = "RAZF"; + rz = calloc(1, sizeof(RAZF)); + rz->mode = 'r'; + 
rz->filedes = fd; + rz->stream = calloc(sizeof(z_stream), 1); + rz->inbuf = malloc(RZ_BUFFER_SIZE); + rz->outbuf = malloc(RZ_BUFFER_SIZE); + rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL; + n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); + ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len); + if(ret == 0){ + PLAIN_FILE: + rz->in = n; + rz->file_type = FILE_TYPE_PLAIN; + memcpy(rz->outbuf, rz->inbuf, n); + rz->buf_len = n; + free(rz->stream); + rz->stream = NULL; + return rz; + } + rz->header_size = ret; + ret = inflateInit2(rz->stream, -WINDOW_BITS); + if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;} + rz->stream->avail_in = n - rz->header_size; + rz->stream->next_in = rz->inbuf + rz->header_size; + rz->stream->avail_out = RZ_BUFFER_SIZE; + rz->stream->next_out = rz->outbuf; + rz->file_type = FILE_TYPE_GZ; + rz->in = rz->header_size; + rz->block_pos = rz->header_size; + rz->next_block_pos = rz->header_size; + rz->block_off = 0; + if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz; + if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){ + fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file. 
in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__); + return rz; + } + rz->load_index = _load_index; + rz->file_type = FILE_TYPE_RZ; + if(lseek(fd, -16, SEEK_END) == -1){ + UNSEEKABLE: + rz->seekable = 0; + rz->index = NULL; + rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL; + } else { + is_be = is_big_endian(); + rz->seekable = 1; + read(fd, &end, sizeof(int64_t)); + if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end); + else rz->src_end = end; + read(fd, &end, sizeof(int64_t)); + if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end); + else rz->end = end; + if(n > rz->end){ + rz->stream->avail_in -= n - rz->end; + n = rz->end; + } + if(rz->end > rz->src_end){ + lseek(fd, rz->in, SEEK_SET); + goto UNSEEKABLE; + } + if(lseek(fd, rz->end, SEEK_SET) != rz->end){ + lseek(fd, rz->in, SEEK_SET); + goto UNSEEKABLE; + } + load_zindex(rz, fd); + lseek(fd, n, SEEK_SET); + } + return rz; +} + +RAZF* razf_dopen(int fd, const char *mode){ + if(strcasecmp(mode, "r") == 0) return razf_open_r(fd, 1); + else if(strcasecmp(mode, "w") == 0) return razf_open_w(fd); + else return NULL; +} + +RAZF* razf_dopen2(int fd, const char *mode) +{ + if(strcasecmp(mode, "r") == 0) return razf_open_r(fd, 0); + else if(strcasecmp(mode, "w") == 0) return razf_open_w(fd); + else return NULL; +} + +static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){ + int fd; + RAZF *rz; + if(strcasecmp(mode, "r") == 0){ + fd = open(filename, O_RDONLY); + rz = razf_open_r(fd, _load_index); + } else if(strcasecmp(mode, "w") == 0){ + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); + rz = razf_open_w(fd); + } else return NULL; + return rz; +} + +RAZF* razf_open(const char *filename, const char *mode){ + return _razf_open(filename, mode, 1); +} + +RAZF* razf_open2(const char *filename, const char *mode){ + return _razf_open(filename, mode, 0); +} + +int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){ + int64_t n; + if(rz->mode != 
'r' && rz->mode != 'R') return 0; + switch(rz->file_type){ + case FILE_TYPE_PLAIN: + if(rz->end == 0x7fffffffffffffffLL){ + if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0; + rz->end = lseek(rz->filedes, 0, SEEK_END); + lseek(rz->filedes, n, SEEK_SET); + } + *u_size = *c_size = rz->end; + return 1; + case FILE_TYPE_GZ: + return 0; + case FILE_TYPE_RZ: + if(rz->src_end == rz->end) return 0; + *u_size = rz->src_end; + *c_size = rz->end; + return 1; + default: + return 0; + } +} + +static int _razf_read(RAZF* rz, void *data, int size){ + int ret, tin; + if(rz->z_eof || rz->z_err) return 0; + if (rz->file_type == FILE_TYPE_PLAIN) { + ret = read(rz->filedes, data, size); + if (ret == 0) rz->z_eof = 1; + return ret; + } + rz->stream->avail_out = size; + rz->stream->next_out = data; + while(rz->stream->avail_out){ + if(rz->stream->avail_in == 0){ + if(rz->in >= rz->end){ rz->z_eof = 1; break; } + if(rz->end - rz->in < RZ_BUFFER_SIZE){ + rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in); + } else { + rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); + } + if(rz->stream->avail_in == 0){ + rz->z_eof = 1; + break; + } + rz->stream->next_in = rz->inbuf; + } + tin = rz->stream->avail_in; + ret = inflate(rz->stream, Z_BLOCK); + rz->in += tin - rz->stream->avail_in; + if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){ + fprintf(stderr, "[_razf_read] inflate error: %d (at %s:%d)\n", ret, __FILE__, __LINE__); + rz->z_err = 1; + break; + } + if(ret == Z_STREAM_END){ + rz->z_eof = 1; + break; + } + if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){ + rz->buf_flush = 1; + rz->next_block_pos = rz->in; + break; + } + } + return size - rz->stream->avail_out; +} + +int razf_read(RAZF *rz, void *data, int size){ + int ori_size, i; + ori_size = size; + while(size > 0){ + if(rz->buf_len){ + if(size < rz->buf_len){ + for(i=0;ioutbuf + rz->buf_off)[i]; + rz->buf_off += size; + rz->buf_len -= size; + data += size; + 
rz->block_off += size; + size = 0; + break; + } else { + for(i=0;ibuf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i]; + data += rz->buf_len; + size -= rz->buf_len; + rz->block_off += rz->buf_len; + rz->buf_off = 0; + rz->buf_len = 0; + if(rz->buf_flush){ + rz->block_pos = rz->next_block_pos; + rz->block_off = 0; + rz->buf_flush = 0; + } + } + } else if(rz->buf_flush){ + rz->block_pos = rz->next_block_pos; + rz->block_off = 0; + rz->buf_flush = 0; + } + if(rz->buf_flush) continue; + rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); + if(rz->z_eof && rz->buf_len == 0) break; + } + rz->out += ori_size - size; + return ori_size - size; +} + +int razf_skip(RAZF* rz, int size){ + int ori_size; + ori_size = size; + while(size > 0){ + if(rz->buf_len){ + if(size < rz->buf_len){ + rz->buf_off += size; + rz->buf_len -= size; + rz->block_off += size; + size = 0; + break; + } else { + size -= rz->buf_len; + rz->buf_off = 0; + rz->buf_len = 0; + rz->block_off += rz->buf_len; + if(rz->buf_flush){ + rz->block_pos = rz->next_block_pos; + rz->block_off = 0; + rz->buf_flush = 0; + } + } + } else if(rz->buf_flush){ + rz->block_pos = rz->next_block_pos; + rz->block_off = 0; + rz->buf_flush = 0; + } + if(rz->buf_flush) continue; + rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); + if(rz->z_eof) break; + } + rz->out += ori_size - size; + return ori_size - size; +} + +static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){ + lseek(rz->filedes, in, SEEK_SET); + rz->in = in; + rz->out = out; + rz->block_pos = in; + rz->next_block_pos = in; + rz->block_off = 0; + rz->buf_flush = 0; + rz->z_eof = rz->z_err = 0; + inflateReset(rz->stream); + rz->stream->avail_in = 0; + rz->buf_off = rz->buf_len = 0; +} + +int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){ + int64_t pos; + rz->z_eof = 0; + if(rz->file_type == FILE_TYPE_PLAIN){ + rz->buf_off = rz->buf_len = 0; + pos = block_start + block_offset; + pos = lseek(rz->filedes, pos, 
SEEK_SET); + rz->out = rz->in = pos; + return pos; + } + if(block_start == rz->block_pos && block_offset >= rz->block_off) { + block_offset -= rz->block_off; + goto SKIP; // Needn't reset inflate + } + if(block_start == 0) block_start = rz->header_size; // Automaticly revist wrong block_start + _razf_reset_read(rz, block_start, 0); + SKIP: + if(block_offset) razf_skip(rz, block_offset); + return rz->block_off; +} + +int64_t razf_seek(RAZF* rz, int64_t pos, int where){ + int64_t idx; + int64_t seek_pos, new_out; + rz->z_eof = 0; + if (where == SEEK_CUR) pos += rz->out; + else if (where == SEEK_END) pos += rz->src_end; + if(rz->file_type == FILE_TYPE_PLAIN){ + seek_pos = lseek(rz->filedes, pos, SEEK_SET); + rz->buf_off = rz->buf_len = 0; + rz->out = rz->in = seek_pos; + return seek_pos; + } else if(rz->file_type == FILE_TYPE_GZ){ + if(pos >= rz->out) goto SKIP; + return rz->out; + } + if(pos == rz->out) return pos; + if(pos > rz->src_end) return rz->out; + if(!rz->seekable || !rz->load_index){ + if(pos >= rz->out) goto SKIP; + } + idx = pos / RZ_BLOCK_SIZE - 1; + seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]); + new_out = (idx + 1) * RZ_BLOCK_SIZE; + if(pos > rz->out && new_out <= rz->out) goto SKIP; + _razf_reset_read(rz, seek_pos, new_out); + SKIP: + razf_skip(rz, (int)(pos - rz->out)); + return rz->out; +} + +uint64_t razf_tell2(RAZF *rz) +{ + /* + if (rz->load_index) { + int64_t idx, seek_pos; + idx = rz->out / RZ_BLOCK_SIZE - 1; + seek_pos = (idx < 0)? 
rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]); + if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off) + fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n", + (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off); + } + */ + return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff); +} + +int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where) +{ + if (where != SEEK_SET) return -1; + return razf_jump(rz, voffset>>16, voffset&0xffff); +} + +void razf_close(RAZF *rz){ + uint64_t v64; + if(rz->mode == 'w'){ + razf_end_flush(rz); + deflateEnd(rz->stream); + save_zindex(rz, rz->filedes); + if(is_big_endian()){ + write(rz->filedes, &rz->in, sizeof(int64_t)); + write(rz->filedes, &rz->out, sizeof(int64_t)); + } else { + v64 = byte_swap_8((uint64_t)rz->in); + write(rz->filedes, &v64, sizeof(int64_t)); + v64 = byte_swap_8((uint64_t)rz->out); + write(rz->filedes, &v64, sizeof(int64_t)); + } + } else if(rz->mode == 'r'){ + if(rz->stream) inflateEnd(rz->stream); + } + if(rz->inbuf) free(rz->inbuf); + if(rz->outbuf) free(rz->outbuf); + if(rz->header){ + free(rz->header->extra); + free(rz->header->name); + free(rz->header->comment); + free(rz->header); + } + if(rz->index){ + free(rz->index->bin_offsets); + free(rz->index->cell_offsets); + free(rz->index); + } + free(rz->stream); + close(rz->filedes); + free(rz); +} diff --git a/razf.h b/razf.h new file mode 100644 index 0000000..d391776 --- /dev/null +++ b/razf.h @@ -0,0 +1,117 @@ + /*- + * RAZF : Random Access compressed(Z) File + * Version: 1.0 + * Release Date: 2008-10-27 + * + * Copyright 2008, Jue Ruan , Heng Li + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + + +#ifndef __RAZF_RJ_H +#define __RAZF_RJ_H + +#include +#include +#include "zlib.h" +#include "zutil.h" + +#define WINDOW_BITS 15 + +#ifndef RZ_BLOCK_SIZE +#define RZ_BLOCK_SIZE (1<mode from HEAD to TYPE after call inflateReset */ + int buf_off, buf_len; + int z_err, z_eof; + int seekable; + /* Indice where the source is seekable */ + int load_index; + /* set has_index to 0 in mode 'w', then index will be discarded */ +} RAZF; + +#ifdef __cplusplus +extern "C" { +#endif + + RAZF* razf_dopen(int data_fd, const char *mode); + RAZF *razf_open(const char *fn, const char *mode); + int razf_write(RAZF* rz, const void *data, int size); + int razf_read(RAZF* rz, void *data, int size); + int64_t razf_seek(RAZF* rz, int64_t pos, int where); + void razf_close(RAZF* rz); + +#define razf_tell(rz) ((rz)->out) + + RAZF* razf_open2(const char *filename, const char *mode); + RAZF* razf_dopen2(int fd, const char *mode); + uint64_t razf_tell2(RAZF *rz); + int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/razip.c b/razip.c new file mode 100644 index 0000000..0b67c6c --- /dev/null +++ b/razip.c @@ -0,0 +1,139 @@ +#include +#include +#include +#include +#include "razf.h" + +#define WINDOW_SIZE 4096 + +static int razf_main_usage() +{ + printf("\n"); + printf("Usage: razip [options] [file] ...\n\n"); + printf("Options: -c write on standard output, keep original files unchanged\n"); + printf(" -d decompress\n"); + printf(" -l list compressed file contents\n"); + printf(" -b INT decompress at INT position in the uncompressed file\n"); + printf(" -s INT decompress INT bytes in the uncompressed file\n"); + printf(" -h give this help\n"); + printf("\n"); + return 0; +} + +static int write_open(const char *fn, int is_forced) +{ + int fd = -1; + char c; + if (!is_forced) { + if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0644)) < 0 && errno == EEXIST) { + printf("razip: %s already exists; do you wish to overwrite 
(y or n)? ", fn); + scanf("%c", &c); + if (c != 'Y' && c != 'y') { + printf("razip: not overwritten\n"); + exit(1); + } + } + } + if (fd < 0) { + if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0) { + fprintf(stderr, "razip: %s: Fail to write\n", fn); + exit(1); + } + } + return fd; +} + +int main(int argc, char **argv) +{ + int c, compress, pstdout, is_forced; + RAZF *rz; + void *buffer; + long start, end, size; + + compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; + while((c = getopt(argc, argv, "cdlhfb:s:")) >= 0){ + switch(c){ + case 'h': return razf_main_usage(); + case 'd': compress = 0; break; + case 'c': pstdout = 1; break; + case 'l': compress = 2; break; + case 'b': start = atol(optarg); break; + case 's': size = atol(optarg); break; + case 'f': is_forced = 1; break; + } + } + if (size >= 0) end = start + size; + if(end >= 0 && end < start){ + fprintf(stderr, " -- Illegal region: [%ld, %ld] --\n", start, end); + return 1; + } + if(compress == 1){ + int f_src, f_dst = -1; + if(argc > optind){ + if((f_src = open(argv[optind], O_RDONLY)) < 0){ + fprintf(stderr, " -- Cannot open file: %s --\n", argv[optind]); + return 1; + } + if(pstdout){ + f_dst = fileno(stdout); + } else { + char *name = malloc(sizeof(strlen(argv[optind]) + 5)); + strcpy(name, argv[optind]); + strcat(name, ".rz"); + f_dst = write_open(name, is_forced); + if (f_dst < 0) return 1; + free(name); + } + } else if(pstdout){ + f_src = fileno(stdin); + f_dst = fileno(stdout); + } else return razf_main_usage(); + rz = razf_dopen(f_dst, "w"); + buffer = malloc(WINDOW_SIZE); + while((c = read(f_src, buffer, WINDOW_SIZE)) > 0) razf_write(rz, buffer, c); + razf_close(rz); // f_dst will be closed here + if (argc > optind) unlink(argv[optind]); + free(buffer); + close(f_src); + return 0; + } else { + if(argc <= optind) return razf_main_usage(); + if(compress == 2){ + rz = razf_open(argv[optind], "r"); + if(rz->file_type == FILE_TYPE_RZ) { + printf("%20s%20s%7s %s\n", 
"compressed", "uncompressed", "ratio", "name"); + printf("%20lld%20lld%6.1f%% %s\n", (long long)rz->end, (long long)rz->src_end, rz->end * 100.0f / rz->src_end, + argv[optind]); + } else fprintf(stdout, "%s is not a regular rz file\n", argv[optind]); + } else { + int f_dst; + if (argc > optind && !pstdout) { + char *name; + if (strstr(argv[optind], ".rz") - argv[optind] != strlen(argv[optind]) - 3) { + printf("razip: %s: unknown suffix -- ignored\n", argv[optind]); + return 1; + } + name = strdup(argv[optind]); + name[strlen(name) - 3] = '\0'; + f_dst = write_open(name, is_forced); + free(name); + } else f_dst = fileno(stdout); + rz = razf_open(argv[optind], "r"); + buffer = malloc(WINDOW_SIZE); + razf_seek(rz, start, SEEK_SET); + while(1){ + if(end < 0) c = razf_read(rz, buffer, WINDOW_SIZE); + else c = razf_read(rz, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); + if(c <= 0) break; + start += c; + write(f_dst, buffer, c); + if(end >= 0 && start >= end) break; + } + free(buffer); + if (!pstdout) unlink(argv[optind]); + } + razf_close(rz); + return 0; + } +} + diff --git a/samtools.1 b/samtools.1 new file mode 100644 index 0000000..91f627f --- /dev/null +++ b/samtools.1 @@ -0,0 +1,258 @@ +.TH samtools 1 "22 December 2008" "samtools-0.1.1" "Bioinformatics tools" +.SH NAME +.PP +samtools - Utilities for the Sequence Alignment/Map (SAM) format +.SH SYNOPSIS +.PP +samtools import ref_list.txt aln.sam.gz aln.bam +.PP +samtools sort aln.bam aln.sorted +.PP +samtools index aln.sorted.bam +.PP +samtools view aln.sorted.bam chr2:20,100,000-20,200,000 +.PP +samtools merge out.bam in1.bam in2.bam in3.bam +.PP +samtools faidx ref.fasta +.PP +samtools pileup -f ref.fasta aln.sorted.bam +.PP +samtools tview aln.sorted.bam ref.fasta + +.SH DESCRIPTION +.PP +Samtools is a set of utilities that manipulate alignments in the BAM +format. 
It imports from and exports to the SAM (Sequence +Alignment/Map) format, does sorting, merging and indexing, and +allows retrieving reads in any region swiftly. + +.SH COMMANDS AND OPTIONS +.TP 10 +.B import +samtools import <in.ref_list> <in.sam> <out.bam> + +Convert alignments in SAM format to BAM format. File +.I <in.ref_list> +is TAB-delimited. Each line must contain the reference name and the +length of the reference, one line for each distinct reference; +additional fields are ignored. This file also defines the order of the +reference sequences in sorting. File +.I <in.sam> +can be optionally compressed by zlib or gzip. A single hyphen is +recognized as stdin or stdout, depending on the context. + +.TP +.B sort +samtools sort [-n] [-m maxMem] <in.bam> <out.prefix> + +Sort alignments based on the leftmost coordinate. File +.I <out.prefix>.bam +will be created. This command may also create temporary files +.I <out.prefix>.%d.bam +when the whole alignment cannot be fitted into memory (controlled by +option -m). + +.B OPTIONS: +.RS +.TP 8 +.B -n +Sort by read names rather than by chromosomal coordinates +.TP +.B -m INT +Approximately the maximum required memory. +.RE + +.TP +.B merge +samtools merge [-n] <out.bam> <in1.bam> <in2.bam> [...] + +Merge multiple sorted alignments. The header of +.I <in1.bam> +will be copied to +.I <out.bam> +and the headers of other files will be ignored. + +.B OPTIONS: +.RS +.TP 8 +.B -n +The input alignments are sorted by read names rather than by chromosomal +coordinates +.RE + +.TP +.B index +samtools index <aln.bam> + +Index sorted alignment for fast random access. Index file +.I <aln.bam>.bai +will be created. + +.TP +.B view +samtools view [-b] <in.bam> [region1 [...]] + +Extract/print all or sub alignments in SAM or BAM format. If no region +is specified, all the alignments will be printed; otherwise only +alignments overlapping with the specified regions will be output. An +alignment may be given multiple times if it is overlapping several +regions. A region can be presented, for example, in the following +format: `chr2', `chr2:1000000' or `chr2:1,000,000-2,000,000'.
+ +.B OPTIONS: +.RS +.TP 8 +.B -b +Output in the BAM format. +.RE + +.TP +.B faidx +samtools faidx [region1 [...]] + +Index reference sequence in the FASTA format or extract subsequence from +indexed reference sequence. If no region is specified, +.B faidx +will index the file and create +.I .fai +on the disk. If regions are specified, the subsequences will be +retrieved and printed to stdout in the FASTA format. The input file can +be compressed in the +.B RAZF +format. + +.TP +.B pileup +samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] +[-s] [-c] [-T theta] [-N nHap] [-r pairDiffRate] + +Print the alignment in the pileup format. In the pileup format, each +line represents a genomic position, consisting of chromosome name, +coordinate, reference base, read bases, read qualities and alignment +mapping qualities. Information on match, mismatch, indel, strand, +mapping quality and start and end of a read are all encoded at the read +base column. At this column, a dot stands for a match to the reference +base on the forward strand, a comma for a match on the reverse strand, +`ACGTN' for a mismatch on the forward strand and `acgtn' for a mismatch +on the reverse strand. A pattern `\\+[0-9]+[ACGTNacgtn]+' indicates +there is an insertion between this reference position and the next +reference position. The length of the insertion is given by the integer +in the pattern, followed by the inserted sequence. Similarly, a pattern +`-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference. Also at +the read base column, a symbol `^' marks the start of a read segment +which is a contiguous subsequence on the read separated by `N/S/H' CIGAR +operations. The ASCII of the character following `^' minus 33 gives the +mapping quality. A symbol `$' marks the end of a read segment. 
+ +If option +.B -c +is applied, the consensus base, consensus quality, SNP quality and +maximum mapping quality of the reads covering the site will be inserted +between the `reference base' and the `read bases' columns. An indel +occupies an additional line. Each indel line consists of chromosome +name, coordinate, a star, top two high-scoring ins/del sequences, the +number of reads strongly supporting the first indel, the number of reads +strongly supporting the second indel, the number of reads that confer +little information on distinguishing indels and the number of reads that +contain indels different from the top two ones. + +.B OPTIONS: +.RS + +.TP 10 +.B -s +Print the mapping quality as the last column. This option makes the +output easier to parse, although this format is not space efficient. + +.TP +.B -f FILE +The reference sequence in the FASTA format. Index file +.I FILE.fai +will be created if +absent. + +.TP +.B -t FILE +List of reference names and sequence lengths, in the format described +for the +.B import +command. If this option is present, samtools assumes the input +.I +is in SAM format; otherwise it assumes in BAM format. + +.TP +.B -l FILE +List of sites at which pileup is output. This file is space +delimited. The first two columns are required to be chromosome and +1-based coordinate. Additional columns are ignored. It is +recommended to use option +.B -s +together with +.B -l +as in the default format we may not know the mapping quality. + +.TP +.B -c +Call the consensus sequence using MAQ consensus model. Options +.B -T, +.B -N +and +.B -r +are only effective when +.B -c +is in use. 
+ +.TP +.B -T FLOAT +The theta parameter (error dependency coefficient) in the maq consensus +calling model [0.85] + +.TP +.B -N INT +Number of haplotypes in the sample (>=2) [2] + +.TP +.B -r FLOAT +Expected fraction of differences between a pair of haplotypes [0.001] + +.RE + +.TP +.B tview +samtools tview [ref.fasta] + +Text alignment viewer (based on the ncurses library). In the viewer, +press `?' for help and press `g' to check the alignment start from a +region in the format like `chr10:10,000,000'. Note that if the region +shown on the screen contains no mapped reads, a blank screen will be +seen. This is a known issue and will be improved later. + +.RE + +.SH LIMITATIONS +.PP +.IP o 2 +In general, more testing is needed to ensure there is no severe bug. +.IP o 2 +PCR duplicate removal has not been implemented. +.IP o 2 +Only MAQ->SAM converter is implemented. More converters are needed. +.IP o 2 +Reference sequence names and lengths are not acquired from the BAM/SAM header. +.IP o 2 +CIGAR operations N and P may not be properly handled. +.IP o 2 +There is a small known memory leak in the viewer. + +.SH AUTHOR +.PP +Heng Li from the Sanger Institute is the author of samtools. Bob +Handsaker from the Broad Institute implemented the BGZF library and Jue +Ruan from Beijing Genomics Institute wrote the RAZF library. Various +people in the 1000Genomes Project contributed to the SAM format +specification. 
+ +.SH SEE ALSO +.PP +Samtools website: http://samtools.sourceforge.net diff --git a/source.dot b/source.dot new file mode 100644 index 0000000..cfa2222 --- /dev/null +++ b/source.dot @@ -0,0 +1,15 @@ +digraph { + faidx[label="faidx.c\n(faidx)"] + import[label="bam_import.c\n(import)"] + plcmd[label="bam_plcmd.c\n(pileup)"] + sort[label="bam_sort.c\n(sort, merge)"] + index[label="bam_index.c\n(index)"] + tview[label="bam_tview.c\n(tview)"] + "bam_aux.c" -> {"bam.c", import} + "bgzf.c" -> "bam.c" + "bam.c" -> {index, "bam_pileup.c", sort, import} + "bam_pileup.c" -> {"bam_lpileup.c", plcmd} + {"bam_lpileup.c", index, faidx, "bam_maqcns.c"} -> tview + {import, faidx, "bam_maqcns.c"} -> plcmd + {tview, plcmd, faidx, sort, import, index} -> "bamtk.c\n(view)" +} \ No newline at end of file diff --git a/zutil.h b/zutil.h new file mode 100644 index 0000000..b7d5eff --- /dev/null +++ b/zutil.h @@ -0,0 +1,269 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-2005 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef ZUTIL_H +#define ZUTIL_H + +#define ZLIB_INTERNAL +#include "zlib.h" + +#ifdef STDC +# ifndef _WIN32_WCE +# include +# endif +# include +# include +#endif +#ifdef NO_ERRNO_H +# ifdef _WIN32_WCE + /* The Microsoft C Run-Time Library for Windows CE doesn't have + * errno. We define it as a global variable to simplify porting. + * Its value is always 0 and should not be used. We rename it to + * avoid conflict with other libraries that use the same workaround. 
+ */ +# define errno z_errno +# endif + extern int errno; +#else +# ifndef _WIN32_WCE +# include +# endif +#endif + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + +typedef unsigned char uch; +typedef uch FAR uchf; +typedef unsigned short ush; +typedef ush FAR ushf; +typedef unsigned long ulg; + +extern const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ +/* (size given to avoid silly warnings with Visual C++) */ + +#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] + +#define ERR_RETURN(strm,err) \ + return (strm->msg = (char*)ERR_MSG(err), (err)) +/* To be used only when the state is known to be valid */ + + /* common constants */ + +#ifndef DEF_WBITS +# define DEF_WBITS MAX_WBITS +#endif +/* default windowBits for decompression. MAX_WBITS is for compression only */ + +#if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +#else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +#endif +/* default memLevel */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + +#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) +# define OS_CODE 0x00 +# if defined(__TURBOC__) || defined(__BORLANDC__) +# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) + /* Allow compilation with ANSI keywords only enabled */ + void _Cdecl farfree( void *block ); + void *_Cdecl farmalloc( unsigned long nbytes ); +# else +# include +# endif +# else /* MSC or DJGPP */ +# include +# endif +#endif + +#ifdef AMIGA +# define OS_CODE 0x01 +#endif + +#if defined(VAXC) || defined(VMS) +# define OS_CODE 0x02 +# define F_OPEN(name, mode) \ + fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") +#endif + +#if defined(ATARI) || defined(atarist) +# define OS_CODE 
0x05 +#endif + +#ifdef OS2 +# define OS_CODE 0x06 +# ifdef M_I86 + #include +# endif +#endif + +#if defined(MACOS) || defined(TARGET_OS_MAC) +# define OS_CODE 0x07 +# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fdopen */ +# else +# ifndef fdopen +# define fdopen(fd,mode) NULL /* No fdopen() */ +# endif +# endif +#endif + +#ifdef TOPS20 +# define OS_CODE 0x0a +#endif + +#ifdef WIN32 +# ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ +# define OS_CODE 0x0b +# endif +#endif + +#ifdef __50SERIES /* Prime/PRIMOS */ +# define OS_CODE 0x0f +#endif + +#if defined(_BEOS_) || defined(RISCOS) +# define fdopen(fd,mode) NULL /* No fdopen() */ +#endif + +#if (defined(_MSC_VER) && (_MSC_VER > 600)) +# if defined(_WIN32_WCE) +# define fdopen(fd,mode) NULL /* No fdopen() */ +# ifndef _PTRDIFF_T_DEFINED + typedef int ptrdiff_t; +# define _PTRDIFF_T_DEFINED +# endif +# else +# define fdopen(fd,type) _fdopen(fd,type) +# endif +#endif + + /* common defaults */ + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + +#ifndef F_OPEN +# define F_OPEN(name, mode) fopen((name), (mode)) +#endif + + /* functions */ + +#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif +#if defined(__CYGWIN__) +# ifndef HAVE_VSNPRINTF +# define HAVE_VSNPRINTF +# endif +#endif +#ifndef HAVE_VSNPRINTF +# ifdef MSDOS + /* vsnprintf may exist on some MS-DOS compilers (DJGPP?), + but for now we just assume it doesn't. */ +# define NO_vsnprintf +# endif +# ifdef __TURBOC__ +# define NO_vsnprintf +# endif +# ifdef WIN32 + /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. 
*/ +# if !defined(vsnprintf) && !defined(NO_vsnprintf) +# define vsnprintf _vsnprintf +# endif +# endif +# ifdef __SASC +# define NO_vsnprintf +# endif +#endif +#ifdef VMS +# define NO_vsnprintf +#endif + +#if defined(pyr) +# define NO_MEMCPY +#endif +#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) + /* Use our own functions for small and medium model with MSC <= 5.0. + * You may have to use the same strategy for Borland C (untested). + * The __SC__ check is for Symantec. + */ +# define NO_MEMCPY +#endif +#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) +# define HAVE_MEMCPY +#endif +#ifdef HAVE_MEMCPY +# ifdef SMALL_MEDIUM /* MSDOS small or medium model */ +# define zmemcpy _fmemcpy +# define zmemcmp _fmemcmp +# define zmemzero(dest, len) _fmemset(dest, 0, len) +# else +# define zmemcpy memcpy +# define zmemcmp memcmp +# define zmemzero(dest, len) memset(dest, 0, len) +# endif +#else + extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); + extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); + extern void zmemzero OF((Bytef* dest, uInt len)); +#endif + +/* Diagnostic functions */ +#ifdef DEBUG +# include + extern int z_verbose; + extern void z_error OF((char *m)); +# define Assert(cond,msg) {if(!(cond)) z_error(msg);} +# define Trace(x) {if (z_verbose>=0) fprintf x ;} +# define Tracev(x) {if (z_verbose>0) fprintf x ;} +# define Tracevv(x) {if (z_verbose>1) fprintf x ;} +# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} +# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); +void zcfree OF((voidpf opaque, voidpf ptr)); + +#define ZALLOC(strm, items, size) \ + (*((strm)->zalloc))((strm)->opaque, (items), (size)) +#define ZFREE(strm, addr) 
(*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) +#define TRY_FREE(s, p) {if (p) ZFREE(s, p);} + +#endif /* ZUTIL_H */ -- 2.39.2