From 308da19f6dfbbf50bbce5aa87b32e2f269650c80 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 2 Dec 2013 19:45:39 +0000 Subject: [PATCH] added tests for slice_align git-svn-id: http://biopieces.googlecode.com/svn/trunk@2266 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/slice_align | 8 +++- bp_test/in/slice_align.in.1 | 80 +++++++++++++++++++++++++++++++++++ bp_test/in/slice_align.in.2 | 2 + bp_test/out/slice_align.out.1 | 80 +++++++++++++++++++++++++++++++++++ bp_test/out/slice_align.out.2 | 80 +++++++++++++++++++++++++++++++++++ bp_test/out/slice_align.out.3 | 80 +++++++++++++++++++++++++++++++++++ bp_test/test/test_slice_align | 32 ++++++++++++++ 7 files changed, 360 insertions(+), 2 deletions(-) create mode 100644 bp_test/in/slice_align.in.1 create mode 100644 bp_test/in/slice_align.in.2 create mode 100644 bp_test/out/slice_align.out.1 create mode 100644 bp_test/out/slice_align.out.2 create mode 100644 bp_test/out/slice_align.out.3 create mode 100755 bp_test/test/test_slice_align diff --git a/bp_bin/slice_align b/bp_bin/slice_align index 586c270..f63ac7e 100755 --- a/bp_bin/slice_align +++ b/bp_bin/slice_align @@ -24,7 +24,7 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Join sequences in the stream. +# Slice aligned sequences in the stream to obtain subsequences. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< @@ -60,13 +60,17 @@ else raise "either --beg/--end or --forward/--reverse|--reverse_rc must be specified" end +if options[:template_file] + template = Fasta.open(options[:template_file]).get_entry +end + Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| if record[:SEQ] entry = Seq.new(seq: record[:SEQ]) unless options[:beg] - compact = Seq.new(seq: entry.seq.dup) + compact = template ? template : Seq.new(seq: entry.seq.dup) compact.seq.delete! "-.~" fmatch = compact.patmatch(options[:forward], diff --git a/bp_test/in/slice_align.in.1 b/bp_test/in/slice_align.in.1 new file mode 100644 index 0000000..1aa1a32 --- /dev/null +++ b/bp_test/in/slice_align.in.1 @@ -0,0 +1,80 @@ +SEQ: A-TGAC-GCTGGCGGCATGCTTTACACATGCAAGTCGAACG-GCAGCGG-----GGGCTTCGGCCT----GCC-G--GCGAG-TGGCGAACGGGTGAGTA +SEQ_NAME: ID00000000 +SEQ_LEN: 100 +--- +SEQ: ACG-AC-GCTGGCGGCATGCTTA-CACATGCAAGTCGCACGAAC------------CTTTCGGG-------------GTT-GGTGGCGGACGGGTGAGTA +SEQ_NAME: ID00000001 +SEQ_LEN: 100 +--- +SEQ: ACG-AC-GTT-GCGATGCGTCTTAAGCATGCAAGTCGAGCGGGC--TTA----TTCGGGCAACTGGA----TA-A--GTTAG-CGGCGAACTGGTGAGTA +SEQ_NAME: ID00000002 +SEQ_LEN: 100 +--- +SEQ: ACGA-C-GCTGGCGGCAGGCCTAATACATGCAAGTCGAGCTGCA------------CCTTCGGG-------------TGAGC-TGGCGGACGGGTGAGTA +SEQ_NAME: ID00000003 +SEQ_LEN: 100 +--- +SEQ: ACGA-C-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGTGAGTA +SEQ_NAME: ID00000004 +SEQ_LEN: 100 +--- +SEQ: ACGA-CAGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGTGAGTA +SEQ_NAME: ID00000005 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAA-AA-------CACTTCGGTG-------T-GAGTAGAG-CGGCGGACGGGTGAGTA +SEQ_NAME: ID00000006 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCCTAATACATGCAAGTCGAGCGGTC------------CTTTCGGG-------------GGCAG-CGGCGGACGGGTGAGTA +SEQ_NAME: ID00000007 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAACG-ATGACTC----TCTAGCTTGCTAGA----GAAG--ATTAG-TGGCGGACGGGTGAGTA +SEQ_NAME: ID00000008 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAACGGGC-----------A-CTTCGG-T------------GCTAG-TGGCAGACGGGTGAGTA +SEQ_NAME: ID00000009 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------ATAGCAATAT------------GTCAG-CGGCAGACGGGTGAGTA +SEQ_NAME: ID00000010 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGTGAGTA +SEQ_NAME: ID00000011 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCATGCTTAACACATGCAAGTCGCACGGTC-------------AGCAAT--------------GGCAG-TGGCGGACGGGTGAGTA +SEQ_NAME: ID00000012 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCATGCTTAACACATGCAAGTCGCGCGGTC-------------AGCAAT--------------GGCAG-CGGCGGACGGGTGAGTA +SEQ_NAME: ID00000013 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCGCGCCTAACACATGCAAGTCGAACGAGC--GAG--A-GAGAGCTTGCTTTCT---CG-A--GCGAG-TGGCGAACGGGTGAGTA +SEQ_NAME: ID00000014 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACGGGTGAGTA +SEQ_NAME: ID00000015 +SEQ_LEN: 100 +--- +SEQ: ACGAAC-GCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACGGGTGAGTA +SEQ_NAME: ID00000016 +SEQ_LEN: 100 +--- +SEQ: ACGACC-GCT-GCGGCGTGCCTAACACATGCAAGTCCGACGTGA--AAG-----GGGAGCAATCCC----CCG-G--TAGGG-TGGCAAACGGGTGAGTA +SEQ_NAME: ID00000017 +SEQ_LEN: 100 +--- +SEQ: AT---C--AGGGCGGGATGCCTAACACATGCAAGTCGAACG-GCAGCACAG-GGAGAGCTTGCTCTC-TGGGT-G--GCGAG-TGGCGGACGGGTGAGGA +SEQ_NAME: ID00000018 +SEQ_LEN: 100 +--- +SEQ: AT--AC--CTGGCGG-AGGCCT-ACACATGCAAGTCGTACG-GT-AGAC----AGAAACTTGCTTCT----CT-T--GAGAT-CCGCGGACGGGTGAGTA +SEQ_NAME: ID00000019 +SEQ_LEN: 100 +--- diff --git a/bp_test/in/slice_align.in.2 b/bp_test/in/slice_align.in.2 new file mode 100644 index 0000000..8771a3d --- /dev/null +++ b/bp_test/in/slice_align.in.2 @@ -0,0 +1,2 @@ +>template +AT--AC-GCT-GCGG-AGGC-TAA-ACATGCA-GTCGGGCG-GAAACGA--T-GGTAGCTTGCTACCA--GGC-G--TCGAG-CGGCGGACGGGTGAGTA diff --git a/bp_test/out/slice_align.out.1 b/bp_test/out/slice_align.out.1 new file mode 100644 index 0000000..0aa62a6 --- /dev/null +++ b/bp_test/out/slice_align.out.1 @@ -0,0 +1,80 @@ +SEQ: AC-GCT +SEQ_NAME: ID00000000 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000001 +SEQ_LEN: 6 +--- +SEQ: AC-GTT +SEQ_NAME: ID00000002 +SEQ_LEN: 6 +--- +SEQ: -C-GCT +SEQ_NAME: ID00000003 +SEQ_LEN: 6 +--- +SEQ: -C-GCT +SEQ_NAME: ID00000004 +SEQ_LEN: 6 +--- +SEQ: -CAGCT +SEQ_NAME: ID00000005 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000006 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000007 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000008 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000009 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000010 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000011 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000012 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000013 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000014 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000015 +SEQ_LEN: 6 +--- +SEQ: AC-GCT +SEQ_NAME: ID00000016 +SEQ_LEN: 6 +--- +SEQ: CC-GCT +SEQ_NAME: ID00000017 +SEQ_LEN: 6 +--- +SEQ: -C--AG +SEQ_NAME: ID00000018 +SEQ_LEN: 6 +--- +SEQ: AC--CT +SEQ_NAME: ID00000019 +SEQ_LEN: 6 +--- diff --git a/bp_test/out/slice_align.out.2 b/bp_test/out/slice_align.out.2 new file mode 100644 index 0000000..08f4855 --- /dev/null +++ b/bp_test/out/slice_align.out.2 @@ -0,0 +1,80 @@ +SEQ: TGGCGGCATGCTTTACACATGCAAGTCGAACG-GCAGCGG-----GGGCTTCGGCCT----GCC-G--GCGAG-TGGCGAACGGGT +SEQ_NAME: ID00000000 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCATGCTTA-CACATGCAAGTCGCACGAAC------------CTTTCGGG-------------GTT-GGTGGCGGACGGGT +SEQ_NAME: ID00000001 +SEQ_LEN: 86 +--- +SEQ: T-GCGATGCGTCTTAAGCATGCAAGTCGAGCGGGC--TTA----TTCGGGCAACTGGA----TA-A--GTTAG-CGGCGAACTGGT +SEQ_NAME: ID00000002 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCCTAATACATGCAAGTCGAGCTGCA------------CCTTCGGG-------------TGAGC-TGGCGGACGGGT +SEQ_NAME: ID00000003 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGT +SEQ_NAME: ID00000004 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGT +SEQ_NAME: ID00000005 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAA-AA-------CACTTCGGTG-------T-GAGTAGAG-CGGCGGACGGGT +SEQ_NAME: ID00000006 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCCTAATACATGCAAGTCGAGCGGTC------------CTTTCGGG-------------GGCAG-CGGCGGACGGGT +SEQ_NAME: ID00000007 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAACG-ATGACTC----TCTAGCTTGCTAGA----GAAG--ATTAG-TGGCGGACGGGT +SEQ_NAME: ID00000008 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAACGGGC-----------A-CTTCGG-T------------GCTAG-TGGCAGACGGGT +SEQ_NAME: ID00000009 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------ATAGCAATAT------------GTCAG-CGGCAGACGGGT +SEQ_NAME: ID00000010 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGT +SEQ_NAME: ID00000011 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCATGCTTAACACATGCAAGTCGCACGGTC-------------AGCAAT--------------GGCAG-TGGCGGACGGGT +SEQ_NAME: ID00000012 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCATGCTTAACACATGCAAGTCGCGCGGTC-------------AGCAAT--------------GGCAG-CGGCGGACGGGT +SEQ_NAME: ID00000013 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCGCGCCTAACACATGCAAGTCGAACGAGC--GAG--A-GAGAGCTTGCTTTCT---CG-A--GCGAG-TGGCGAACGGGT +SEQ_NAME: ID00000014 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACGGGT +SEQ_NAME: ID00000015 +SEQ_LEN: 86 +--- +SEQ: TGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACGGGT +SEQ_NAME: ID00000016 +SEQ_LEN: 86 +--- +SEQ: T-GCGGCGTGCCTAACACATGCAAGTCCGACGTGA--AAG-----GGGAGCAATCCC----CCG-G--TAGGG-TGGCAAACGGGT +SEQ_NAME: ID00000017 +SEQ_LEN: 86 +--- +SEQ: GGGCGGGATGCCTAACACATGCAAGTCGAACG-GCAGCACAG-GGAGAGCTTGCTCTC-TGGGT-G--GCGAG-TGGCGGACGGGT +SEQ_NAME: ID00000018 +SEQ_LEN: 86 +--- +SEQ: TGGCGG-AGGCCT-ACACATGCAAGTCGTACG-GT-AGAC----AGAAACTTGCTTCT----CT-T--GAGAT-CCGCGGACGGGT +SEQ_NAME: ID00000019 +SEQ_LEN: 86 +--- diff --git a/bp_test/out/slice_align.out.3 b/bp_test/out/slice_align.out.3 new file mode 100644 index 0000000..10c9e86 --- /dev/null +++ b/bp_test/out/slice_align.out.3 @@ -0,0 +1,80 @@ +SEQ: A-TGAC-GCTGGCGGCATGCTTTACACATGCAAGTCGAACG-GCAGCGG-----GGGCTTCGGCCT----GCC-G--GCGAG-TGGCGAACG +SEQ_NAME: ID00000000 +SEQ_LEN: 92 +--- +SEQ: ACG-AC-GCTGGCGGCATGCTTA-CACATGCAAGTCGCACGAAC------------CTTTCGGG-------------GTT-GGTGGCGGACG +SEQ_NAME: ID00000001 +SEQ_LEN: 92 +--- +SEQ: ACG-AC-GTT-GCGATGCGTCTTAAGCATGCAAGTCGAGCGGGC--TTA----TTCGGGCAACTGGA----TA-A--GTTAG-CGGCGAACT +SEQ_NAME: ID00000002 +SEQ_LEN: 92 +--- +SEQ: ACGA-C-GCTGGCGGCAGGCCTAATACATGCAAGTCGAGCTGCA------------CCTTCGGG-------------TGAGC-TGGCGGACG +SEQ_NAME: ID00000003 +SEQ_LEN: 92 +--- +SEQ: ACGA-C-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACG +SEQ_NAME: ID00000004 +SEQ_LEN: 92 +--- +SEQ: ACGA-CAGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACG +SEQ_NAME: ID00000005 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAA-AA-------CACTTCGGTG-------T-GAGTAGAG-CGGCGGACG +SEQ_NAME: ID00000006 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCCTAATACATGCAAGTCGAGCGGTC------------CTTTCGGG-------------GGCAG-CGGCGGACG +SEQ_NAME: ID00000007 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAACG-ATGACTC----TCTAGCTTGCTAGA----GAAG--ATTAG-TGGCGGACG +SEQ_NAME: ID00000008 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAACGGGC-----------A-CTTCGG-T------------GCTAG-TGGCAGACG +SEQ_NAME: ID00000009 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------ATAGCAATAT------------GTCAG-CGGCAGACG +SEQ_NAME: ID00000010 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACG +SEQ_NAME: ID00000011 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCATGCTTAACACATGCAAGTCGCACGGTC-------------AGCAAT--------------GGCAG-TGGCGGACG +SEQ_NAME: ID00000012 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCATGCTTAACACATGCAAGTCGCGCGGTC-------------AGCAAT--------------GGCAG-CGGCGGACG +SEQ_NAME: ID00000013 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCGCGCCTAACACATGCAAGTCGAACGAGC--GAG--A-GAGAGCTTGCTTTCT---CG-A--GCGAG-TGGCGAACG +SEQ_NAME: ID00000014 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACG +SEQ_NAME: ID00000015 +SEQ_LEN: 92 +--- +SEQ: ACGAAC-GCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACG +SEQ_NAME: ID00000016 +SEQ_LEN: 92 +--- +SEQ: ACGACC-GCT-GCGGCGTGCCTAACACATGCAAGTCCGACGTGA--AAG-----GGGAGCAATCCC----CCG-G--TAGGG-TGGCAAACG +SEQ_NAME: ID00000017 +SEQ_LEN: 92 +--- +SEQ: AT---C--AGGGCGGGATGCCTAACACATGCAAGTCGAACG-GCAGCACAG-GGAGAGCTTGCTCTC-TGGGT-G--GCGAG-TGGCGGACG +SEQ_NAME: ID00000018 +SEQ_LEN: 92 +--- +SEQ: AT--AC--CTGGCGG-AGGCCT-ACACATGCAAGTCGTACG-GT-AGAC----AGAAACTTGCTTCT----CT-T--GAGAT-CCGCGGACG +SEQ_NAME: ID00000019 +SEQ_LEN: 92 +--- diff --git a/bp_test/test/test_slice_align b/bp_test/test/test_slice_align new file mode 100755 index 0000000..f9a7422 --- /dev/null +++ b/bp_test/test/test_slice_align @@ -0,0 +1,32 @@ +#!/bin/bash + +source "$BP_DIR/bp_test/lib/test.sh" + +run "$bp -I $in.1 -b 5 -e 10 -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -I $in.1 -f TGGCGGCATG -r GCGAACGGGT -m 0 -i 0 -d 0 -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in.1 -f TGGCGGCATG -R ACCCGTTCGC -m 0 -i 0 -d 0 -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in.1 -f TGGgGGCATG -r GCGAtCGGGT -m 1 -i 0 -d 0 -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in.1 -f TGGCGcGCATG -r GCGAtACGGGT -m 0 -i 1 -d 0 -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in.1 -f TGGCGCATG -r GCGAAGGGT -m 0 -i 0 -d 1 -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in.1 -f ATACGCTGCGGAGGCTA -r TTGCTACCAGGCGTCGAGCGGCGGACGGG -t $in.2 -m 0 -d 0 -i 0 -O $tmp" +assert_no_diff $tmp $out.3 +clean + -- 2.39.2