git.donarmstrong.com Git - biopieces.git/commitdiff
added tests for slice_align
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 2 Dec 2013 19:45:39 +0000 (19:45 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 2 Dec 2013 19:45:39 +0000 (19:45 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@2266 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/slice_align
bp_test/in/slice_align.in.1 [new file with mode: 0644]
bp_test/in/slice_align.in.2 [new file with mode: 0644]
bp_test/out/slice_align.out.1 [new file with mode: 0644]
bp_test/out/slice_align.out.2 [new file with mode: 0644]
bp_test/out/slice_align.out.3 [new file with mode: 0644]
bp_test/test/test_slice_align [new file with mode: 0755]

diff --git a/bp_bin/slice_align b/bp_bin/slice_align
index 586c2701570dfbd46d546a3cdf0e3bc012f731e5..f63ac7e203c2e41c40c15d294fd11798e760b5d5 100755 (executable)
@@ -24,7 +24,7 @@
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
-# Join sequences in the stream.
+# Slice aligned sequences in the stream to obtain subsequences.
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
@@ -60,13 +60,17 @@ else
   raise "either --beg/--end or --forward/--reverse|--reverse_rc must be specified"
 end
 
+if options[:template_file]
+  template = Fasta.open(options[:template_file]).get_entry
+end
+
 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
   input.each_record do |record|
     if record[:SEQ]
       entry = Seq.new(seq: record[:SEQ])
 
       unless options[:beg]
-        compact = Seq.new(seq: entry.seq.dup)
+        compact = template ? template : Seq.new(seq: entry.seq.dup)
         compact.seq.delete! "-.~"
 
         fmatch = compact.patmatch(options[:forward],
diff --git a/bp_test/in/slice_align.in.1 b/bp_test/in/slice_align.in.1
new file mode 100644 (file)
index 0000000..1aa1a32
--- /dev/null
@@ -0,0 +1,80 @@
+SEQ: A-TGAC-GCTGGCGGCATGCTTTACACATGCAAGTCGAACG-GCAGCGG-----GGGCTTCGGCCT----GCC-G--GCGAG-TGGCGAACGGGTGAGTA
+SEQ_NAME: ID00000000
+SEQ_LEN: 100
+---
+SEQ: ACG-AC-GCTGGCGGCATGCTTA-CACATGCAAGTCGCACGAAC------------CTTTCGGG-------------GTT-GGTGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000001
+SEQ_LEN: 100
+---
+SEQ: ACG-AC-GTT-GCGATGCGTCTTAAGCATGCAAGTCGAGCGGGC--TTA----TTCGGGCAACTGGA----TA-A--GTTAG-CGGCGAACTGGTGAGTA
+SEQ_NAME: ID00000002
+SEQ_LEN: 100
+---
+SEQ: ACGA-C-GCTGGCGGCAGGCCTAATACATGCAAGTCGAGCTGCA------------CCTTCGGG-------------TGAGC-TGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000003
+SEQ_LEN: 100
+---
+SEQ: ACGA-C-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGTGAGTA
+SEQ_NAME: ID00000004
+SEQ_LEN: 100
+---
+SEQ: ACGA-CAGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGTGAGTA
+SEQ_NAME: ID00000005
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAA-AA-------CACTTCGGTG-------T-GAGTAGAG-CGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000006
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCCTAATACATGCAAGTCGAGCGGTC------------CTTTCGGG-------------GGCAG-CGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000007
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAACG-ATGACTC----TCTAGCTTGCTAGA----GAAG--ATTAG-TGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000008
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAACGGGC-----------A-CTTCGG-T------------GCTAG-TGGCAGACGGGTGAGTA
+SEQ_NAME: ID00000009
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------ATAGCAATAT------------GTCAG-CGGCAGACGGGTGAGTA
+SEQ_NAME: ID00000010
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGTGAGTA
+SEQ_NAME: ID00000011
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCATGCTTAACACATGCAAGTCGCACGGTC-------------AGCAAT--------------GGCAG-TGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000012
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCATGCTTAACACATGCAAGTCGCGCGGTC-------------AGCAAT--------------GGCAG-CGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000013
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCGCGCCTAACACATGCAAGTCGAACGAGC--GAG--A-GAGAGCTTGCTTTCT---CG-A--GCGAG-TGGCGAACGGGTGAGTA
+SEQ_NAME: ID00000014
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000015
+SEQ_LEN: 100
+---
+SEQ: ACGAAC-GCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACGGGTGAGTA
+SEQ_NAME: ID00000016
+SEQ_LEN: 100
+---
+SEQ: ACGACC-GCT-GCGGCGTGCCTAACACATGCAAGTCCGACGTGA--AAG-----GGGAGCAATCCC----CCG-G--TAGGG-TGGCAAACGGGTGAGTA
+SEQ_NAME: ID00000017
+SEQ_LEN: 100
+---
+SEQ: AT---C--AGGGCGGGATGCCTAACACATGCAAGTCGAACG-GCAGCACAG-GGAGAGCTTGCTCTC-TGGGT-G--GCGAG-TGGCGGACGGGTGAGGA
+SEQ_NAME: ID00000018
+SEQ_LEN: 100
+---
+SEQ: AT--AC--CTGGCGG-AGGCCT-ACACATGCAAGTCGTACG-GT-AGAC----AGAAACTTGCTTCT----CT-T--GAGAT-CCGCGGACGGGTGAGTA
+SEQ_NAME: ID00000019
+SEQ_LEN: 100
+---
diff --git a/bp_test/in/slice_align.in.2 b/bp_test/in/slice_align.in.2
new file mode 100644 (file)
index 0000000..8771a3d
--- /dev/null
@@ -0,0 +1,2 @@
+>template
+AT--AC-GCT-GCGG-AGGC-TAA-ACATGCA-GTCGGGCG-GAAACGA--T-GGTAGCTTGCTACCA--GGC-G--TCGAG-CGGCGGACGGGTGAGTA
diff --git a/bp_test/out/slice_align.out.1 b/bp_test/out/slice_align.out.1
new file mode 100644 (file)
index 0000000..0aa62a6
--- /dev/null
@@ -0,0 +1,80 @@
+SEQ: AC-GCT
+SEQ_NAME: ID00000000
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000001
+SEQ_LEN: 6
+---
+SEQ: AC-GTT
+SEQ_NAME: ID00000002
+SEQ_LEN: 6
+---
+SEQ: -C-GCT
+SEQ_NAME: ID00000003
+SEQ_LEN: 6
+---
+SEQ: -C-GCT
+SEQ_NAME: ID00000004
+SEQ_LEN: 6
+---
+SEQ: -CAGCT
+SEQ_NAME: ID00000005
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000006
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000007
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000008
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000009
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000010
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000011
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000012
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000013
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000014
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000015
+SEQ_LEN: 6
+---
+SEQ: AC-GCT
+SEQ_NAME: ID00000016
+SEQ_LEN: 6
+---
+SEQ: CC-GCT
+SEQ_NAME: ID00000017
+SEQ_LEN: 6
+---
+SEQ: -C--AG
+SEQ_NAME: ID00000018
+SEQ_LEN: 6
+---
+SEQ: AC--CT
+SEQ_NAME: ID00000019
+SEQ_LEN: 6
+---
diff --git a/bp_test/out/slice_align.out.2 b/bp_test/out/slice_align.out.2
new file mode 100644 (file)
index 0000000..08f4855
--- /dev/null
@@ -0,0 +1,80 @@
+SEQ: TGGCGGCATGCTTTACACATGCAAGTCGAACG-GCAGCGG-----GGGCTTCGGCCT----GCC-G--GCGAG-TGGCGAACGGGT
+SEQ_NAME: ID00000000
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCATGCTTA-CACATGCAAGTCGCACGAAC------------CTTTCGGG-------------GTT-GGTGGCGGACGGGT
+SEQ_NAME: ID00000001
+SEQ_LEN: 86
+---
+SEQ: T-GCGATGCGTCTTAAGCATGCAAGTCGAGCGGGC--TTA----TTCGGGCAACTGGA----TA-A--GTTAG-CGGCGAACTGGT
+SEQ_NAME: ID00000002
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCCTAATACATGCAAGTCGAGCTGCA------------CCTTCGGG-------------TGAGC-TGGCGGACGGGT
+SEQ_NAME: ID00000003
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGT
+SEQ_NAME: ID00000004
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGT
+SEQ_NAME: ID00000005
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAA-AA-------CACTTCGGTG-------T-GAGTAGAG-CGGCGGACGGGT
+SEQ_NAME: ID00000006
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCCTAATACATGCAAGTCGAGCGGTC------------CTTTCGGG-------------GGCAG-CGGCGGACGGGT
+SEQ_NAME: ID00000007
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAACG-ATGACTC----TCTAGCTTGCTAGA----GAAG--ATTAG-TGGCGGACGGGT
+SEQ_NAME: ID00000008
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAACGGGC-----------A-CTTCGG-T------------GCTAG-TGGCAGACGGGT
+SEQ_NAME: ID00000009
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------ATAGCAATAT------------GTCAG-CGGCAGACGGGT
+SEQ_NAME: ID00000010
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACGGGT
+SEQ_NAME: ID00000011
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCATGCTTAACACATGCAAGTCGCACGGTC-------------AGCAAT--------------GGCAG-TGGCGGACGGGT
+SEQ_NAME: ID00000012
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCATGCTTAACACATGCAAGTCGCGCGGTC-------------AGCAAT--------------GGCAG-CGGCGGACGGGT
+SEQ_NAME: ID00000013
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCGCGCCTAACACATGCAAGTCGAACGAGC--GAG--A-GAGAGCTTGCTTTCT---CG-A--GCGAG-TGGCGAACGGGT
+SEQ_NAME: ID00000014
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACGGGT
+SEQ_NAME: ID00000015
+SEQ_LEN: 86
+---
+SEQ: TGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACGGGT
+SEQ_NAME: ID00000016
+SEQ_LEN: 86
+---
+SEQ: T-GCGGCGTGCCTAACACATGCAAGTCCGACGTGA--AAG-----GGGAGCAATCCC----CCG-G--TAGGG-TGGCAAACGGGT
+SEQ_NAME: ID00000017
+SEQ_LEN: 86
+---
+SEQ: GGGCGGGATGCCTAACACATGCAAGTCGAACG-GCAGCACAG-GGAGAGCTTGCTCTC-TGGGT-G--GCGAG-TGGCGGACGGGT
+SEQ_NAME: ID00000018
+SEQ_LEN: 86
+---
+SEQ: TGGCGG-AGGCCT-ACACATGCAAGTCGTACG-GT-AGAC----AGAAACTTGCTTCT----CT-T--GAGAT-CCGCGGACGGGT
+SEQ_NAME: ID00000019
+SEQ_LEN: 86
+---
diff --git a/bp_test/out/slice_align.out.3 b/bp_test/out/slice_align.out.3
new file mode 100644 (file)
index 0000000..10c9e86
--- /dev/null
@@ -0,0 +1,80 @@
+SEQ: A-TGAC-GCTGGCGGCATGCTTTACACATGCAAGTCGAACG-GCAGCGG-----GGGCTTCGGCCT----GCC-G--GCGAG-TGGCGAACG
+SEQ_NAME: ID00000000
+SEQ_LEN: 92
+---
+SEQ: ACG-AC-GCTGGCGGCATGCTTA-CACATGCAAGTCGCACGAAC------------CTTTCGGG-------------GTT-GGTGGCGGACG
+SEQ_NAME: ID00000001
+SEQ_LEN: 92
+---
+SEQ: ACG-AC-GTT-GCGATGCGTCTTAAGCATGCAAGTCGAGCGGGC--TTA----TTCGGGCAACTGGA----TA-A--GTTAG-CGGCGAACT
+SEQ_NAME: ID00000002
+SEQ_LEN: 92
+---
+SEQ: ACGA-C-GCTGGCGGCAGGCCTAATACATGCAAGTCGAGCTGCA------------CCTTCGGG-------------TGAGC-TGGCGGACG
+SEQ_NAME: ID00000003
+SEQ_LEN: 92
+---
+SEQ: ACGA-C-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACG
+SEQ_NAME: ID00000004
+SEQ_LEN: 92
+---
+SEQ: ACGA-CAGCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACG
+SEQ_NAME: ID00000005
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCCTAACACATGCAAGTCGAGCGCGAA-AA-------CACTTCGGTG-------T-GAGTAGAG-CGGCGGACG
+SEQ_NAME: ID00000006
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCCTAATACATGCAAGTCGAGCGGTC------------CTTTCGGG-------------GGCAG-CGGCGGACG
+SEQ_NAME: ID00000007
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAACG-ATGACTC----TCTAGCTTGCTAGA----GAAG--ATTAG-TGGCGGACG
+SEQ_NAME: ID00000008
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAACGGGC-----------A-CTTCGG-T------------GCTAG-TGGCAGACG
+SEQ_NAME: ID00000009
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------ATAGCAATAT------------GTCAG-CGGCAGACG
+SEQ_NAME: ID00000010
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCAGGCTTAACACATGCAAGTCGAGCGGGC-----------GTAGCAATAC------------GTCAG-CGGCAGACG
+SEQ_NAME: ID00000011
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCATGCTTAACACATGCAAGTCGCACGGTC-------------AGCAAT--------------GGCAG-TGGCGGACG
+SEQ_NAME: ID00000012
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCATGCTTAACACATGCAAGTCGCGCGGTC-------------AGCAAT--------------GGCAG-CGGCGGACG
+SEQ_NAME: ID00000013
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCGCGCCTAACACATGCAAGTCGAACGAGC--GAG--A-GAGAGCTTGCTTTCT---CG-A--GCGAG-TGGCGAACG
+SEQ_NAME: ID00000014
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACG
+SEQ_NAME: ID00000015
+SEQ_LEN: 92
+---
+SEQ: ACGAAC-GCTGGCGGCGTGCCTAATACATGCAAGTCGAGCGGAC--CGA--C-GGGAGCTTGCTCCCT---TA-G--GTCAG-CGGCGGACG
+SEQ_NAME: ID00000016
+SEQ_LEN: 92
+---
+SEQ: ACGACC-GCT-GCGGCGTGCCTAACACATGCAAGTCCGACGTGA--AAG-----GGGAGCAATCCC----CCG-G--TAGGG-TGGCAAACG
+SEQ_NAME: ID00000017
+SEQ_LEN: 92
+---
+SEQ: AT---C--AGGGCGGGATGCCTAACACATGCAAGTCGAACG-GCAGCACAG-GGAGAGCTTGCTCTC-TGGGT-G--GCGAG-TGGCGGACG
+SEQ_NAME: ID00000018
+SEQ_LEN: 92
+---
+SEQ: AT--AC--CTGGCGG-AGGCCT-ACACATGCAAGTCGTACG-GT-AGAC----AGAAACTTGCTTCT----CT-T--GAGAT-CCGCGGACG
+SEQ_NAME: ID00000019
+SEQ_LEN: 92
+---
diff --git a/bp_test/test/test_slice_align b/bp_test/test/test_slice_align
new file mode 100755 (executable)
index 0000000..f9a7422
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"
+
+run "$bp -I $in.1 -b 5 -e 10 -O $tmp"
+assert_no_diff $tmp $out.1
+clean
+
+run "$bp -I $in.1 -f TGGCGGCATG -r GCGAACGGGT -m 0 -i 0 -d 0 -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -I $in.1 -f TGGCGGCATG -R ACCCGTTCGC -m 0 -i 0 -d 0 -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -I $in.1 -f TGGgGGCATG -r GCGAtCGGGT -m 1 -i 0 -d 0 -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -I $in.1 -f TGGCGcGCATG -r GCGAtACGGGT -m 0 -i 1 -d 0 -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -I $in.1 -f TGGCGCATG -r GCGAAGGGT -m 0 -i 0 -d 1 -O $tmp"
+assert_no_diff $tmp $out.2
+clean
+
+run "$bp -I $in.1 -f ATACGCTGCGGAGGCTA -r TTGCTACCAGGCGTCGAGCGGCGGACGGG -t $in.2 -m 0 -d 0 -i 0 -O $tmp"
+assert_no_diff $tmp $out.3
+clean
+