-#!/usr/bin/env perl
+#!/usr/bin/env ruby
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2011 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# Trim sequence ends for residues with a low quality score.
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-use warnings;
-use strict;
-use Maasha::Biopieces;
-use Maasha::Fastq;
-
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+# This program is part of the Biopieces framework (www.biopieces.org).
-my ( $options, $in, $out, $record, $left, $right, $pos_l, $pos_r );
-
-$options = Maasha::Biopieces::parse_options(
- [
- { long => 'min', short => 'm', type => 'uint', mandatory => 'no', default => 15, allowed => undef, disallowed => 0 },
- { long => 'trim', short => 't', type => 'string', mandatory => 'no', default => 'both', allowed => 'left,right,both', disallowed => undef },
- ]
-);
-
-$right = 1 if $options->{ 'trim' } eq 'right';
-$left = 1 if $options->{ 'trim' } eq 'left';
-$right = 1 if $options->{ 'trim' } eq 'both';
-$left = 1 if $options->{ 'trim' } eq 'both';
-
-$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
-$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
-
-while ( $record = Maasha::Biopieces::get_record( $in ) )
-{
- if ( exists $record->{ 'SEQ' } and exists $record->{ 'SCORES' } )
- {
- if ( $right )
- {
- $pos_r = Maasha::Fastq::trim_right( $record->{ 'SCORES' }, $options->{ 'min' } );
-
- $record->{ 'SEQ' } = substr $record->{ 'SEQ' }, 0, $pos_r + 1;
- $record->{ 'SCORES' } = substr $record->{ 'SCORES' }, 0, $pos_r + 1;
- $record->{ 'TRIM_RIGHT' } = $pos_r;
- }
-
- if ( $left )
- {
- $pos_l = Maasha::Fastq::trim_left( $record->{ 'SCORES' }, $options->{ 'min' } );
-
- $record->{ 'SEQ' } = substr $record->{ 'SEQ' }, $pos_l;
- $record->{ 'SCORES' } = substr $record->{ 'SCORES' }, $pos_l;
- $record->{ 'TRIM_LEFT' } = $pos_l;
- }
-
- $record->{ 'SEQ_LEN' } = length $record->{ 'SEQ' };
- }
-
- Maasha::Biopieces::put_record( $record, $out );
-}
-
-Maasha::Biopieces::close_stream( $in );
-Maasha::Biopieces::close_stream( $out );
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+# Trim sequence ends for residues with a low quality score.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-BEGIN
-{
- Maasha::Biopieces::status_set();
-}
-
-
-END
-{
- Maasha::Biopieces::status_log();
-}
+require 'maasha/biopieces'
+require 'pp'
+
+casts = []
+casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>true, :default=>15, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'trim', :short=>'t', :type=>'string', :mandatory=>true, :default=>'both', :allowed=>'left,right,both', :disallowed=>nil}
+
+options = Biopieces.options_parse(ARGV, casts)
+
+# Lowest score character of the range that gets trimmed ('@').
+# NOTE(review): presumably the Phred+64 offset of Illumina 1.3+ scores - confirm.
+ILLUMINA_BASE = 64
+
+# Body of a regex character class covering every score character from
+# ILLUMINA_BASE up to and including ILLUMINA_BASE + min. Both bounds are
+# escaped because for some values of min the upper bound is a character that
+# is special inside a class ('[', '\' or ']' for min = 27, 28, 29).
+low_quality = "#{Regexp.escape(ILLUMINA_BASE.chr)}-#{Regexp.escape((ILLUMINA_BASE + options[:min]).chr)}"
+
+regex_left  = Regexp.new("^[#{low_quality}]+")
+regex_right = Regexp.new("[#{low_quality}]+$")
+
+# Remove the leading run of low quality scores matched by regex, together
+# with the same number of leading residues. Updates :SEQ, :SCORES and
+# :SEQ_LEN and stores the number of removed residues in :TRIM_LEFT.
+# The record is left untouched when the scores do not match.
+def trim_left(record, regex)
+  record[:SCORES].match(regex) do |m|
+    trimmed = m.to_s.length
+
+    record[:SEQ]       = record[:SEQ][trimmed ... record[:SEQ].length]
+    record[:SCORES]    = m.post_match
+    record[:SEQ_LEN]   = record[:SEQ].length
+    record[:TRIM_LEFT] = trimmed
+  end
+end
+
+# Remove the trailing run of low quality scores matched by regex, together
+# with the same number of trailing residues. Updates :SEQ, :SCORES and
+# :SEQ_LEN and stores the index of the last retained score in :TRIM_RIGHT
+# (-1 when every score was trimmed).
+# The record is left untouched when the scores do not match.
+def trim_right(record, regex)
+  record[:SCORES].match(regex) do |m|
+    kept = m.pre_match
+
+    record[:SEQ]        = record[:SEQ][0 ... record[:SEQ].length - m.to_s.length]
+    record[:SCORES]     = kept
+    record[:SEQ_LEN]    = record[:SEQ].length
+    record[:TRIM_RIGHT] = kept.length - 1
+  end
+end
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+  input.each_record do |record|
+    # Only records carrying both a sequence and its scores can be trimmed;
+    # everything else is passed through unchanged.
+    if record.has_key?(:SEQ) && record.has_key?(:SCORES)
+      case options[:trim]
+      when /both/
+        # Right end first, then left end, so TRIM_RIGHT is an index into the
+        # untrimmed sequence.
+        trim_right(record, regex_right)
+        trim_left(record, regex_left)
+      when /left/
+        trim_left(record, regex_left)
+      when /right/
+        trim_right(record, regex_right)
+      end
+    end
+
+    output.puts record
+  end
+end
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<