36 files changed:
Fasta.open(infile, mode="w") do |fasta_output|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Fasta.open(infile, mode="w") do |fasta_output|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ
seq = Seq.new_bp(record)
total += record[:SEQ].length
seq = Seq.new_bp(record)
total += record[:SEQ].length
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ
seq = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SEQ_TYPE], record[:SCORE])
comp = seq.composition
seq = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SEQ_TYPE], record[:SCORE])
comp = seq.composition
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? options[:key].to_sym
+ if record[options[:key].to_sym]
record[(options[:key] + "_BIN").to_sym] = (record[options[:key].to_sym].to_i / options[:bin_size]) * options[:bin_size]
end
record[(options[:key] + "_BIN").to_sym] = (record[options[:key].to_sym].to_i / options[:bin_size]) * options[:bin_size]
end
input.each_record do |record|
output.puts record
input.each_record do |record|
output.puts record
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ if record[:SEQ_NAME] and record[:SEQ]
seq = Seq.new_bp(record)
unless got1
seq = Seq.new_bp(record)
unless got1
input.each_record do |record|
output.puts record
input.each_record do |record|
output.puts record
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ and record.has_key? :SCORES
+ if record[:SEQ_NAME] and record[:SEQ] and record[:SCORES]
entry = Seq.new_bp(record)
io_fq.puts entry.to_fastq
entry = Seq.new_bp(record)
io_fq.puts entry.to_fastq
input.each_record do |record|
output.puts record unless options[:no_stream]
input.each_record do |record|
output.puts record unless options[:no_stream]
- if record.has_key? :SEQ
total += record[:SEQ].length
lengths << record[:SEQ].length
end
total += record[:SEQ].length
lengths << record[:SEQ].length
end
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- seq << record[:SEQ] if record.has_key? :SEQ
+ seq << record[:SEQ] if record[:SEQ]
output.puts record unless options[:no_stream]
end
output.puts record unless options[:no_stream]
end
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ
trim_beg = 0
trim_end = record[:SEQ].length
trim_beg = 0
trim_end = record[:SEQ].length
record[:SEQ] = record[:SEQ][trim_beg ... trim_end]
record[:SEQ_LEN] = record[:SEQ].length
record[:SEQ] = record[:SEQ][trim_beg ... trim_end]
record[:SEQ_LEN] = record[:SEQ].length
- record[:SCORES] = record[:SCORES][trim_beg ... trim_end] if record.has_key? :SCORES
+ record[:SCORES] = record[:SCORES][trim_beg ... trim_end] if record[:SCORES]
Fasta.open(fasta_file, "w") do |fasta_io|
Fastq.open(fastq_file, "w") do |fastq_io|
input.each_record do |record|
Fasta.open(fasta_file, "w") do |fasta_io|
Fastq.open(fastq_file, "w") do |fastq_io|
input.each_record do |record|
- if record.has_key? :SEQ and record.has_key? :SCORES
+ if record[:SEQ] and record[:SCORES]
entry = Seq.new_bp(record)
entry.seq_name = seq_count.to_s
entry = Seq.new_bp(record)
entry.seq_name = seq_count.to_s
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ if record[:SEQ_NAME] and record[:SEQ]
seq = Seq.new_bp(record)
seq.each_digest(options[:pattern].to_s, options[:cut_pos]) do |digest|
seq = Seq.new_bp(record)
seq.each_digest(options[:pattern].to_s, options[:cut_pos]) do |digest|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
(0 ... record[key].to_i).each { output.puts record }
else
output.puts record
(0 ... record[key].to_i).each { output.puts record }
else
output.puts record
hamming_dist = 0
barcode = seq[@pos ... @pos + size].upcase.to_sym
hamming_dist = 0
barcode = seq[@pos ... @pos + size].upcase.to_sym
- if @barcode_hash.has_key? barcode
+ if @barcode_hash[barcode]
return BarCode.new(barcode, @barcode_hash[barcode], @pos, size, hamming_dist)
elsif @max_mismatches > 0
@barcode_hash.each_key do |key|
return BarCode.new(barcode, @barcode_hash[barcode], @pos, size, hamming_dist)
elsif @max_mismatches > 0
@barcode_hash.each_key do |key|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ
if barcode = bc_finder.find_barcode(record[:SEQ])
record.merge!(barcode.to_hash)
if barcode = bc_finder.find_barcode(record[:SEQ])
record.merge!(barcode.to_hash)
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ
seq = Seq.new(nil, record[:SEQ])
record[:HOMOPOL_MAX] = seq.homopol_max(options[:min])
seq = Seq.new(nil, record[:SEQ])
record[:HOMOPOL_MAX] = seq.homopol_max(options[:min])
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ
unless seq.seq_name
seq.seq_name = record[:SEQ_NAME]
end
unless seq.seq_name
seq.seq_name = record[:SEQ_NAME]
end
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- record[:SEQ_LEN] = record[:SEQ].length if record.has_key? :SEQ
+ record[:SEQ_LEN] = record[:SEQ].length if record[:SEQ]
output.puts record
end
end
output.puts record
end
end
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ
entry = Seq.new_bp(record)
options[:hardmask] ? entry.mask_seq_hard!(options[:cutoff]) : entry.mask_seq_soft!(options[:cutoff])
entry = Seq.new_bp(record)
options[:hardmask] ? entry.mask_seq_hard!(options[:cutoff]) : entry.mask_seq_soft!(options[:cutoff])
Biopieces.open(options[:stream_in], tmp_file) do |input, output|
Fasta.open(in_file, mode="w") do |fasta_io|
input.each_record do |record|
Biopieces.open(options[:stream_in], tmp_file) do |input, output|
Fasta.open(in_file, mode="w") do |fasta_io|
input.each_record do |record|
- if record.has_key? :SEQ_NAME
seq_name_hash[seq_name_count] = record[:SEQ_NAME]
record[:SEQ_NAME] = seq_name_count
seq_name_count += 1
seq_name_hash[seq_name_count] = record[:SEQ_NAME]
record[:SEQ_NAME] = seq_name_count
seq_name_count += 1
Biopieces.open(tmp_file, options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(tmp_file, options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ_NAME
key = record[:SEQ_NAME].to_i
record[:SEQ_NAME] = seq_name_hash[key]
if options[:inline]
key = record[:SEQ_NAME].to_i
record[:SEQ_NAME] = seq_name_hash[key]
if options[:inline]
- if results.has_key? key
results[key].each do |result|
record[:PATTERN] = options[:pattern]
record[:MATCH] = result.match
results[key].each do |result|
record[:PATTERN] = options[:pattern]
record[:MATCH] = result.match
input.each_record do |record|
output.puts record
input.each_record do |record|
output.puts record
- if record.has_key? :SEQ
entry = Seq.new_bp(record)
ios.puts entry.to_fasta
end
entry = Seq.new_bp(record)
ios.puts entry.to_fasta
end
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
count_hash[record[key].to_i] += 1
end
count_hash[record[key].to_i] += 1
end
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_with_index do |record, count|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_with_index do |record, count|
- output.puts record unless options.has_key? :no_stream
+ output.puts record unless options[:no_stream]
$stderr.printf "\n% 9d ", count if (count % (options[:count] * 100)) == 0
$stderr.print "." if (count % options[:count]) == 0
end
$stderr.printf "\n% 9d ", count if (count % (options[:count] * 100)) == 0
$stderr.print "." if (count % options[:count]) == 0
end
- if options.has_key? :data_in
options[:data_in].each do |file|
EMBL.open(file, mode='r') do |embl_io|
embl_io.each(hash_keys, hash_feats, hash_quals) do |entry|
options[:data_in].each do |file|
EMBL.open(file, mode='r') do |embl_io|
embl_io.each(hash_keys, hash_feats, hash_quals) do |entry|
- if options.has_key? :num and options[:num] == num
+ if options[:num] and options[:num] == num
- if options.has_key? :data_in
options[:data_in].each do |file|
Fasta.open(file, mode='r') do |fasta|
fasta.each do |entry|
output.puts entry.to_bp
num += 1
options[:data_in].each do |file|
Fasta.open(file, mode='r') do |fasta|
fasta.each do |entry|
output.puts entry.to_bp
num += 1
- if options.has_key? :num and options[:num] == num
+ if options[:num] and options[:num] == num
- if options.has_key? :data_in
options[:data_in].each do |file|
Genbank.open(file, mode='r') do |gb|
gb.each(hash_keys, hash_feats, hash_quals) do |entry|
options[:data_in].each do |file|
Genbank.open(file, mode='r') do |gb|
gb.each(hash_keys, hash_feats, hash_quals) do |entry|
- if options.has_key? :num and options[:num] == num
+ if options[:num] and options[:num] == num
- if options.has_key? :data_in
options[:data_in].each do |file|
SFF.open(file, mode='r') do |sff|
sff.each do |entry|
options[:data_in].each do |file|
SFF.open(file, mode='r') do |sff|
sff.each do |entry|
output.puts entry.to_bp
num += 1
output.puts entry.to_bp
num += 1
- if options.has_key? :num and options[:num] == num
+ if options[:num] and options[:num] == num
Biopieces.open(options[:stream_in], file_records) do |input, output|
input.each do |record|
Biopieces.open(options[:stream_in], file_records) do |input, output|
input.each do |record|
- if record.has_key? :SEQ
na_mask = NArray.int(record[:SEQ].length) unless na_mask
na_seq = NArray.to_na(record[:SEQ], "byte")
na_mask += na_seq.eq('-'.ord)
na_mask = NArray.int(record[:SEQ].length) unless na_mask
na_seq = NArray.to_na(record[:SEQ], "byte")
na_mask += na_seq.eq('-'.ord)
Biopieces.open(file_records, options[:stream_out]) do |input, output|
input.each do |record|
Biopieces.open(file_records, options[:stream_out]) do |input, output|
input.each do |record|
- if sum > 0 and record.has_key? :SEQ
+ if sum > 0 and record[:SEQ]
na_seq = NArray.to_na(record[:SEQ], "byte")
record[:SEQ] = na_seq[na_mask].to_s
record[:SEQ_LEN] = record[:SEQ].length
na_seq = NArray.to_na(record[:SEQ], "byte")
record[:SEQ] = na_seq[na_mask].to_s
record[:SEQ_LEN] = record[:SEQ].length
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each do |record|
- if record.has_key? :SEQ
forward = false
reverse = false
seq = Seq.new_bp(record)
forward = false
reverse = false
seq = Seq.new_bp(record)
k = fields[options[:search_col] - 1]
v = fields[options[:replace_col] - 1]
k = fields[options[:search_col] - 1]
v = fields[options[:replace_col] - 1]
- raise RuntimeError, "duplicate key: #{k} found in file" if replace_hash.has_key? k
+ raise RuntimeError, "duplicate key: #{k} found in file" if replace_hash[k]
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? key
- if replace_hash.has_key? record[key]
+ if record[key]
+ if replace_hash[record[key]]
record[key] = replace_hash[record[key]]
end
end
record[key] = replace_hash[record[key]]
end
end
# Method to convert scores from an ASCII-encoded string to
# a semicolon-separated string of decimal values.
def scores2dec!
# Method to convert scores from an ASCII-encoded string to
# a semicolon-separated string of decimal values.
def scores2dec!
- if self.has_key? :SCORES
self[:SCORES].gsub! /./ do |score|
score = (score.ord - Seq::SCORE_BASE).to_s + ";"
end
self[:SCORES].gsub! /./ do |score|
score = (score.ord - Seq::SCORE_BASE).to_s + ";"
end
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ and record[:SEQ].length >= options[:size]
+ if record[:SEQ] and record[:SEQ].length >= options[:size]
entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
entry.type = 'dna'
entry = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SCORES])
entry.type = 'dna'
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- record[:SEQ].swapcase! if record.has_key? :SEQ
+ record[:SEQ].swapcase! if record[:SEQ]
output.puts record
end
end
output.puts record
end
end
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ and record.has_key? :SCORES
+ if record[:SEQ] and record[:SCORES]
entry = Seq.new_bp(record)
case options[:trim]
entry = Seq.new_bp(record)
case options[:trim]
input.each_record do |record|
output.puts record
input.each_record do |record|
output.puts record
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ if record[:SEQ_NAME] and record[:SEQ]
fasta_io.puts Seq.new_bp(record).to_fasta
end
end
fasta_io.puts Seq.new_bp(record).to_fasta
end
end
Biopieces.open(file_records, options[:stream_out]) do |input, output|
input.each_record do |record|
Biopieces.open(file_records, options[:stream_out]) do |input, output|
input.each_record do |record|
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ
- if hash.has_key? record[:SEQ_NAME].to_sym
+ if record[:SEQ_NAME] and record[:SEQ]
+ if hash[record[:SEQ_NAME].to_sym]
us = hash[record[:SEQ_NAME].to_sym]
record[:CLUSTER] = us[:CLUSTER].to_i
record[:IDENT] = us[:IDENT].to_i
us = hash[record[:SEQ_NAME].to_sym]
record[:CLUSTER] = us[:CLUSTER].to_i
record[:IDENT] = us[:IDENT].to_i
input.each_record do |record|
output.puts record
input.each_record do |record|
output.puts record
- if record.has_key? :SEQ_NAME and record.has_key? :SEQ
+ if record[:SEQ_NAME] and record[:SEQ]
fasta_io.puts Seq.new_bp(record).to_fasta
end
end
fasta_io.puts Seq.new_bp(record).to_fasta
end
end
if record[:SEQ_NAME] and record[:SEQ] and record[key]
seq = Seq.new_bp(record)
if record[:SEQ_NAME] and record[:SEQ] and record[key]
seq = Seq.new_bp(record)
- if fh_hash.has_key? record[key].to_sym
+ if fh_hash[record[key].to_sym]
fasta_io = fh_hash[record[key].to_sym]
else
fasta_file = File.join(options[:dir], record[key] + ".fasta")
fasta_io = fh_hash[record[key].to_sym]
else
fasta_file = File.join(options[:dir], record[key] + ".fasta")
io_out.puts entry.to_fastq
end
io_out.puts entry.to_fastq
end
- output.puts record unless options.has_key? :no_stream
+ output.puts record unless options[:no_stream]
if record[:SEQ_NAME] and record[:SEQ] and record[:SCORES] and record[key]
seq = Seq.new_bp(record)
if record[:SEQ_NAME] and record[:SEQ] and record[:SCORES] and record[key]
seq = Seq.new_bp(record)
- if fh_hash.has_key? record[key].to_sym
+ if fh_hash[record[key].to_sym]
fastq_io = fh_hash[record[key].to_sym]
else
fastq_file = File.join(options[:dir], record[key] + ".fastq")
fastq_io = fh_hash[record[key].to_sym]
else
fastq_file = File.join(options[:dir], record[key] + ".fastq")