# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
require 'maasha/biopieces'
+require 'maasha/roche'
require 'maasha/bits'
require 'pp'
-# Hard-coded Roche Genome Sequencing (GS) MIDs: barcode sequence => MID name
-GSMID_HASH = {
- ACGAGTGCGT: "MID1",
- TCTCTATGCG: "MID10",
- TAGACTGCAC: "MID100",
- TAGCGCGCGC: "MID101",
- TAGCTCTATC: "MID102",
- TATAGACATC: "MID103",
- TATGATACGC: "MID104",
- TCACTCATAC: "MID105",
- TCATCGAGTC: "MID106",
- TCGAGCTCTC: "MID107",
- TCGCAGACAC: "MID108",
- TCTGTCTCGC: "MID109",
- TGATACGTCT: "MID11",
- TGAGTGACGC: "MID110",
- TGATGTGTAC: "MID111",
- TGCTATAGAC: "MID112",
- TGCTCGCTAC: "MID113",
- ACGTGCAGCG: "MID114",
- ACTCACAGAG: "MID115",
- AGACTCAGCG: "MID116",
- AGAGAGTGTG: "MID117",
- AGCTATCGCG: "MID118",
- AGTCTGACTG: "MID119",
- TACTGAGCTA: "MID12",
- AGTGAGCTCG: "MID120",
- ATAGCTCTCG: "MID121",
- ATCACGTGCG: "MID122",
- ATCGTAGCAG: "MID123",
- ATCGTCTGTG: "MID124",
- ATGTACGATG: "MID125",
- ATGTGTCTAG: "MID126",
- CACACGATAG: "MID127",
- CACTCGCACG: "MID128",
- CAGACGTCTG: "MID129",
- CATAGTAGTG: "MID13",
- CAGTACTGCG: "MID130",
- CGACAGCGAG: "MID131",
- CGATCTGTCG: "MID132",
- CGCGTGCTAG: "MID133",
- CGCTCGAGTG: "MID134",
- CGTGATGACG: "MID135",
- CTATGTACAG: "MID136",
- CTCGATATAG: "MID137",
- CTCGCACGCG: "MID138",
- CTGCGTCACG: "MID139",
- CGAGAGATAC: "MID14",
- CTGTGCGTCG: "MID140",
- TAGCATACTG: "MID141",
- TATACATGTG: "MID142",
- TATCACTCAG: "MID143",
- TATCTGATAG: "MID144",
- TCGTGACATG: "MID145",
- TCTGATCGAG: "MID146",
- TGACATCTCG: "MID147",
- TGAGCTAGAG: "MID148",
- TGATAGAGCG: "MID149",
- ATACGACGTA: "MID15",
- TGCGTGTGCG: "MID150",
- TGCTAGTCAG: "MID151",
- TGTATCACAG: "MID152",
- TGTGCGCGTG: "MID153",
- TCACGTACTA: "MID16",
- CGTCTAGTAC: "MID17",
- TCTACGTAGC: "MID18",
- TGTACTACTC: "MID19",
- ACGCTCGACA: "MID2",
- ACGACTACAG: "MID20",
- CGTAGACTAG: "MID21",
- TACGAGTATG: "MID22",
- TACTCTCGTG: "MID23",
- TAGAGACGAG: "MID24",
- TCGTCGCTCG: "MID25",
- ACATACGCGT: "MID26",
- ACGCGAGTAT: "MID27",
- ACTACTATGT: "MID28",
- ACTGTACAGT: "MID29",
- AGACGCACTC: "MID3",
- AGACTATACT: "MID30",
- AGCGTCGTCT: "MID31",
- AGTACGCTAT: "MID32",
- ATAGAGTACT: "MID33",
- CACGCTACGT: "MID34",
- CAGTAGACGT: "MID35",
- CGACGTGACT: "MID36",
- TACACACACT: "MID37",
- TACACGTGAT: "MID38",
- TACAGATCGT: "MID39",
- AGCACTGTAG: "MID4",
- TACGCTGTCT: "MID40",
- TAGTGTAGAT: "MID41",
- TCGATCACGT: "MID42",
- TCGCACTAGT: "MID43",
- TCTAGCGACT: "MID44",
- TCTATACTAT: "MID45",
- TGACGTATGT: "MID46",
- TGTGAGTAGT: "MID47",
- ACAGTATATA: "MID48",
- ACGCGATCGA: "MID49",
- ATCAGACACG: "MID5",
- ACTAGCAGTA: "MID50",
- AGCTCACGTA: "MID51",
- AGTATACATA: "MID52",
- AGTCGAGAGA: "MID53",
- AGTGCTACGA: "MID54",
- CGATCGTATA: "MID55",
- CGCAGTACGA: "MID56",
- CGCGTATACA: "MID57",
- CGTACAGTCA: "MID58",
- CGTACTCAGA: "MID59",
- ATATCGCGAG: "MID6",
- CTACGCTCTA: "MID60",
- CTATAGCGTA: "MID61",
- TACGTCATCA: "MID62",
- TAGTCGCATA: "MID63",
- TATATATACA: "MID64",
- TATGCTAGTA: "MID65",
- TCACGCGAGA: "MID66",
- TCGATAGTGA: "MID67",
- TCGCTGCGTA: "MID68",
- TCTGACGTCA: "MID69",
- CGTGTCTCTA: "MID7",
- TGAGTCAGTA: "MID70",
- TGTAGTGTGA: "MID71",
- TGTCACACGA: "MID72",
- TGTCGTCGCA: "MID73",
- ACACATACGC: "MID74",
- ACAGTCGTGC: "MID75",
- ACATGACGAC: "MID76",
- ACGACAGCTC: "MID77",
- ACGTCTCATC: "MID78",
- ACTCATCTAC: "MID79",
- CTCGCGTGTC: "MID8",
- ACTCGCGCAC: "MID80",
- AGAGCGTCAC: "MID81",
- AGCGACTAGC: "MID82",
- AGTAGTGATC: "MID83",
- AGTGACACAC: "MID84",
- AGTGTATGTC: "MID85",
- ATAGATAGAC: "MID86",
- ATATAGTCGC: "MID87",
- ATCTACTGAC: "MID88",
- CACGTAGATC: "MID89",
- TAGTATCAGC: "MID9",
- CACGTGTCGC: "MID90",
- CATACTCTAC: "MID91",
- CGACACTATC: "MID92",
- CGAGACGCGC: "MID93",
- CGTATGCGAC: "MID94",
- CGTCGATCTC: "MID95",
- CTACGACTGC: "MID96",
- CTAGTCACTC: "MID97",
- CTCTACGCTC: "MID98",
- CTGTACATAC: "MID99"
-}
-
-# Hard-coded Roche Rapid Library (RL) MIDs: barcode sequence => MID name
-RLMID_HASH = {
- ACACGACGACT: "RL1",
- ACACGTAGTAT: "RL2",
- ACACTACTCGT: "RL3",
- ACGACACGTAT: "RL4",
- ACGAGTAGACT: "RL5",
- ACGCGTCTAGT: "RL6",
- ACGTACACACT: "RL7",
- ACGTACTGTGT: "RL8",
- ACGTAGATCGT: "RL9",
- ACTACGTCTCT: "RL10",
- ACTATACGAGT: "RL11",
- ACTCGCGTCGT: "RL12",
- AGACTCGACGT: "RL13",
- AGTACGAGAGT: "RL14",
- AGTACTACTAT: "RL15",
- AGTAGACGTCT: "RL16",
- AGTCGTACACT: "RL17",
- AGTGTAGTAGT: "RL18",
- ATAGTATACGT: "RL19",
- CAGTACGTACT: "RL20",
- CGACGACGCGT: "RL21",
- CGACGAGTACT: "RL22",
- CGATACTACGT: "RL23",
- CGTACGTCGAT: "RL24",
- CTACTCGTAGT: "RL25",
- GTACAGTACGT: "RL26",
- GTCGTACGTAT: "RL27",
- GTGTACGACGT: "RL28",
- ACACAGTGAGT: "RL29",
- ACACTCATACT: "RL30",
- ACAGACAGCGT: "RL31",
- ACAGACTATAT: "RL32",
- ACAGAGACTCT: "RL33",
- ACAGCTCGTGT: "RL34",
- ACAGTGTCGAT: "RL35",
- ACGAGCGCGCT: "RL36",
- ACGATGAGTGT: "RL37",
- ACGCGAGAGAT: "RL38",
- ACGCTCTCTCT: "RL39",
- ACGTCGCTGAT: "RL40",
- ACGTCTAGCAT: "RL41",
- ACTAGTGATAT: "RL42",
- ACTCACACTGT: "RL43",
- ACTCACTAGCT: "RL44",
- ACTCTATATAT: "RL45",
- ACTGATCTCGT: "RL46",
- ACTGCTGTACT: "RL47",
- ACTGTAGCGCT: "RL48",
- AGACACTCACT: "RL49",
- AGACATATAGT: "RL50"
-}
-
# Script-specific error type: a plain StandardError subclass that adds no
# behavior of its own. NOTE(review): presumably raised by BarCodeFinder --
# confirm against the class body.
class BarCodeFinderError < StandardError
end
class BarCodeFinder
# Build the barcode finder from the :pos and :mismatches options.
bc_finder = BarCodeFinder.new(options[:pos], options[:mismatches])
# Parse barcodes from options[:barcodes_in], but only when that option is set.
bc_finder.parse_barcodes(options[:barcodes_in]) if options[:barcodes_in]
# Optionally load the built-in MID tables, selected by the :gsmids and :rlmids
# options. The removed (-) lines referenced file-local constants; the added (+)
# lines reference the same names under the Roche namespace.
# NOTE(review): Roche:: is presumably provided by the newly required
# 'maasha/roche' -- confirm.
-bc_finder.load_barcodes(GSMID_HASH) if options[:gsmids]
-bc_finder.load_barcodes(RLMID_HASH) if options[:rlmids]
+bc_finder.load_barcodes(Roche::GSMID_HASH) if options[:gsmids]
+bc_finder.load_barcodes(Roche::RLMID_HASH) if options[:rlmids]
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|