1 package Maasha::UCSC::PSL;
3 # Copyright (C) 2007-2008 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Routines for interaction with PSL entries and files.
27 # Read more about the PSL format here: http://genome.ucsc.edu/goldenPath/help/hgTracksHelp.html#PSL
30 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
40 use vars qw( @ISA @EXPORT_OK );
44 @ISA = qw( Exporter );
50 matches => 0, # PSL field names; each value is used as an index into a PSL entry array (e.g. $psl->[ matches ])
71 MATCHES => 0, # Biopieces field names; likewise used as array indices by the score calculation
95 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> PSL format <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
100 # Martin A. Hansen, August 2008.
102 # Reads the next PSL entry from a PSL file; returns its fields as a list in list context, otherwise as an array ref.
104 my ( $fh, # file handle of the PSL file to read from
109 my ( $line, @fields );
111 while ( $line = <$fh> )
113 next if $line !~ /^\d/; # skip any line that does not begin with a digit
117 @fields = split "\t", $line; # split the line on tabs
119 return wantarray ? @fields : \@fields; # 21 fields
128 # Martin A. Hansen, November 2008
130 # Converts a PSL entry (array ref indexed by the PSL field constants) into a Biopiece record keyed by Biopiece field names.
141 BLOCK_LENS => $psl->[ blockSizes ],
142 SNUMINSERT => $psl->[ tNumInsert ],
143 Q_END => $psl->[ qEnd ] - 1, # stored as PSL qEnd minus one; the entry writer adds the one back
144 SBASEINSERT => $psl->[ tBaseInsert ],
145 S_END => $psl->[ tEnd ] - 1, # stored as PSL tEnd minus one; the entry writer adds the one back
146 QBASEINSERT => $psl->[ qBaseInsert ],
147 REPMATCHES => $psl->[ repMatches ],
148 QNUMINSERT => $psl->[ qNumInsert ],
149 MISMATCHES => $psl->[ misMatches ],
150 BLOCK_COUNT => $psl->[ blockCount ],
151 Q_LEN => $psl->[ qSize ],
152 S_ID => $psl->[ tName ],
153 STRAND => $psl->[ strand ],
154 Q_ID => $psl->[ qName ],
155 MATCHES => $psl->[ matches ],
156 S_LEN => $psl->[ tSize ],
157 NCOUNT => $psl->[ nCount ],
158 Q_BEGS => $psl->[ qStarts ],
159 S_BEGS => $psl->[ tStarts ],
160 S_BEG => $psl->[ tStart ],
161 Q_BEG => $psl->[ qStart ],
164 $record{ "SCORE" } = $record{ "MATCHES" } + int( $record{ "REPMATCHES" } / 2 ) - $record{ "MISMATCHES" } - $record{ "QNUMINSERT" } - $record{ "SNUMINSERT" }; # matches + half the repeat matches (truncated) - mismatches - both insert counts
165 $record{ "SPAN" } = $record{ "Q_END" } - $record{ "Q_BEG" }; # NOTE(review): Q_END was already decremented above, so this is qEnd - 1 - qStart -- confirm that is the intended span
167 return wantarray ? %record : \%record; # key/value list in list context, hash ref otherwise
173 # Martin A. Hansen, November 2008.
175 # Calculates the score for a PSL entry according to:
176 # http://genome.ucsc.edu/FAQ/FAQblat#blat4
178 my ( $psl_entry, # array ref holding one PSL entry, indexed here by the upper-case field constants
185 $score = $psl_entry->[ MATCHES ] +
186 int( $psl_entry->[ REPMATCHES ] / 2 ) - # half of the repeat matches, truncated to an integer
187 $psl_entry->[ MISMATCHES ] -
188 $psl_entry->[ QNUMINSERT ] -
189 $psl_entry->[ SNUMINSERT ];
197 # Martin A. Hansen, September 2007.
199 # Writes the PSL header text to a file handle.
201 my ( $fh, # file handle - OPTIONAL
206 $fh = \*STDOUT if not $fh; # fall back to STDOUT when no handle is given
208 print $fh qq(psLayout version 3
209 match mis- rep. N's Q gap Q gap T gap T gap strand Q Q Q Q T T T T block blockSizes qStart match match count bases count bases name size start end name size start end count
210 ---------------------------------------------------------------------------------------------------------------------------------------------------------------
217 # Martin A. Hansen, September 2007.
219 # Writes one PSL entry, given as a record keyed by Biopiece field names, to a file handle as a single tab-separated line.
221 my ( $record, # hashref keyed by Biopiece field names
222 $fh, # file handle - OPTIONAL
227 $fh = \*STDOUT if not $fh; # fall back to STDOUT when no handle is given
231 push @output, $record->{ "MATCHES" };
232 push @output, $record->{ "MISMATCHES" };
233 push @output, $record->{ "REPMATCHES" };
234 push @output, $record->{ "NCOUNT" };
235 push @output, $record->{ "QNUMINSERT" };
236 push @output, $record->{ "QBASEINSERT" };
237 push @output, $record->{ "SNUMINSERT" };
238 push @output, $record->{ "SBASEINSERT" };
239 push @output, $record->{ "STRAND" };
240 push @output, $record->{ "Q_ID" };
241 push @output, $record->{ "Q_LEN" };
242 push @output, $record->{ "Q_BEG" };
243 push @output, $record->{ "Q_END" } + 1; # add one back; the PSL-to-Biopiece conversion stores qEnd minus one
244 push @output, $record->{ "S_ID" };
245 push @output, $record->{ "S_LEN" };
246 push @output, $record->{ "S_BEG" };
247 push @output, $record->{ "S_END" } + 1; # add one back; the PSL-to-Biopiece conversion stores tEnd minus one
248 push @output, $record->{ "BLOCK_COUNT" };
249 push @output, $record->{ "BLOCK_LENS" };
250 push @output, $record->{ "Q_BEGS" };
251 push @output, $record->{ "S_BEGS" };
253 print $fh join( "\t", @output ), "\n"; # fields are emitted in the order pushed above
257 sub psl_upload_to_ucsc
259 # Martin A. Hansen, September 2007.
261 # Uploads a PSL file to the UCSC database by running hgLoadPsl.
263 my ( $file, # file to upload
264 $options, # argument hashref; the "database" and "table" keys are read below
265 $append, # flag indicating table should be appended
273 $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", "-append", $file; # argument string with -append; presumably used when $append is set -- the condition is not visible here
275 $args = join " ", $options->{ "database" }, "-table=$options->{ 'table' }", "-clientLoad", $file; # same argument string without -append
278 Maasha::Common::run( "hgLoadPsl", "$args > /dev/null 2>&1" ); # stdout and stderr of hgLoadPsl are discarded; NOTE(review): database, table and file name are interpolated unquoted -- confirm how Maasha::Common::run executes this before passing untrusted values
282 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<