# name { body } or name { or name { body
# See rules in perltidy's perldoc page
# Section: Other Controls - Creating a new abbreviation
- if ( $line =~ /^((\w+)\s*\{)(.*)?$/ ) {
- ( $name, $body ) = ( $2, $3 );
+ if ( $line =~ /^(?: (\w+) \s* \{ ) (.*)? $/x ) {
+ ( $name, $body ) = ( $1, $2 );
# Cannot start new abbreviation unless old abbreviation is complete
last if ($opening_brace_line);
}
}
else {
- # it is rare to arrive here (identifier with spaces)
+ # Could be something like '* STDERR' or '$ debug'
}
}
}
# safety (the tokenizer should have done this).
# To avoid trimming qw quotes use -ntqw; this causes the
# tokenizer to set them as type 'Q' instead of 'q'.
- $token =~ s/^ \s+ | \s+ $//x;
+ $token =~ s/^ \s+ | \s+ $//gx;
$rtoken_vars->[_TOKEN_] = $token;
if ( $self->[_save_logfile_] && $token =~ /\t/ ) {
$self->note_embedded_tab($input_line_number);
# slightly modifying an existing token.
my ( $rold_token, $type, $token ) = @_;
if ( !defined($token) ) {
- if ( $type eq 'b' ) {
- $token = SPACE;
- }
- elsif ( $type eq 'q' ) {
- $token = EMPTY_STRING;
- }
- elsif ( $type eq '->' ) {
- $token = '->';
- }
- elsif ( $type eq ';' ) {
- $token = ';';
- }
- elsif ( $type eq ',' ) {
- $token = ',';
- }
- else {
+
+ $token =
+ $type eq 'b' ? SPACE
+ : $type eq 'q' ? EMPTY_STRING
+ : $type eq '->' ? $type
+ : $type eq ';' ? $type
+ : $type eq ',' ? $type
+ : undef;
+
+ if ( !defined($token) ) {
+
+ $token = $type;
# Unexpected type ... this sub will work as long as both $token and
# $type are defined, but we should catch any unexpected types during
EOM
}
- # Shouldn't get here
- $token = $type;
}
}
}
}
- # it is a ternary - no special processing for these yet
+ # it is a ternary or input file is unbalanced
else {
}
#####################################################################
#
-# The Perl::Tidy::Tokenizer package is essentially a filter which
-# reads lines of perl source code from a source object and provides
-# corresponding tokenized lines through its get_line() method. Lines
-# flow from the source_object to the caller like this:
+# Perl::Tidy::Tokenizer reads a source and breaks it into a stream of tokens
#
-# source_object --> Tokenizer --> calling routine
-# get_line() get_line() line_of_tokens
+# Usage:
+#
+# STEP 1: initialize or re-initialize Tokenizer with user options
+# Perl::Tidy::Tokenizer::check_options($rOpts);
+#
+# STEP 2: create a tokenizer for a specific input source object
+# my $tokenizer = Perl::Tidy::Tokenizer->new(
+# source_object => $source,
+# ...
+# );
+#
+# STEP 3: get and process each tokenized 'line' (a hash ref of token info)
+# while ( my $line = $tokenizer->get_line() ) {
+# $formatter->write_line($line);
+# }
+#
+# STEP 4: report errors
+# my $severe_error = $tokenizer->report_tokenization_errors();
#
# The source object can be a STRING ref, an ARRAY ref, or an object with a
# get_line() method which supplies one line (a character string) per call.
-# The Tokenizer returns a reference to a data structure 'line_of_tokens'
-# containing one tokenized line for each call to its get_line() method.
#
# NOTE: This is not a real class. Only one tokenizer may be used.
#
# handle an object - must have a get_line method
else {
+
+ # This will die if user's object does not have a 'get_line' method
while ( my $line = $line_source_object->get_line() ) {
push( @{$rinput_lines}, $line );
}
# check for error of extra whitespace
# note for PERL6: leading whitespace is allowed
else {
- $candidate_target =~ s/^ \s+ | \s+ $//x;
+ $candidate_target =~ s/^ \s+ | \s+ $//gx; # trim both ends
if ( $candidate_target eq $here_doc_target ) {
$self->[_nearly_matched_here_target_at_] = $input_line_number;
}