3 # Break long quoted strings in perl code into smaller pieces
4 # This version only breaks at blanks. See sub break_at_blanks to
8 # break_long_quotes.pl -ln myfile.pl >myfile.new
10 # where n specifies the maximum quote length.
13 # 1. Use with caution - has not been extensively tested
15 # 2. The output is not beautified so that you can use diff to see what
16 # changed. If all is ok, run the output through perltidy to clean it up.
18 # 3. This version only breaks single-line quotes contained within
19 # either single or double quotes.
21 # Steve Hancock, Sept 28, 2006
26 use vars qw($opt_l $opt_h);
29 usage: break_long_quotes.pl [ -ln ] filename >outfile
30 where n=line length (default 72)
33 getopts('hl:') or die "$usage";
34 if ($opt_h) { die $usage }
35 if ( !defined $opt_l ) {
39 $opt_l =~ /^\d+$/ or die "$usage";
42 unless ( @ARGV == 1 ) { die $usage }
44 scan_file( $file, $opt_l );
47 my ( $file, $line_length ) = @_;
50 my $fh = IO::File->new( $file, 'r' );
51 unless ($fh) { die "cannot open '$file': $!\n" }
52 my $formatter = MyWriter->new($line_length);
55 'formatter' => $formatter, # callback object
57 'argv' => "-npro -se", # don't need .perltidyrc
61 die "Error calling perltidy\n";
64 } ## end sub scan_file
66 #####################################################################
68 # This is a class with a write_line() method which receives
69 # tokenized lines from perltidy
71 #####################################################################
76 my ( $class, $line_length ) = @_;
77 my $comment_block = "";
79 _rcomment_block => \$comment_block,
80 _maximum_comment_length => 0,
81 _max_quote_length => $line_length,
82 _in_hanging_side_comment => 0,
88 # This is called from perltidy line-by-line
89 # We will look for quotes and fix them up if necessary
91 my $line_of_tokens = shift;
92 my $line_type = $line_of_tokens->{_line_type};
93 my $input_line_number = $line_of_tokens->{_line_number};
94 my $input_line = $line_of_tokens->{_line_text}; # the original line
95 my $rtoken_type = $line_of_tokens->{_rtoken_type}; # type of tokens
96 my $rtokens = $line_of_tokens->{_rtokens}; # text of tokens
97 my $starting_in_quote =
98 $line_of_tokens->{_starting_in_quote}; # text of tokens
99 my $ending_in_quote = $line_of_tokens->{_ending_in_quote}; # text of tokens
100 my $max_quote_length = $self->{_max_quote_length};
103 # look in lines of CODE (and not POD for example)
104 if ( $line_type eq 'CODE' && @$rtoken_type ) {
106 my $jmax = @$rtoken_type - 1;
108 # find leading whitespace
109 my $leading_whitespace = ( $input_line =~ /^(\s*)/ ) ? $1 : "";
110 if ($starting_in_quote) {$leading_whitespace=""};
111 my $new_line = $leading_whitespace;
113 # loop over tokens looking for quotes (token type Q)
114 for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
116 # pull out the actual token text
117 my $token = $$rtokens[$j];
119 # look for long quoted strings on a single line
120 # (multiple line quotes not currently handled)
121 if ( $$rtoken_type[$j] eq 'Q'
122 && !( $j == 0 && $starting_in_quote )
123 && !( $j == $jmax && $ending_in_quote )
124 && ( length($token) > $max_quote_length ) )
126 my $quote_char = substr( $token, 0, 1 );
127 if ( $quote_char eq '"' || $quote_char eq '\'' ) {
129 # safety check - shouldn't happen
130 my $check_char = substr( $token, -1, 1 );
131 if ( $check_char ne $quote_char ) {
133 programming error at line $input_line
134 starting quote character is <<$quote_char>> but ending quote character is <<$check_char>>
138 } ## end if ( $check_char ne $quote_char)
140 break_at_blanks( $token, $quote_char, $max_quote_length );
141 } ## end if ( $quote_char eq '"'...
142 } ## end if ( $$rtoken_type[$j]...
144 } ## end for ( my $j = 0 ; $j <=...
146 # substitute the modified line for the original line
147 $input_line = $new_line;
148 } ## end if ( $line_type eq 'CODE')
151 $self->print($input_line."\n");
153 } ## end sub write_line
155 sub break_at_blanks {
157 # break a string at one or more spaces so that the longest substring is
158 # less than the desired length (if possible).
159 my ( $str, $quote_char, $max_length ) = @_;
164 while ( ( $quote_pos = index( $str, $blank, 1 + $quote_pos ) ) >= 0 ) {
166 # as a precaution, do not break if preceded by a backslash
167 if ( $quote_pos > 0 ) {
168 next if ( substr( $str, $quote_pos - 1, 1 ) eq '\\' );
170 push @break_after_pos, $quote_pos;
171 } ## end while ( ( $quote_pos = index...
172 push @break_after_pos, length($str);
174 my $starting_pos = 0;
176 for ( my $i = 1 ; $i < @break_after_pos ; $i++ ) {
177 my $pos = $break_after_pos[$i];
178 my $length = $pos - $starting_pos;
179 if ( $length > $max_length - 1 ) {
180 $pos = $break_after_pos[ $i - 1 ];
181 $new_str .= substr( $str, $starting_pos, $pos - $starting_pos + 1 )
182 . "$quote_char . $quote_char";
183 $starting_pos = $pos + 1;
184 } ## end if ( $length > $max_length...
185 } ## end for ( my $i = 1 ; $i < ...
186 my $pos = length($str);
187 $new_str .= substr( $str, $starting_pos, $pos );
189 } ## end sub break_at_blanks
192 my ( $self, $input_line ) = @_;
196 # called once after the last line of a file
197 sub finish_formatting {
199 $self->flush_comments();