#!/usr/bin/perl -w =head1 NAME rred - A perl regular expression debugging tool =head1 DESCRIPTION rred is an interactive command line interface for experimenting with regular expressions. Given a regular expression and a text string, it shows how the regular expression (and any parenthesised subexpressions) matches against the text string. The text string, expression and match modifiers can be modified interactively. The interface uses the Term::ReadLine library for input, so if you have a package like Term::ReadLine::Gnu installed, you'll have all the comforts of command line history and editing. If not, you'll get a less glamorous but adequate stub interface (see L for details). =head2 COMMANDS Input lines are of the form [argument] Where is one of: =over =item t Specifies the remainder of the input as the I to match against. =item e Specifies the remainder of the input as the regular I to test. =item m Specifies the remainder of the input as I (zero or more of c, g, i, o, m, s, x) for the expression match. The I value defaults to 'g'. =item q Sets the I character to use arount the I. The default is '"' (double quote). This means that escape sequences can be used, and that the usual characters must be escaped. =item d Sets the I for the I. The default is '/'. =item x Quits the program (as will an EOF (ctrl-d) character). =back =head2 EVALUATION Both the I string and the regular I entered are executed in a perl 'eval' statement (see L). The I string is wrapped in the quote characters specified with the 'q' command. The I is evaluated using the m// operator (see L), with the I and I specified using the 'm' and 'd' commands. If either the I string or regular expression match produces a runtime error, it will be caught and displayed. =head2 OUTPUT After each command, perl code for the pattern match being tested is printed. If both the I and I have been set, the code will be executed. If the I matches against the I, a line containing the I will be printed, followed by one line for each of $&, $1, $2, etc. that is defined (where $& is the entire match, $1 is the first subexpression, $2 the second, etc.). The values of $&, $1, $2, etc. are indicated by '^' symbols under the corresponding part of the I. Zero-length matches are indicated by a '/\' around the correct position. If the I value includes a 'g', the above will be repeated if there are multiple matches. =head1 BUGS =over =item * The '^' markers don't line up correctly if the I string contains tab characters (\t). =item * Wrapping of I strings wider than the display should be more intelligent - the '^' markers should appear between the wrapped lines. =item * Multiline I strings work (use \n), but the highlights don't line up correctly. =back New bug reports and patches are welcome. =head1 AUTHOR and CREDITS Written by Hannes Reich (hannesATskynet.ie). Inspired by the nifty regular expression tool in ActiveState's Komodo IDE. This code is in the public domain. Permission is granted to anyone to use it for any purpose. =head1 SEE ALSO L, L, L =cut use strict; use English; use Term::ReadLine; sub show_matches($$$$$); sub show_submatches($\@\@); my $Version = "rred regular expression debugger version 0.1"; my $Help = < Set text to match against e Set regular expression m Set match modifiers (default is 'g') q Set text quotes (default is '"') d Set expression delimiters (default is '/') x, ^D exit HELP_END ; ################################################################ # Main code my $input = new Term::ReadLine 'rred'; print "$Version\n"; # print "Using ", $input->ReadLine, " for input\n"; print "\n$Help\n"; # Match parameters my ($text, $expression); my $text_quote = '"'; my $re_delimiter = '/'; my $modifiers = 'g'; INPUT_LOOP: while (defined ($ARG = $input->readline('rred> '))) { # Process commands or blank lines if (/^\s*([temqdx]|$)/) { # t, e, m, q, d : set parameters to the rest of the input line $text = $POSTMATCH if $1 eq 't'; $expression = $POSTMATCH if $1 eq 'e'; $modifiers = $POSTMATCH if $1 eq 'm'; $text_quote = $POSTMATCH if $1 eq 'q'; $re_delimiter = $POSTMATCH if $1 eq 'd'; # x : exit last INPUT_LOOP if $1 eq 'x'; show_matches($text, $expression, $modifiers, $text_quote, $re_delimiter); next INPUT_LOOP; } # Fall-through for bad input print "Don't understand '$ARG'\n$Help"; } # If we got an eof, print a newline before exiting (looks neater). print "\n" if !defined $ARG; exit; ################################################################ # Subs ################################################################ # show_matches($text, $expression, $modifiers, $text_quote, $re_delimiter) # # Evaluate the regular expression and display matches or evaluation # errors found. # # $text and $expression may be undef. # sub show_matches($$$$$) { my ($input_text, $expression, $modifiers, $quote, $re_delimiter) = @ARG; # Construct the code to execute from the input parameters my $assignment_code = '$text = ' . $quote . (defined $input_text ? $input_text : '') . $quote; my $match_code = '$text =~ m' . $re_delimiter . (defined $expression ? $expression : '') . $re_delimiter . (defined $modifiers ? $modifiers : ''); # Show the code print "$assignment_code; $match_code;\n"; # Bail unless we have some text return unless defined $input_text and defined $text_quote; # Execute the text assignment so we get escape character expansion my $text; eval $assignment_code; if (defined $@ and $@ ne '') # Check for eval errors { print "Error in text: $@\n"; return; } # Bail unless we have an RE and modifiers to execute return unless defined $expression and defined $modifiers and defined $re_delimiter; print "\n"; # Execute the match code and loop through all the matches my $found_match = 0; my (@a, @e); # Start and end position of each match M: while(eval "my \$r = ($match_code); \@a = \@-; \@e = \@+; return \$r") { $found_match = 1; show_submatches($text, @a, @e); last M if $modifiers !~ /g/; # Bail out if this isn't an "m//g" } if (defined $@ and $@ ne '') # Check for eval errors { print "Error in expression: $@\n"; return; } # Say so if we found no matches print "No match.\n" unless $found_match; } ################################################################ # show_submatches($text, \@submatch_starts, \@submatch_ends) # # Display a set of submatches ($&, $1, $2 etc..) from a regular # expression match. # # $text is the text matched against. # # \@submatch_starts and \@submatch_ends are references to arrays # containing the values of @- and @+ following the expression # match (These are the numeric offsets of the beginning and end of # each submatch - see L for details). # sub show_submatches($\@\@) { my($text, $a, $e) = @ARG; print "\t $text\n"; # Note extra space to accomodate zero-length # matches at start of text. # Loop through all the "submatches" ($&, $1, $2, etc.). SUBMATCH: for my $submatch (0..$#$a) { # Starting and ending offsets. my ($start, $end) = ($$a[$submatch], $$e[$submatch]); # Expressions with '|' branches can result in undefined values # in the @- and @+ arrays when a branch containing brackets is # not taken. next SUBMATCH unless(defined $start and defined $end); print $submatch == 0 ? "\$&\t" :"\$$submatch\t"; # Highlight the match with '^'s if it's non-empty if ($end != $start) { print ' ', ' ' x $start, '^' x ($end - $start), "\n"; } # Highlight a zero-length match with "/\" else { if (0 == $start) # special case for start of text { print "/\\\n"; } else { print ' ', ' ' x ($start - 1), "/\\\n"; } } } }