Perl/PrereqScanner/NotQuiteLite.pm

package Perl::PrereqScanner::NotQuiteLite;

use strict;
use warnings;
use Carp;
use Perl::PrereqScanner::NotQuiteLite::Context;
use Perl::PrereqScanner::NotQuiteLite::Util;

our $VERSION = '0.9914';

# Short names of the parser plugins selected by the ':bundled' pseudo-name
# in _get_parsers; each one is expanded to "<class>::Parser::<Name>".
our @BUNDLED_PARSERS = qw/
  Aliased AnyMoose Autouse Catalyst ClassAccessor
  ClassAutouse ClassLoad Core Inline KeywordDeclare Later
  Mixin ModuleRuntime MojoBase Moose MooseXDeclare ObjectPad Only
  PackageVariant Plack POE Prefork Superclass Syntax SyntaxCollector
  TestClassMost TestMore TestRequires UniversalVersion Unless
/;
# Short names selected by ':default', which is also what _get_parsers uses
# when the caller supplies no parser list at all.
our @DEFAULT_PARSERS = qw/Core Moose/;

### Helpers For Debugging

# Translate the raw PERL_PSNQL_DEBUG value into a numeric debug level:
# 0 for unset/false, the integer itself for a non-zero string of digits,
# and 1 for any other true value (so settings such as "yes" simply switch
# debugging on).  The level must stay numeric; collapsing it to a boolean
# would make the "DEBUG > 3" test for DEBUG_RE impossible to satisfy.
sub _debug_level {
  my $raw = shift;
  return 0 unless $raw;
  return ($raw =~ /\A[0-9]+\z/ && $raw + 0) || 1;
}

# Both constants are fixed at compile time from the environment.
use constant DEBUG => _debug_level($ENV{PERL_PSNQL_DEBUG});
# Extra-verbose flag: only on at debug level 4 and above.
use constant DEBUG_RE => DEBUG > 3 ? 1 : 0;

# No-op stubs so that call sites work when debugging is off.
sub _debug {}
sub _error {}
sub _dump_stack {}

if (DEBUG) {
  # Data::Dump is only needed (and only loaded) in debug mode.
  require Data::Dump; Data::Dump->import(qw/dump/);
  no warnings 'redefine';
  # Print the arguments followed by a newline.
  *_debug = sub { print @_, "\n" };
  # Print the arguments followed by a row of 50 asterisks.
  *_error = sub { print @_, "*" x 50, "\n" };
  # Print a one-line summary of the context stack: for every entry its
  # third element (if true) in parentheses, followed by its first element.
  *_dump_stack = sub {
    my ($c, $char) = @_;
    my $stacked = join '', map {($_->[2] ? "($_->[2])" : '').$_->[0]} @{$c->{stack}};
    _debug("$char \t\t\t\t stacked: $stacked");
  };
}

# Record a matching failure in $@ and return nothing.
#
#   $text_ref - reference to the string being scanned
#   $message  - text to put in front of the context excerpt
#
# The stored error is $message followed by up to 100 characters of the
# scanned string, starting at its current pos().  Returns an empty list
# (undef in scalar context) so callers can "return _match_error(...)".
sub _match_error {
  my ($text_ref, $message) = @_;
  my $excerpt = substr($$text_ref, pos($$text_ref), 100);
  $@ = $message . $excerpt;
  return;
}

### Global Variables To Be Sorted Out Later

# Set of package names treated as unsupported; currently empty.
# NOTE(review): no consumer is visible in this part of the file.
my %unsupported_packages = map {$_ => 1} qw(
);

# Module name => list of keywords associated with that module.
# NOTE(review): presumably the sub-defining keywords these modules
# introduce; the code that reads this table is not visible here.
my %sub_keywords = (
  'Function::Parameters' => [qw/fun method/],
  'TryCatch' => [qw/try catch/],
);

# Module name => callback that edits the source text in place through the
# string reference passed as its first argument.
#   tt                 - removes everything from the current \G position up
#                        to and including "no tt;"; returns 0
#   Text::RewriteRules - removes every "RULES ... ENDRULES\n" section;
#                        returns 1
# NOTE(review): the meaning of the 0/1 return values is decided by the
# caller, which is not visible here.
my %filter_modules = (
  tt => sub { ${$_[0]} =~ s|\G.+?no\s*tt\s*;||s; 0; },
  'Text::RewriteRules' => sub { ${$_[0]} =~ s|RULES.+?ENDRULES\n||gs; 1 },
);

# Set of conditional and looping keywords.
my %is_conditional = map {$_ => 1} qw(
  if elsif unless else given when
  for foreach while until
);

# Set of keywords and operators that, going by the name, terminate the
# expression before them (TODO confirm against the code that uses it).
my %ends_expr = map {$_ => 1} qw(
  and or xor
  if else elsif unless when default
  for foreach while until
  && || !~ =~ = += -= *= /= **= //= %= ^= |=
  > < >= <= <> <=> cmp ge gt le lt eq ne ? :
);

# Set of short-circuit operators and statement modifiers.
# NOTE(review): "sideff" presumably means the right-hand side is only
# conditionally evaluated; confirm against the code that uses it.
my %has_sideff = map {$_ => 1} qw(
  and or xor && || //
  if unless when
);

# keywords that allow /regexp/ to follow directly
my %regexp_may_follow = map {$_ => 1} qw(
  and or cmp if elsif unless eq ne
  gt lt ge le for while until grep map not split when
  return
);

# A (possibly qualified) name: an optional leading "::" or "'", then one or
# more word segments joined by "::" or "'".  Segments may start with a
# digit, so this also matches things like "1" or "Foo::2".
my $re_namespace = qr/(?:::|')?(?:[a-zA-Z0-9_]+(?:(?:::|')[a-zA-Z0-9_]+)*)/;
# A (possibly empty) run of characters that cannot open or close a nested
# construct: no backslash, brackets of any kind, slash, quotes, backtick,
# "#", the letter "q", "~", comma or whitespace.
my $re_nonblock_chars = qr/[^\\\(\)\{\}\[\]\<\>\/"'`#q~,\s]*/;
# What may follow a sigil: a name, a caret variable (^W style),
# a {^NAME} variable, or a single punctuation character.
my $re_variable = qr/
  (?:$re_namespace)
  | (?:\^[A-Z\]])
  | (?:\{\^[A-Z0-9_]+\})
  | (?:[_"\(\)<\\\&`'\+\-,.\/\%#:=~\|?!\@\*\[\]\^])
/x;
# A POD section: from an "=word" directive up to and including the next
# "=cut" line, or to the end of the string when there is no "=cut".  The
# pattern itself is not anchored to the start of a line; _scan only tries
# it when it is at the top of a line.
my $re_pod = qr/(
  =[a-zA-Z]\w*\b
  .*?
  (?:(?:\n)
  =cut\b.*?(?:\n|\z)|\z)
)/sx;
# One or more consecutive "#" comment lines (each optionally preceded by
# whitespace), ending at a newline or at the end of the string.
my $re_comment = qr/(?:\s*#[^\n]*?\n)*(?:\s*#[^\n]*?)(?:\n|$)/s;

# The following patterns are anchored with \G, i.e. they match at the
# current pos() of the scanned string.

# "$" followed by anything $re_variable accepts.
my $g_re_scalar_variable = qr{\G(\$(?:$re_variable))};
# A simple "{...}" that needs no recursive scan: an optionally signed word,
# a quoted string of word/space characters, or a run of non-block
# characters, where the closing brace is not directly preceded by "$".
my $g_re_hash_shortcut = qr{\G(\{\s*(?:[\+\-]?\w+|(['"])[\w\s]+\2|(?:$re_nonblock_chars))\s*(?<!\$)\})};
# A parenthesised group containing no ")" inside.
# NOTE(review): presumably the default sub prototype pattern; its use is
# not visible in this part of the file.
my $g_re_prototype = qr{\G(\([^\)]*?\))};

# Cache of compiled patterns, keyed by the (unquoted) delimiter.
my %ReStrInDelims;

# Return a compiled pattern matching the body of a string that is
# terminated by $delim: any characters other than a backslash or the
# delimiter, with backslash-escaped characters allowed in between.  When the
# delimiter is the backslash itself, only doubled backslashes count as
# escapes.  The pattern does not include the delimiters.  Results are
# memoised per delimiter.
sub _gen_re_str_in_delims {
  my $delim = shift;
  my $key = $delim;

  my $cached = $ReStrInDelims{$key};
  return $cached if $cached;

  my $compiled;
  if ($delim eq '\\') {
    $compiled = qr/(?:[^\\]*(?:(?:\\\\)[^\\]*)*)/s;
  } else {
    $delim = quotemeta $delim;
    $compiled = qr/(?:[^\\$delim]*(?:\\.[^\\$delim]*)*)/s;
  }
  return $ReStrInDelims{$key} = $compiled;
}

# Pre-built patterns for the three most common quoting characters.
my $re_str_in_single_quotes = _gen_re_str_in_delims(q{'});
my $re_str_in_double_quotes = _gen_re_str_in_delims(q{"});
my $re_str_in_backticks     = _gen_re_str_in_delims(q{`});

# Cache of compiled patterns, keyed by delimiter.
my %ReStrInDelimsWithEndDelim;

# Like _gen_re_str_in_delims, but the returned pattern also consumes the
# closing delimiter that follows the string body.  Memoised per delimiter.
sub _gen_re_str_in_delims_with_end_delim {
  my $delim = shift;
  unless ($ReStrInDelimsWithEndDelim{$delim}) {
    my $re = _gen_re_str_in_delims($delim);
    $ReStrInDelimsWithEndDelim{$delim} = qr{$re\Q$delim\E};
  }
  return $ReStrInDelimsWithEndDelim{$delim};
}

# Cache of [right delimiter, skip pattern] pairs, keyed by left delimiter.
my %RdelSkip;

# For a left delimiter, return the two-element list
#   (right delimiter, compiled pattern)
# where the right delimiter is the matching closing bracket for one of
# "[", "(", "{", "<" (any other character closes itself), and the pattern
# matches a non-empty run of characters that are neither delimiter nor a
# backslash.  Memoised per left delimiter.
sub _gen_rdel_and_re_skip {
  my $ldel = shift;
  my $pair = $RdelSkip{$ldel};
  unless ($pair) {
    my $rdel = $ldel;
    $rdel =~ tr/[({</])}>/;
    my $re_skip = qr{[^\Q$ldel$rdel\E\\]+};
    $pair = $RdelSkip{$ldel} = [$rdel, $re_skip];
  }
  return @$pair;
}

# Cache of compiled patterns, keyed by the (unquoted) left delimiter only.
my %RegexpShortcut;

# Return a compiled pattern that matches the rest of a "simple" delimited
# body up to and including the closing delimiter: a run of characters
# containing no unescaped bracket of any kind and no delimiter, with
# backslash escapes allowed.  When $rdel is false the left delimiter is
# used as the closing one as well.  Memoised per left delimiter.
sub _gen_re_regexp_shortcut {
  my ($ldel, $rdel) = @_;
  my $key = $ldel;

  my $cached = $RegexpShortcut{$key};
  return $cached if $cached;

  $ldel = quotemeta $ldel;
  if ($rdel) {
    $rdel = quotemeta $rdel;
  } else {
    $rdel = $ldel;
  }
  return $RegexpShortcut{$key} =
    qr{(?:[^\\\(\)\{\}\[\]<>$ldel$rdel]*(?:\\.[^\\\(\)\[\]\{\}<>$ldel$rdel]*)*)$rdel};
}

############################

# Parser class name => result of its register() call (undef when the class
# has no register method).  Shared by all scanner instances.
my %LOADED;

# Constructor.
#
#   Class->new(%args)
#
# Resolves $args{parsers} into parser class names via _get_parsers, loads
# each parser once, and collects what the parsers register into a mapping
# stored under the "_" key of the new object:
#
#   $mapping{$type}{$name} = [$parser_class, $registered_value, $module?]
#
# where $type is one of use/no/keyword/method.  For "use" and "no" the third
# element is the name itself; entries coming from register_fqfn are stored
# under "keyword" with the package part of the fully qualified name as the
# third element.  Dies with "Parser Error: ..." when a parser cannot be
# loaded and does not already provide a register method.
sub new {
  my ($class, %args) = @_;

  my %mapping;
  for my $parser ($class->_get_parsers($args{parsers})) {
    unless (exists $LOADED{$parser}) {
      eval "require $parser; 1";
      my $error = $@;
      # A failed require is acceptable as long as the package exists
      # anyway and can register itself.
      die "Parser Error: $error" if $error and !$parser->can('register');
      my $registered;
      $registered = $parser->register(%args) if $parser->can('register');
      $LOADED{$parser} = $registered;
    }

    # Parsers that registered nothing contribute nothing at all.
    my $parser_mapping = $LOADED{$parser};
    next unless $parser_mapping;

    for my $type (qw/use no keyword method/) {
      exists $parser_mapping->{$type} or next;
      my $wants_name = ($type eq 'use' or $type eq 'no');
      for my $name (keys %{$parser_mapping->{$type}}) {
        my @entry = ($parser, $parser_mapping->{$type}{$name});
        push @entry, $name if $wants_name;
        $mapping{$type}{$name} = \@entry;
      }
    }

    next unless $parser->can('register_fqfn');
    my $fqfn_mapping = $parser->register_fqfn;
    for my $name (keys %$fqfn_mapping) {
      my ($module) = $name =~ /^(.+)::/;
      $mapping{keyword}{$name} = [$parser, $fqfn_mapping->{$name}, $module];
    }
  }
  $args{_} = \%mapping;

  return bless \%args, $class;
}

# Expand a list of parser specifications into parser class names.
#
#   $class->_get_parsers(\@specs)
#
# \@specs defaults to [':default'].  Each specification is one of:
#   :installed  every submodule of "$class::Parser" found by Module::Find
#   :bundled    the parsers named in @BUNDLED_PARSERS
#   :default    the parsers named in @DEFAULT_PARSERS
#   +Full::Name a class name used verbatim (the "+" is dropped)
#   -Name       exclude "$class::Parser::Name" from the result
#   a name already starting with "$class::Parser::" is used as is
#   Name        shorthand for "$class::Parser::Name"
#
# Returns the selected class names, in order, minus the excluded ones.
# The caller's list is never modified.
sub _get_parsers {
  my ($class, $list) = @_;
  my @parsers;
  my %should_ignore;
  for my $spec (@{$list || [qw/:default/]}) {
    # foreach aliases its loop variable to the caller's array elements, so
    # stripping the "+"/"-" prefix in place would corrupt a list that is
    # reused for another scanner.  Work on a copy instead.
    my $parser = $spec;
    if ($parser eq ':installed') {
      require Module::Find;
      push @parsers, Module::Find::findsubmod("$class\::Parser");
    } elsif ($parser eq ':bundled') {
      push @parsers, map {"$class\::Parser::$_"} @BUNDLED_PARSERS;
    } elsif ($parser eq ':default') {
      push @parsers, map {"$class\::Parser::$_"} @DEFAULT_PARSERS;
    } elsif ($parser =~ s/^\+//) {
      push @parsers, $parser;
    } elsif ($parser =~ s/^\-//) {
      $should_ignore{"$class\::Parser\::$parser"} = 1;
    } elsif ($parser =~ /^\Q$class\E::Parser::/) {
      push @parsers, $parser;
    } else {
      push @parsers, "$class\::Parser\::$parser";
    }
  }
  return grep {!$should_ignore{$_}} @parsers;
}

# Read a file and scan its contents.
#
#   $self->scan_file($file)
#
# Prints a progress line to STDERR when the "verbose" option is set, slurps
# the whole file, remembers its name in $self->{file} and returns whatever
# scan_string returns for the contents.  Croaks when the file cannot be
# opened.
sub scan_file {
  my ($self, $file) = @_;

  DEBUG and _debug("START SCANNING $file");
  if ($self->{verbose}) {
    print STDERR " Scanning $file\n";
  }

  open(my $fh, '<', $file) or croak "Can't open $file: $!";
  # Undefine the record separator only while reading, so the whole file
  # comes back as one string.
  my $code = do { local $/ = undef; readline $fh };

  $self->{file} = $file;
  return $self->scan_string($code);
}

# Scan a string of Perl source code.
#
#   $self->scan_string($string)
#
# Builds a fresh Context from the scanner's options, normalises the text
# (BOM removal, newline and whitespace normalisation), runs _scan over it
# and returns the context object.  Errors raised while scanning are not
# propagated; they are collected in $c->{errors}.
sub scan_string {
  my ($self, $string) = @_;

  # An undefined input is scanned as empty source.
  $string = '' unless defined $string;

  my $c = Perl::PrereqScanner::NotQuiteLite::Context->new(%$self);

  # In quick mode, sources longer than 30_000 characters are skimmed first
  # so that the position of the last load statement is known up front.
  # Note that this happens before the text is normalised below.
  if ($self->{quick}) {
    $c->{file_size} = length $string;
    $self->_skim_string($c, \$string) if $c->{file_size} > 30_000;
  }

  # UTF8 BOM: strip it and decode the rest of the text as UTF-8
  if ($string =~ s/\A(\xef\xbb\xbf)//s) {
    utf8::decode($string);
    $c->{decoded} = 1;
  }
  # Other BOMs (TODO: also decode?)
  # UTF-32 BE/LE and UTF-16 BE/LE marks are stripped without decoding.
  $string =~ s/\A(\x00\x00\xfe\xff|\xff\xfe\x00\x00|\xfe\xff|\xff\xfe)//s;

  # normalize line endings: whichever of CR, LF and CRLF is not this
  # platform's "\n" is rewritten to "\n"
  if ("\n" eq "\015") {
    $string =~ s/(?:\015?\012)/\n/gs;
  } elsif ("\n" eq "\012") {
    $string =~ s/(?:\015\012?)/\n/gs;
  } elsif ("\n" eq "\015\012") {
    $string =~ s/(?:\015(?!\012)|(?<!\015)\012)/\n/gs;
  } else {
    $string =~ s/(?:\015\012|\015|\012)/\n/gs;
  }
  # Collapse every run of spaces/tabs into a single space, then every run
  # of newlines (each optionally preceded by spaces) into a single newline.
  # This is applied to the whole text, string literals included.
  $string =~ s/[ \t]+/ /g;
  $string =~ s/(?: *\n)+/\n/gs;

  # FIXME
  # Per-scan state kept on the context: the stack of open scopes, the list
  # of collected errors, the callbacks for the three load statements, and
  # whether POD/comments are wanted as tokens (they are not).
  $c->{stack} = [];
  $c->{errors} = [];
  $c->{callback} = {
    use     => \&_use,
    require => \&_require,
    no      => \&_no,
  };
  $c->{wants_doc} = 0;

  # Start scanning from the beginning of the string.
  pos($string) = 0;

  {
    # Bare block so that the scan can be restarted with "redo".
    local $@;
    eval { $self->_scan($c, \$string, 0) };
    push @{$c->{errors}}, "Scan Error: $@" if $@;
    # If the "redo" flag was set on the context during the scan, clear the
    # per-run state and scan again.  pos($string) is not reset here, so the
    # new run continues from wherever the string's pos() currently is.
    # NOTE(review): the flag is set by code not visible in this block.
    if ($c->{redo}) {
      delete $c->{redo};
      delete $c->{ended};
      @{$c->{stack}} = ();
      redo;
    }
  }

  # Scopes still open after the scan indicate unbalanced or misparsed
  # source; outside quick mode a dump of the stack is recorded as an error.
  if (@{$c->{stack}} and !$c->{quick}) {
    require Data::Dump;
    push @{$c->{errors}}, Data::Dump::dump($c->{stack});
  }

  $c->remove_inner_packages_from_requirements;
  $c->merge_perl;

  $c;
}

# Quickly skim the source for load statements without tokenising it.
#
#   $self->_skim_string($c, $rstr)
#
#   $c    - context; receives the result in $c->{last_found_by_skimming}
#   $rstr - reference to the source string
#
# Looks for "use/require/no Module::Name" and, once a Moose-like module has
# been seen, also for "extends"/"with" followed by a quoted class name.
# Stores a position at or beyond the end of the last statement found (0 when
# nothing was found).  The string's pos() is restored before returning.
sub _skim_string {
  my ($self, $c, $rstr) = @_;
  my $pos = pos($$rstr) || 0;
  my $last_found = 0;
  my $saw_moose;
  # /s is essential: the lazy ".*?" has to be able to cross newlines,
  # otherwise the \G-anchored search stops at the first line that contains
  # no load statement and the rest of the file is never looked at.
  my $re = qr/\G.*?\b((?:use|require|no)\s+(?:[A-Za-z][A-Za-z0-9_]*::)*[A-Za-z][A-Za-z0-9_]*)/s;
  while(my ($match) = $$rstr =~ /$re/gc) {
    # pos() is already the end of the match; the extra length of the match
    # is kept from the original code and only makes the recorded position
    # more generous.
    # NOTE(review): confirm whether this slack is intentional.
    $last_found = pos($$rstr) + length $match;
    if (!$saw_moose and $match =~ /^use\s+(?:Mo(?:o|(?:[ou]se))?X?|MooseX::Declare)\b/) {
      # From here on, inheritance and role declarations count as well.
      $re = qr/\G.*?\b((?:(?:use|require|no)\s+(?:[A-Za-z][A-Za-z0-9_]*::)*[A-Za-z][A-Za-z0-9_]*)|(?:(?:extends|with)\s+(?:["']|q[a-z]*[^a-zA-Z0-9_])(?:[A-Za-z][A-Za-z0-9_]*::)*[A-Za-z][A-Za-z0-9_]*))/s;
      $saw_moose = 1;
    }
  }
  $c->{last_found_by_skimming} = $last_found;
  pos($$rstr) = $pos;
}

sub _scan {
  my ($self, $c, $rstr, $parent_scope) = @_;

  if (@{$c->{stack}} > 90) {
    _error("deep recursion found");
    $c->{ended} = 1;
  }

  _dump_stack($c, "BEGIN SCOPE") if DEBUG;

  # found __DATA|END__ somewhere?
  return $c if $c->{ended};

  my $wants_doc = $c->{wants_doc};
  my $line_top = 1;
  my $waiting_for_a_block;

  my $current_scope = 0;
  my ($token, $token_desc, $token_type) = ('', '', '');
  my ($prev_token, $prev_token_type) = ('', '');
  my ($stack, $unstack);
  my (@keywords, @tokens, @scope_tokens);
  my $caller_package;
  my $prepend;
  my ($pos, $c1);
  my $prev_pos = 0;
  while(defined($pos = pos($$rstr))) {
    $token = undef;

    # cache first letter for better performance
    $c1 = substr($$rstr, $pos, 1);

    if ($line_top) {
      if ($c1 eq '=') {
        if ($$rstr =~ m/\G($re_pod)/gcsx) {
          ($token, $token_desc, $token_type) = ($1, 'POD', '') if $wants_doc;
          next;
        }
      }
    }
    if ($c1 eq "\n") {
      pos($$rstr)++;
      $line_top = 1;
      next;
    }

    $line_top = 0;
    # ignore whitespaces
    if ($c1 eq ' ') {
      pos($$rstr)++;
      next;
    } elsif ($c1 eq '_') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '_' and $$rstr =~ m/\G(__(?:DATA|END)__\b)(?!\s*=>)/gc) {
        if ($wants_doc) {
          ($token, $token_desc, $token_type) = ($1, 'END_OF_CODE', '');
          next;
        } else {
          $c->{ended} = 1;
          last;
        }
      }
    } elsif ($c1 eq '#') {
      if ($$rstr =~ m{\G($re_comment)}gcs) {
        ($token, $token_desc, $token_type) = ($1, 'COMMENT', '') if $wants_doc;
        $line_top = 1;
        next;
      }
    } elsif ($c1 eq ';') {
      pos($$rstr) = $pos + 1;
      ($token, $token_desc, $token_type) = ($c1, ';', ';');
      $current_scope |= F_STATEMENT_END|F_EXPR_END;
      next;
    } elsif ($c1 eq '$') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '#') {
        if (substr($$rstr, $pos + 2, 1) eq '{') {
          if ($$rstr =~ m{\G(\$\#\{[\w\s]+\})}gc) {
            ($token, $token_desc, $token_type) = ($1, '$#{NAME}', 'EXPR');
            next;
          } else {
            pos($$rstr) = $pos + 3;
            ($token, $token_desc, $token_type) = ('$#{', '$#{', 'EXPR');
            $stack = [$token, $pos, 'VARIABLE'];
            next;
          }
        } elsif ($$rstr =~ m{\G(\$\#(?:$re_namespace))}gc) {
          ($token, $token_desc, $token_type) = ($1, '$#NAME', 'EXPR');
          next;
        } elsif ($prev_token_type eq 'ARROW') {
          my $c3 = substr($$rstr, $pos + 2, 1);
          if ($c3 eq '*') {
            pos($$rstr) = $pos + 3;
            ($token, $token_desc, $token_type) = ('$#*', 'VARIABLE', 'VARIABLE');
            $c->add_perl('5.020', '->$#*');
            next;
          }
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('$#', 'SPECIAL_VARIABLE', 'EXPR');
          next;
        }
      } elsif ($c2 eq '$') {
        if ($$rstr =~ m{\G(\$(?:\$)+(?:$re_namespace))}gc) {
          ($token, $token_desc, $token_type) = ($1, '$$NAME', 'VARIABLE');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('$$', 'SPECIAL_VARIABLE', 'EXPR');
          next;
        }
      } elsif ($c2 eq '{') {
        if ($$rstr =~ m{\G(\$\{[\w\s]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '${NAME}', 'VARIABLE');
          if ($prev_token_type eq 'KEYWORD' and $c->token_expects_fh_or_block_list($prev_token)) {
            $token_type = '';
            next;
          }
        } elsif ($$rstr =~ m{\G(\$\{\^[A-Z_]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '${^NAME}', 'VARIABLE');
          if ($token eq '${^CAPTURE}' or $token eq '${^CAPTURE_ALL}') {
            $c->add_perl('5.026', '${^CAPTURE}');
          }
          if ($token eq '${^SAFE_LOCALES}') {
            $c->add_perl('5.028', '${^SAFE_LOCALES}');
          }
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('${', '${', 'VARIABLE');
          $stack = [$token, $pos, 'VARIABLE'];
        }
        if ($parent_scope & F_EXPECTS_BRACKET) {
          $current_scope |= F_SCOPE_END;
        }
        next;
      } elsif ($c2 eq '*' and $prev_token_type eq 'ARROW') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('$*', '$*', 'VARIABLE');
        $c->add_perl('5.020', '->$*');
        next;
      } elsif ($c2 eq '+' or $c2 eq '-') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('$'.$c2, 'SPECIAL_VARIABLE', 'VARIABLE');
        $c->add_perl('5.010', '$'.$c2);
        next;
      } elsif ($$rstr =~ m{$g_re_scalar_variable}gc) {
        ($token, $token_desc, $token_type) = ($1, '$NAME', 'VARIABLE');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'VARIABLE');
        next;
      }
    } elsif ($c1 eq '@') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '_' and $$rstr =~ m{\G\@_\b}gc) {
        ($token, $token_desc, $token_type) = ('@_', 'SPECIAL_VARIABLE', 'VARIABLE');
        next;
      } elsif ($c2 eq '{') {
        if ($$rstr =~ m{\G(\@\{[\w\s]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '@{NAME}', 'VARIABLE');
          if ($token eq '@{^CAPTURE}' or $token eq '@{^CAPTURE_ALL}') {
            $c->add_perl('5.026', '@{^CAPTURE}');
          }
        } elsif ($$rstr =~ m{\G(\@\{\^[A-Z_]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '@{^NAME}', 'VARIABLE');
          if ($token eq '@{^CAPTURE}' or $token eq '@{^CAPTURE_ALL}') {
            $c->add_perl('5.026', '@{^CAPTURE}');
          }
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('@{', '@{', 'VARIABLE');
          $stack = [$token, $pos, 'VARIABLE'];
        }
        if ($prev_token_type eq 'ARROW') {
          $c->add_perl('5.020', '->@{}');
        }
        if ($parent_scope & F_EXPECTS_BRACKET) {
          $current_scope |= F_SCOPE_END;
        }
        next;
      } elsif ($c2 eq '$') {
        if ($$rstr =~ m{\G(\@\$(?:$re_namespace))}gc) {
          ($token, $token_desc, $token_type) = ($1, '@$NAME', 'VARIABLE');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('@$', '@$', 'VARIABLE');
          next;
        }
      } elsif ($prev_token_type eq 'ARROW') {
        # postderef
        if ($c2 eq '*') {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('@*', '@*', 'VARIABLE');
          $c->add_perl('5.020', '->@*');
          next;
        } else {
          pos($$rstr) = $pos + 1;
          ($token, $token_desc, $token_type) = ('@', '@', 'VARIABLE');
          $c->add_perl('5.020', '->@');
          next;
        }
      } elsif ($c2 eq '[') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('@[', 'SPECIAL_VARIABLE', 'VARIABLE');
        next;
      } elsif ($c2 eq '+' or $c2 eq '-') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('@'.$c2, 'SPECIAL_VARIABLE', 'VARIABLE');
        $c->add_perl('5.010', '@'.$c2);
        next;
      } elsif ($$rstr =~ m{\G(\@(?:$re_namespace))}gc) {
        ($token, $token_desc, $token_type) = ($1, '@NAME', 'VARIABLE');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'VARIABLE');
        next;
      }
    } elsif ($c1 eq '%') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '{') {
        if ($$rstr =~ m{\G(\%\{[\w\s]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '%{NAME}', 'VARIABLE');
        } elsif ($$rstr =~ m{\G(\%\{\^[A-Z_]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '%{^NAME}', 'VARIABLE');
          if ($token eq '%{^CAPTURE}' or $token eq '%{^CAPTURE_ALL}') {
            $c->add_perl('5.026', '%{^CAPTURE}');
          }
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('%{', '%{', 'VARIABLE');
          $stack = [$token, $pos, 'VARIABLE'];
        }
        if ($prev_token_type eq 'ARROW') {
          $c->add_perl('5.020', '->%{');
        }
        if ($parent_scope & F_EXPECTS_BRACKET) {
          $current_scope |= F_SCOPE_END;
        }
        next;
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('%=', '%=', 'OP');
        next;
      } elsif ($$rstr =~ m{\G(\%\$(?:$re_namespace))}gc) {
        ($token, $token_desc, $token_type) = ($1, '%$NAME', 'VARIABLE');
        next;
      } elsif ($$rstr =~ m{\G(\%(?:$re_namespace))}gc) {
        ($token, $token_desc, $token_type) = ($1, '%NAME', 'VARIABLE');
        next;
      } elsif ($prev_token_type eq 'VARIABLE' or $prev_token_type eq 'EXPR') {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      } elsif ($prev_token_type eq 'ARROW') {
        if ($c2 eq '*') {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('%*', '%*', 'VARIABLE');
          $c->add_perl('5.020', '->%*');
          next;
        } else {
          pos($$rstr) = $pos + 1;
          ($token, $token_desc, $token_type) = ('%', '%', 'VARIABLE');
          $c->add_perl('5.020', '->%');
          next;
        }
      } elsif ($c2 eq '+' or $c2 eq '-') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('%'.$c2, 'SPECIAL_VARIABLE', 'VARIABLE');
        $c->add_perl('5.010', '%'.$c2);
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'VARIABLE');
        next;
      }
    } elsif ($c1 eq '*') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '{') {
        if ($prev_token_type eq 'ARROW') {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('*{', '*{', 'VARIABLE');
          $c->add_perl('5.020', '->*{}');
          next;
        } elsif ($$rstr =~ m{\G(\*\{[\w\s]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '*{NAME}', 'VARIABLE');
          if ($prev_token eq 'KEYWORD' and $c->token_expects_fh_or_block_list($prev_token)) {
            $token_type = '';
            next;
          }
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('*{', '*{', 'VARIABLE');
          $stack = [$token, $pos, 'VARIABLE'];
        }
        if ($parent_scope & F_EXPECTS_BRACKET) {
          $current_scope |= F_SCOPE_END;
        }
        next;
      } elsif ($c2 eq '*') {
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('**=', '**=', 'OP');
          next;
        } elsif ($prev_token_type eq 'ARROW') {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('**', '**', 'VARIABLE');
          $c->add_perl('5.020', '->**');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('**', '**', 'OP');
          next;
        }
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('*=', '*=', 'OP');
        next;
      } elsif ($$rstr =~ m{\G(\*(?:$re_namespace))}gc) {
        ($token, $token_desc, $token_type) = ($1, '*NAME', 'VARIABLE');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '&') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '&') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('&&', '&&', 'OP');
        next;
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('&=', '&=', 'OP');
        next;
      } elsif ($c2 eq '{') {
        if ($$rstr =~ m{\G(\&\{[\w\s]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '&{NAME}', 'EXPR');
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('&{', '&{', 'EXPR');
          $stack = [$token, $pos, 'FUNC'];
        }
        if ($parent_scope & F_EXPECTS_BRACKET) {
          $current_scope |= F_SCOPE_END;
        }
        next;
      } elsif ($c2 eq '.') {
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('&.=', '&.=', 'OP');
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('&.', '&.', 'OP');
        }
        $c->add_perl('5.022', '&.');
        next;
      } elsif ($$rstr =~ m{\G(\&(?:$re_namespace))}gc) {
        ($token, $token_desc, $token_type) = ($1, '&NAME', 'EXPR');
        next;
      } elsif ($$rstr =~ m{\G(\&\$(?:$re_namespace))}gc) {
        ($token, $token_desc, $token_type) = ($1, '&$NAME', 'EXPR');
        next;
      } elsif ($prev_token_type eq 'ARROW') {
        if ($c2 eq '*') {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('&*', '&*', 'VARIABLE');
          $c->add_perl('5.020', '->&*');
          next;
        }
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '\\') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '{') {
        if ($$rstr =~ m{\G(\\\{[\w\s]+\})}gc) {
          ($token, $token_desc, $token_type) = ($1, '\\{NAME}', 'VARIABLE');
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('\\{', '\\{', 'VARIABLE');
          $stack = [$token, $pos, 'VARIABLE'];
        }
        if ($parent_scope & F_EXPECTS_BRACKET) {
          $current_scope |= F_SCOPE_END;
        }
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, '');
        next;
      }
    } elsif ($c1 eq '-') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '>') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('->', 'ARROW', 'ARROW');
        if ($prev_token_type eq 'WORD' or $prev_token_type eq 'KEYWORD') {
          $caller_package = $prev_token;
          $current_scope |= F_KEEP_TOKENS;
        }
        next;
      } elsif ($c2 eq '-') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('--', '--', $prev_token_type);
        next;
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('-=', '-=', 'OP');
        next;
      } elsif ($$rstr =~ m{\G(\-[ABCMORSTWXbcdefgkloprstuwxz]\b)}gc) {
        ($token, $token_desc, $token_type) = ($1, 'FILE_TEST', 'EXPR');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq q{"}) {
      if ($$rstr =~ m{\G(?:\"($re_str_in_double_quotes)\")}gcs) {
        ($token, $token_desc, $token_type) = ([$1, q{"}], 'STRING', 'STRING');
        next;
      }
    } elsif ($c1 eq q{'}) {
      if ($$rstr =~ m{\G(?:\'($re_str_in_single_quotes)\')}gcs) {
        ($token, $token_desc, $token_type) = ([$1, q{'}], 'STRING', 'STRING');
        next;
      }
    } elsif ($c1 eq '`') {
      if ($$rstr =~ m{\G(?:\`($re_str_in_backticks)\`)}gcs) {
        ($token, $token_desc, $token_type) = ([$1, q{`}], 'BACKTICK', 'EXPR');
        next;
      }
    } elsif ($c1 eq '/') {
      if ($prev_token_type eq '' or $prev_token_type eq 'OP' or ($prev_token_type eq 'KEYWORD' and $regexp_may_follow{$prev_token})) { # undoubtedly regexp
        if (my $regexp = $self->_match_regexp0($c, $rstr, $pos, 'm')) {
          ($token, $token_desc, $token_type) = ($regexp, 'REGEXP', 'EXPR');
          next;
        } else {
          # the above may fail
          _debug("REGEXP ERROR: $@") if DEBUG;
          pos($$rstr) = $pos;
        }
      }
      if (($prev_token_type eq '' or (!($current_scope & F_EXPR) and $prev_token_type eq 'WORD')) or ($prev_token_type eq 'KEYWORD' and @keywords and $prev_token eq $keywords[-1] and $regexp_may_follow{$prev_token})) {

        if (my $regexp = $self->_match_regexp0($c, $rstr, $pos)) {
          ($token, $token_desc, $token_type) = ($regexp, 'REGEXP', 'EXPR');
          next;
        } else {
          # the above may fail
          _debug("REGEXP ERROR: $@") if DEBUG;
          pos($$rstr) = $pos;
        }
      }
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '/') {
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('//=', '//=', 'OP');
          $c->add_perl('5.010', '//=');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('//', '//', 'OP');
          $c->add_perl('5.010', '//');
          next;
        }
      }
      if ($c2 eq '=') { # this may be a part of /=.../
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('/=', '/=', 'OP');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '{') {
      if ($$rstr =~ m{$g_re_hash_shortcut}gc) {
        ($token, $token_desc) = ($1, '{EXPR}');
        if ($current_scope & F_EVAL) {
          $current_scope &= MASK_EVAL;
          $c->{eval} = ($current_scope | $parent_scope) & F_EVAL ? 1 : 0;
        }
        if ($parent_scope & F_EXPECTS_BRACKET) {
          $current_scope |= F_SCOPE_END;
          next;
        }
        if ($prev_token_type eq 'ARROW' or $prev_token_type eq 'VARIABLE') {
          $token_type = 'VARIABLE';
          next;
        } elsif ($waiting_for_a_block) {
          $waiting_for_a_block = 0;
          if (@keywords and $c->token_expects_block($keywords[0])) {
            my $first_token = $keywords[0];
            $current_scope |= F_EXPR_END;
            if ($c->token_defines_sub($first_token) and $c->has_callback_for(sub => $first_token)) {
              $c->run_callback_for(sub => $first_token, \@tokens);
              $current_scope &= MASK_KEEP_TOKENS;
              @tokens = ();
            }
          }
          next;
        } elsif ($prev_token_type eq 'KEYWORD' and $c->token_expects_fh_or_block_list($prev_token)) {
          $token_type = '';
          next;
        } else {
          $token_type = 'EXPR';
          next;
        }
      }
      pos($$rstr) = $pos + 1;
      ($token, $token_desc) = ($c1, $c1);
      my $stack_owner;
      if (@keywords) {
        for(my $i = @keywords; $i > 0; $i--) {
          my $keyword = $keywords[$i - 1];
          if ($c->token_expects_block($keyword)) {
            $stack_owner = $keyword;
            if (@tokens and $c->token_defines_sub($keyword) and $c->has_callback_for(sub => $keyword)) {
              $c->run_callback_for(sub => $keyword, \@tokens);
              $current_scope &= MASK_KEEP_TOKENS;
              @tokens = ();
            }
            last;
          }
        }
      }
      $stack = [$token, $pos, $stack_owner || ''];
      if ($parent_scope & F_EXPECTS_BRACKET) {
        $current_scope |= F_SCOPE_END|F_STATEMENT_END|F_EXPR_END;
        next;
      }
      if ($prev_token_type eq 'ARROW' or $prev_token_type eq 'VARIABLE') {
        $token_type = 'VARIABLE';
      } elsif ($waiting_for_a_block) {
        $waiting_for_a_block = 0;
      } else {
        $token_type = (($current_scope | $parent_scope) & F_KEEP_TOKENS) ? 'EXPR' : '';
      }
      next;
    } elsif ($c1 eq '[') {
      if ($$rstr =~ m{\G(\[(?:$re_nonblock_chars)\])}gc) {
        ($token, $token_desc, $token_type) = ($1, '[EXPR]', 'VARIABLE');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'VARIABLE');
        $stack = [$token, $pos, 'VARIABLE'];
        next;
      }
    } elsif ($c1 eq '(') {
      my $prototype_re = $c->prototype_re;
      if ($waiting_for_a_block and @keywords and $c->token_defines_sub($keywords[-1]) and $$rstr =~ m{$prototype_re}gc) {
        my $proto = $1;
        if ($proto =~ /^\([\\\$\@\%\&\[\]\*;\+]*\)$/) {
          ($token, $token_desc, $token_type) = ($proto, '(PROTOTYPE)', '');
        } else {
          ($token, $token_desc, $token_type) = ($proto, '(SIGNATURES)', '');
          $c->add_perl('5.020', 'signatures');
        }
        next;
      } elsif ($$rstr =~ m{\G\(((?:$re_nonblock_chars)(?<!\$))\)}gc) {
        ($token, $token_desc, $token_type) = ([[[$1, 'EXPR']]], '()', 'EXPR');
        if ($prev_token_type eq 'KEYWORD' and @keywords and $keywords[-1] eq $prev_token and !$c->token_expects_expr_block($prev_token)) {
          if ($prev_token eq 'eval') {
            $current_scope &= MASK_EVAL;
            $c->{eval} = ($current_scope | $parent_scope) & F_EVAL ? 1 : 0;
          }
          pop @keywords;
        }
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'EXPR');
        my $stack_owner;
        if (@keywords) {
          for (my $i = @keywords; $i > 0; $i--) {
            my $keyword = $keywords[$i - 1];
            if ($c->token_expects_block($keyword)) {
              $stack_owner = $keyword;
              last;
            }
          }
        }
        $stack = [$token, $pos, $stack_owner || ''];
        next;
      }
    } elsif ($c1 eq '}') {
      pos($$rstr) = $pos + 1;
      ($token, $token_desc, $token_type) = ($c1, $c1, '');
      $unstack = $token;
      $current_scope |= F_STATEMENT_END|F_EXPR_END;
      next;
    } elsif ($c1 eq ']') {
      pos($$rstr) = $pos + 1;
      ($token, $token_desc, $token_type) = ($c1, $c1, '');
      $unstack = $token;
      next;
    } elsif ($c1 eq ')') {
      pos($$rstr) = $pos + 1;
      ($token, $token_desc, $token_type) = ($c1, $c1, '');
      $unstack = $token;
      next;
    } elsif ($c1 eq '<') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '<'){
        if ($$rstr =~ m{\G(<<(?:
          \\. |
          \w+ |
          [./-] |
          \[[^\]]*\] |
          \{[^\}]*\} |
          \* |
          \? |
          \~ |
          \$ |
        )*(?<!\-)>>)}gcx) {
          ($token, $token_desc, $token_type) = ($1, '<<NAME>>', 'EXPR');
          $c->add_perl('5.022', '<<NAME>>');
          next;
        } elsif ($$rstr =~ m{\G<<~?\s*(?:
          \\?[A-Za-z_][\w]* |
          "(?:[^\\"]*(?:\\.[^\\"]*)*)" |
          '(?:[^\\']*(?:\\.[^\\']*)*)' |
          `(?:[^\\`]*(?:\\.[^\\`]*)*)`
        )}sx) {
          if (my $heredoc = $self->_match_heredoc($c, $rstr)) {
            ($token, $token_desc, $token_type) = ($heredoc, 'HEREDOC', 'EXPR');
            next;
          } else {
            # the above may fail
            pos($$rstr) = $pos;
          }
        }
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('<<=', '<<=', 'OP');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('<<', '<<', 'OP');
          next;
        }
      } elsif ($c2 eq '=') {
        if (substr($$rstr, $pos + 2, 1) eq '>') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('<=>', '<=>', 'OP');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('<=', '<=', 'OP');
          next;
        }
      } elsif ($c2 eq '>') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('<>', '<>', 'OP');
        next;
      } elsif ($$rstr =~ m{\G(<(?:
        \\. |
        \w+ |
        [./-] |
        \[[^\]]*\] |
        \{[^\}]*\} |
        \* |
        \? |
        \~ |
        \$ |
      )*(?<!\-)>)}gcx) {
        ($token, $token_desc, $token_type) = ($1, '<NAME>', 'EXPR');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq ':') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq ':') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('::', '::', '');
        next;
      }
      if ($waiting_for_a_block and @keywords and $c->token_defines_sub($keywords[-1])) {
        while($$rstr =~ m{\G\s*(:?\s*[\w]+)}gcs) {
          my $startpos = pos($$rstr);
          if (substr($$rstr, $startpos, 1) eq '(') {
            my @nest = '(';
            pos($$rstr) = $startpos + 1;
            my ($p, $c1);
            while(defined($p = pos($$rstr))) {
              $c1 = substr($$rstr, $p, 1);
              if ($c1 eq '\\') {
                pos($$rstr) = $p + 2;
                next;
              }
              if ($c1 eq ')') {
                pop @nest;
                pos($$rstr) = $p + 1;
                last unless @nest;
              }
              if ($c1 eq '(') {
                push @nest, $c1;
                pos($$rstr) = $p + 1;
                next;
              }
              $$rstr =~ m{\G([^\\()]+)}gc and next;
            }
          }
        }
        $token = substr($$rstr, $pos, pos($$rstr) - $pos);
        ($token_desc, $token_type) = ('ATTRIBUTE', '');
        if ($token =~ /^:prototype\(/) {
          $c->add_perl('5.020', ':prototype');
        }
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '=') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '>') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('=>', 'COMMA', 'OP');
        if (@keywords and $prev_token_type eq 'KEYWORD' and $keywords[-1] eq $prev_token) {
          pop @keywords;
          if (!@keywords and ($current_scope & F_KEEP_TOKENS)) {
            $current_scope &= MASK_KEEP_TOKENS;
            @tokens = ();
          }
        }
        next;
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('==', '==', 'OP');
        next;
      } elsif ($c2 eq '~') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('=~', '=~', 'OP');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '>') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '>') {
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('>>=', '>>=', 'OP');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('>>', '>>', 'OP');
          next;
        }
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('>=', '>=', 'OP');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '+') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '+') {
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('++=', '++=', 'OP');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('++', '++', $prev_token_type);
          next;
        }
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('+=', '+=', 'OP');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '|') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '|') {
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('||=', '||=', 'OP');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('||', '||', 'OP');
          next;
        }
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('|=', '|=', 'OP');
        next;
      } elsif ($c2 eq '.') {
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('|.=', '|.=', 'OP');
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('|.', '|.', 'OP');
        }
        $c->add_perl('5.022', '|.');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '^') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('^=', '^=', 'OP');
        next;
      } elsif ($c2 eq '.') {
        if (substr($$rstr, $pos + 2, 1) eq '=') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('^.=', '^.=', 'OP');
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('^.', '^.', 'OP');
        }
        $c->add_perl('5.022', '^.');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '!') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '~') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('!~', '!~', 'OP');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '~') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '~') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('~~', '~~', 'OP');
        $c->add_perl('5.010', '~~');
        next;
      } elsif ($c2 eq '.') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('~.', '~.', 'OP');
        $c->add_perl('5.022', '~.');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq ',') {
      pos($$rstr) = $pos + 1;
      ($token, $token_desc, $token_type) = ($c1, 'COMMA', 'OP');
      next;
    } elsif ($c1 eq '?') {
      pos($$rstr) = $pos + 1;
      ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
      next;
    } elsif ($c1 eq '.') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq '.') {
        if (substr($$rstr, $pos + 2, 1) eq '.') {
          pos($$rstr) = $pos + 3;
          ($token, $token_desc, $token_type) = ('...', '...', 'OP');
          $c->add_perl('5.012', '...');
          next;
        } else {
          pos($$rstr) = $pos + 2;
          ($token, $token_desc, $token_type) = ('..', '..', 'OP');
          next;
        }
      } elsif ($c2 eq '=') {
        pos($$rstr) = $pos + 2;
        ($token, $token_desc, $token_type) = ('.=', '.=', 'OP');
        next;
      } else {
        pos($$rstr) = $pos + 1;
        ($token, $token_desc, $token_type) = ($c1, $c1, 'OP');
        next;
      }
    } elsif ($c1 eq '0') {
      my $c2 = substr($$rstr, $pos + 1, 1);
      if ($c2 eq 'x') {
        if ($$rstr =~ m{\G(0x[0-9A-Fa-f_]+)}gc) {
          ($token, $token_desc, $token_type) = ($1, 'HEX NUMBER', 'EXPR');
          next;
        }
      } elsif ($c2 eq 'b') {
        if ($$rstr =~ m{\G(0b[01_]+)}gc) {
          ($token, $token_desc, $token_type) = ($1, 'BINARY NUMBER', 'EXPR');
          next;
        }
      }
    }

    if ($$rstr =~ m{\G((?:0|[1-9][0-9_]*)(?:\.[0-9][0-9_]*)?)}gc) {
      my $number = $1;
      my $p = pos($$rstr);
      my $n1 = substr($$rstr, $p, 1);
      if ($n1 eq '.') {
        if ($$rstr =~ m{\G((?:\.[0-9_])+)}gc) {
          $number .= $1;
          ($token, $token_desc, $token_type) = ($number, 'VERSION_STRING', 'EXPR');
          next;
        } elsif (substr($$rstr, $p, 2) ne '..') {
          $number .= '.';
          pos($$rstr) = $p + 1;
        }
      } elsif ($n1 eq 'E' or $n1 eq 'e') {
        if ($$rstr =~ m{\G([Ee][+-]?[0-9]+)}gc) {
          $number .= $1;
        }
      }
      ($token, $token_desc, $token_type) = ($number, 'NUMBER', 'EXPR');
      if ($prepend) {
        $token = "$prepend$token";
        pop @tokens if @tokens and $tokens[-1][0] eq $prepend;
        pop @scope_tokens if @scope_tokens and $scope_tokens[-1][0] eq $prepend;
      }
      next;
    }

    if ($prev_token_type ne 'ARROW' and ($prev_token_type ne 'KEYWORD' or !$c->token_expects_word($prev_token))) {
      if ($prev_token_type eq 'EXPR' or $prev_token_type eq 'VARIABLE') {
        if ($c1 eq 'x') {
          if ($$rstr =~ m{\G(x\b(?!\s*=>))}gc){
            ($token, $token_desc, $token_type) = ($1, $1, '');
            next;
          }
        }
      }

      if ($c1 eq 'q') {
        my $quotelike_re = $c->quotelike_re;
        if ($$rstr =~ m{\G((?:$quotelike_re)\b(?!\s*=>))}gc) {
          if (my $quotelike = $self->_match_quotelike($c, $rstr, $1)) {
            ($token, $token_desc, $token_type) = ($quotelike, 'STRING', 'STRING');
            next;
          } else {
            _debug("QUOTELIKE ERROR: $@") if DEBUG;
            pos($$rstr) = $pos;
          }
        } elsif ($$rstr =~ m{\G((?:qw)\b(?!\s*=>))}gc) {
          if (my $quotelike = $self->_match_quotelike($c, $rstr, $1)) {
            ($token, $token_desc, $token_type) = ($quotelike, 'QUOTED_WORD_LIST', 'EXPR');
            next;
          } else {
            _debug("QUOTELIKE ERROR: $@") if DEBUG;
            pos($$rstr) = $pos;
          }
        } elsif ($$rstr =~ m{\G((?:qx)\b(?!\s*=>))}gc) {
          if (my $quotelike = $self->_match_quotelike($c, $rstr, $1)) {
            ($token, $token_desc, $token_type) = ($quotelike, 'BACKTICK', 'EXPR');
            next;
          } else {
            _debug("QUOTELIKE ERROR: $@") if DEBUG;
            pos($$rstr) = $pos;
          }
        } elsif ($$rstr =~ m{\G(qr\b(?!\s*=>))}gc) {
          if (my $regexp = $self->_match_regexp($c, $rstr)) {
            ($token, $token_desc, $token_type) = ($regexp, 'qr', 'EXPR');
            next;
          } else {
            _debug("QUOTELIKE ERROR: $@") if DEBUG;
            pos($$rstr) = $pos;
          }
        }
      } elsif ($c1 eq 'm') {
        if ($$rstr =~ m{\G(m\b(?!\s*=>))}gc) {
          if (my $regexp = $self->_match_regexp($c, $rstr)) {
            ($token, $token_desc, $token_type) = ($regexp, 'm', 'EXPR');
            next;
          } else {
            _debug("REGEXP ERROR: $@") if DEBUG;
            pos($$rstr) = $pos;
          }
        }
      } elsif ($c1 eq 's') {
        if ($$rstr =~ m{\G(s\b(?!\s*=>))}gc) {
          if (my $regexp = $self->_match_substitute($c, $rstr)) {
            ($token, $token_desc, $token_type) = ($regexp, 's', 'EXPR');
            next;
          } else {
            _debug("SUBSTITUTE ERROR: $@") if DEBUG;
            pos($$rstr) = $pos;
          }
        }
      } elsif ($c1 eq 't') {
        if ($$rstr =~ m{\G(tr\b(?!\s*=>))}gc) {
          if (my $trans = $self->_match_transliterate($c, $rstr)) {
            ($token, $token_desc, $token_type) = ($trans, 'tr', 'EXPR');
            next;
          } else {
            _debug("TRANSLITERATE ERROR: $@") if DEBUG;
            pos($$rstr) = $pos;
          }
        }
      } elsif ($c1 eq 'y') {
        if ($$rstr =~ m{\G(y\b(?!\s*=>))}gc) {
          if (my $trans = $self->_match_transliterate($c, $rstr)) {
            ($token, $token_desc, $token_type) = ($trans, 'y', 'EXPR');
            next;
          } else {
            _debug("TRANSLITERATE ERROR: $@") if DEBUG;
            pos($$rstr) = $pos;
          }
        }
      }
    }

    if ($$rstr =~ m{\G(\w+)}gc) {
      $token = $1;
      if ($prev_token_type eq 'ARROW') {
        $$rstr =~ m{\G((?:(?:::|')\w+)+)\b}gc and $token .= $1;
        ($token_desc, $token_type) = ('METHOD', 'METHOD');
      } elsif ($token eq 'CORE') {
        ($token_desc, $token_type) = ('NAMESPACE', 'WORD');
      } elsif ($token eq 'format') {
        if ($$rstr =~ m{\G([^=]*?=[ \t]*\n.*?\n\.\n)}gcs) {
          $token .= $1;
          ($token_desc, $token_type) = ('FORMAT', '');
          $current_scope |= F_STATEMENT_END|F_EXPR_END;
          next;
        }
      } elsif ($c->token_is_keyword($token) and ($prev_token_type ne 'KEYWORD' or !$c->token_expects_word($prev_token) or ($prev_token eq 'sub' and $token eq 'BEGIN'))) {
        if ($c->token_is_op_keyword($token)) {
          ($token_desc, $token_type) = ($token, 'OP');
        } else {
          ($token_desc, $token_type) = ('KEYWORD', 'KEYWORD');
          $c->check_new_keyword($token);
          push @keywords, $token unless $token eq 'undef';
        }
      } else {
        if ($c1 eq 'v' and $token =~ /^v(?:0|[1-9][0-9]*)$/) {
          if ($$rstr =~ m{\G((?:\.[0-9][0-9_]*)+)}gc) {
            $token .= $1;
            ($token_desc, $token_type) = ('VERSION_STRING', 'EXPR');
            next;
          }
        }
        $$rstr =~ m{\G((?:(?:::|')\w+)+)\b}gc and $token .= $1;
        ($token_desc, $token_type) = ('WORD', 'WORD');
        if ($prepend) {
          $token = "$prepend$token";
          pop @tokens if @tokens and $tokens[-1][0] eq $prepend;
          pop @scope_tokens if @scope_tokens and $scope_tokens[-1][0] eq $prepend;
        }
      }
      next;
    }

    # ignore control characters
    if ($$rstr =~ m{\G([[:cntrl:]]+)}gc) {
      next;
    }

    if ($$rstr =~ m{\G([[:ascii:]]+)}gc) {
      last if $parent_scope & F_STRING_EVAL;
      _error("UNKNOWN: $1");
      push @{$c->{errors}}, qq{"$1"};
      $token = $1;
      next;
    }
    if ($$rstr =~ m{\G([[:^ascii:]](?:[[:^ascii:]]|\w)*)}gc) {
      if (!$c->{utf8}) {
        last if $parent_scope & F_STRING_EVAL;
        _error("UNICODE?: $1");
        push @{$c->{errors}}, qq{"$1"};
      } else {
        _debug("UTF8: $1") if DEBUG;
      }
      $token = $1;
      next;
    }
    if ($$rstr =~ m{\G(\S+)}gc) {
      last if $parent_scope & F_STRING_EVAL;
      _error("UNEXPECTED: $1");
      push @{$c->{errors}}, qq{"$1"};
      $token = $1;
    }

    last;
  } continue {
    die "Aborted at $prev_pos" if $prev_pos == pos($$rstr);
    $prev_pos = pos($$rstr);

    if (defined $token) {
      if (!($current_scope & F_EXPR)) {
        _debug('BEGIN EXPR') if DEBUG;
        $current_scope |= F_EXPR;
      } elsif (($current_scope & F_EXPR) and (($current_scope & F_EXPR_END) or ($ends_expr{$token} and $token_type eq 'KEYWORD' and $prev_token ne ',' and $prev_token ne '=>'))) {
        @keywords = ();
        _debug('END EXPR') if DEBUG;
        $current_scope &= MASK_EXPR_END;
      }
      $prepend = undef;

      if (DEBUG) {
        my $token_str = ref $token ? Data::Dump::dump($token) : $token;
        _debug("GOT: $token_str ($pos) TYPE: $token_desc ($token_type)".($prev_token_type ? " PREV: $prev_token_type" : '').(@keywords ? " KEYWORD: @keywords" : '').(($current_scope | $parent_scope) & F_EVAL ? ' EVAL' : '').(($current_scope | $parent_scope) & F_KEEP_TOKENS ? ' KEEP' : ''));
      }

      if ($parent_scope & F_KEEP_TOKENS) {
        push @scope_tokens, [$token, $token_desc];
        if ($token eq '-' or $token eq '+') {
          $prepend = $token;
        }
      }
      if (!($current_scope & F_KEEP_TOKENS) and (exists $c->{callback}{$token} or exists $c->{keyword}{$token} or exists $c->{sub}{$token}) and $token_type ne 'METHOD' and !$c->token_expects_word($prev_token)) {
        $current_scope |= F_KEEP_TOKENS;
      }
      if ($c->token_expects_block($token)) {
        $waiting_for_a_block = 1;
      }
      if ($current_scope & F_EVAL or ($parent_scope & F_EVAL and (!@{$c->{stack}} or $c->{stack}[-1][0] ne '{'))) {
        if ($token_type eq 'STRING') {
          if ($token->[0] =~ /\b(?:(?:use|no)\s+[A-Za-z]|require\s+(?:q[qw]?.|['"])?[A-Za-z])/) {
            my $eval_string = $token->[0];
            if (defined $eval_string and $eval_string ne '') {
              $eval_string =~ s/\\(.)/$1/g;
              pos($eval_string) = 0;
              $c->{eval} = 1;
              my $saved_stack = $c->{stack};
              $c->{stack} = [];
              eval { $self->_scan($c, \$eval_string, (
                ($current_scope | $parent_scope | F_STRING_EVAL) &
                F_RESCAN
              ))};
              $c->{stack} = $saved_stack;
            }
          }
          $current_scope &= MASK_EVAL;
        } elsif ($token_desc eq 'HEREDOC') {
          if ($token->[0] =~ /\b(?:use|require|no)\s+[A-Za-z]/) {
            my $eval_string = $token->[0];
            if (defined $eval_string and $eval_string ne '') {
              $eval_string =~ s/\\(.)/$1/g;
              pos($eval_string) = 0;
              $c->{eval} = 1;
              my $saved_stack = $c->{stack};
              $c->{stack} = [];
              eval { $self->_scan($c, \$eval_string, (
                ($current_scope | $parent_scope | F_STRING_EVAL) &
                F_RESCAN
              ))};
              $c->{stack} = $saved_stack;
            }
          }
          $current_scope &= MASK_EVAL;
        } elsif ($token_type eq 'VARIABLE') {
          $current_scope &= MASK_EVAL;
        }
        $c->{eval} = ($current_scope | $parent_scope) & F_EVAL ? 1 : 0;
      }
      if ($token eq 'eval') {
        $current_scope |= F_EVAL;
        $c->{eval} = 1;
      }

      if ($current_scope & F_KEEP_TOKENS) {
        push @tokens, [$token, $token_desc];
        if ($token eq '-' or $token eq '+') {
          $prepend = $token;
        }
        if ($token_type eq 'KEYWORD' and $has_sideff{$token}) {
          $current_scope |= F_SIDEFF;
        }
      }
      if ($stack) {
        push @{$c->{stack}}, $stack;
        _dump_stack($c, $stack->[0]) if DEBUG;
        my $child_scope = $current_scope | $parent_scope;
        if ($token eq '{' and $is_conditional{$stack->[2]}) {
          $child_scope |= F_CONDITIONAL
        }
        my $scanned_tokens = $self->_scan($c, $rstr, (
          $child_scope & F_RESCAN
        ));
        if ($token eq '{' and $current_scope & F_EVAL) {
          $current_scope &= MASK_EVAL;
          $c->{eval} = ($current_scope | $parent_scope) & F_EVAL ? 1 : 0;
        }
        if ($current_scope & F_KEEP_TOKENS) {
          my $start = pop @tokens || '';
          my $end = pop @$scanned_tokens || '';
          push @tokens, [$scanned_tokens, "$start->[0]$end->[0]"];
        } elsif ($parent_scope & F_KEEP_TOKENS) {
          my $start = pop @scope_tokens || '';
          my $end = pop @$scanned_tokens || '';
          push @scope_tokens, [$scanned_tokens, "$start->[0]$end->[0]"];
        }

        if ($stack->[0] eq '(' and $prev_token_type eq 'KEYWORD' and @keywords and $keywords[-1] eq $prev_token and !$c->token_expects_expr_block($prev_token)) {
          pop @keywords;
        }

        if ($stack->[0] eq '{' and @keywords and $c->token_expects_block($keywords[0]) and !$c->token_expects_block_list($keywords[-1])) {
          $current_scope |= F_STATEMENT_END unless @tokens and ($c->token_defines_sub($keywords[-1]) or $keywords[-1] eq 'eval');
        }
        $stack = undef;
      }
      if ($current_scope & F_STATEMENT_END) {
        if (($current_scope & F_KEEP_TOKENS) and @tokens) {
          my $first_token = $tokens[0][0];
          if ($first_token eq '->') {
            $first_token = $tokens[1][0];
            # ignore ->use and ->no
            # ->require may be from UNIVERSAL::require
            if ($first_token eq 'use' or $first_token eq 'no') {
              $first_token = '';
            }
          }
          my $cond = (($current_scope | $parent_scope) & (F_CONDITIONAL|F_SIDEFF)) ? 1 : 0;
          if (exists $c->{callback}{$first_token}) {
            $c->{current_scope} = \$current_scope;
            $c->{cond} = $cond;
            $c->{callback}{$first_token}->($c, $rstr, \@tokens);

            if ($c->{found_unsupported_package} and !$c->{quick}) {
              my $unsupported = $c->{found_unsupported_package};
              $c->{quick} = 1;
              $self->_skim_string($c, $rstr);
              warn "Unsupported package '$unsupported' is found. Result may be incorrect.\n";
            }
          }
          if (exists $c->{keyword}{$first_token}) {
            $c->{current_scope} = \$current_scope;
            $c->{cond} = $cond;
            $tokens[0][1] = 'KEYWORD';
            $c->run_callback_for(keyword => $first_token, \@tokens);
          }
          if (exists $c->{method}{$first_token} and $caller_package) {
            unshift @tokens, [$caller_package, 'WORD'];
            $c->{current_scope} = \$current_scope;
            $c->{cond} = $cond;
            $c->run_callback_for(method => $first_token, \@tokens);
          }
          if ($current_scope & F_SIDEFF) {
            $current_scope &= MASK_SIDEFF;
            while(my $token = shift @tokens) {
              last if $has_sideff{$token->[0]};
            }
            $current_scope &= F_SIDEFF if grep {$has_sideff{$_->[0]}} @tokens;
            if (@tokens) {
              $first_token = $tokens[0][0];
              $cond = (($current_scope | $parent_scope) & (F_CONDITIONAL|F_SIDEFF)) ? 1 : 0;
              if (exists $c->{callback}{$first_token}) {
                $c->{current_scope} = \$current_scope;
                $c->{cond} = $cond;
                $c->{callback}{$first_token}->($c, $rstr, \@tokens);
              }
              if (exists $c->{keyword}{$first_token}) {
                $c->{current_scope} = \$current_scope;
                $c->{cond} = $cond;
                $tokens[0][1] = 'KEYWORD';
                $c->run_callback_for(keyword => $first_token, \@tokens);
              }
              if (exists $c->{method}{$first_token} and $caller_package) {
                unshift @tokens, [$caller_package, 'WORD'];
                $c->{current_scope} = \$current_scope;
                $c->{cond} = $cond;
                $c->run_callback_for(method => $first_token, \@tokens);
              }
            }
          }
        }
        @tokens = ();
        @keywords = ();
        $current_scope &= MASK_STATEMENT_END;
        $caller_package = undef;
        $token = $token_type = '';
        _debug('END SENTENSE') if DEBUG;
      }
      if ($unstack and @{$c->{stack}}) {
        my $stacked = pop @{$c->{stack}};
        my $stacked_type = substr($stacked->[0], -1);
        if (
          ($unstack eq '}' and $stacked_type ne '{') or
          ($unstack eq ']' and $stacked_type ne '[') or
          ($unstack eq ')' and $stacked_type ne '(')
        ) {
          my $prev_pos = $stacked->[1] || 0;
          die "mismatch $stacked_type $unstack\n" .
              substr($$rstr, $prev_pos, pos($$rstr) - $prev_pos);
        }
        _dump_stack($c, $unstack) if DEBUG;
        $current_scope |= F_SCOPE_END;
        $unstack = undef;
      }

      last if $current_scope & F_SCOPE_END;
      last if $c->{ended};
      last if $c->{last_found_by_skimming} and $c->{last_found_by_skimming} < pos($$rstr);

      ($prev_token, $prev_token_type) = ($token, $token_type);
    }

    if (@{$c->{errors}} and !($parent_scope & F_STRING_EVAL)) {
      my $rest = substr($$rstr, pos($$rstr));
      _error("REST:\n\n".$rest) if $rest;
      last;
    }
  }

  if (@tokens) {
    if (my $first_token = $tokens[0][0]) {
      if (exists $c->{callback}{$first_token}) {
        $c->{callback}{$first_token}->($c, $rstr, \@tokens);
      }
      if (exists $c->{keyword}{$first_token}) {
        $tokens[0][1] = 'KEYWORD';
        $c->run_callback_for(keyword => $first_token, \@tokens);
      }
    }
  }

  _dump_stack($c, "END SCOPE") if DEBUG;

  \@scope_tokens;
}

# Consumes the delimited body of a quote-like operator whose keyword ($op)
# has already been matched by the caller, starting at pos($$rstr).
#
# Returns [ $body, $op ], where $body is the text between the delimiters,
# and leaves pos($$rstr) just past the closing delimiter. Returns nothing
# when no delimiter or no terminator is found; $@ is set (via _match_error)
# only in the missing-delimiter case.
sub _match_quotelike {
  my ($self, $c, $rstr, $op) = @_;

  # A '#' directly after the op is the delimiter itself; only when
  # whitespace precedes it is $re_comment given a chance to skip it.
  $$rstr =~ m/\G(?:\s(?:$re_comment))?\s*/gcs;

  $$rstr =~ m/\G(\S)/gc
    or return _match_error($rstr, "No block delimiter found after $op");
  my $open = $1;
  my $body_start = pos($$rstr);

  if ($open !~ /[[(<{]/) {
    # The same character opens and closes the string.
    my $re_body = _gen_re_str_in_delims_with_end_delim($open);
    return unless $$rstr =~ /\G$re_body/gcs;
  } else {
    # Bracketing delimiters may nest, so track the depth by hand.
    my ($close, $re_plain) = _gen_rdel_and_re_skip($open);
    my $depth = 1;
    while (defined(my $at = pos($$rstr))) {
      my $ch = substr($$rstr, $at, 1);
      if ($ch eq '\\') {
        # skip the backslash together with the character it escapes
        pos($$rstr) = $at + 2;
      } elsif ($ch eq $open) {
        pos($$rstr) = $at + 1;
        $depth++;
      } elsif ($ch eq $close) {
        pos($$rstr) = $at + 1;
        last unless --$depth;
      } else {
        # nothing left to skip: stop, leaving the brackets unbalanced
        $$rstr =~ m{\G$re_plain}gc or last;
      }
    }
    return if $depth;
  }

  # pos() sits just past the closing delimiter; drop that one character.
  my $body_len = pos($$rstr) - $body_start - 1;

  return [substr($$rstr, $body_start, $body_len), $op];
}

# Matches a regexp literal written with bare slashes ("/.../").
#
# Arguments:
#   $c          - scanner context, handed on to _scan_re
#   $rstr       - reference to the source string being scanned
#   $startpos   - offset of the opening '/' in $$rstr
#   $token_type - when true, 'm' is passed to _scan_re as the operator,
#                 otherwise an empty string
#
# Returns the literal, from the opening slash through any trailing
# modifiers, and leaves pos($$rstr) right after it. On failure $@ is set
# through _match_error and nothing is returned.
sub _match_regexp0 { # //
  my ($self, $c, $rstr, $startpos, $token_type) = @_;
  pos($$rstr) = $startpos + 1;

  my $re_shortcut = _gen_re_regexp_shortcut('/');
  $$rstr =~ m{\G$re_shortcut}gcs or  # shortcut
  defined($self->_scan_re($c, $rstr, '/', '/', $token_type ? 'm' : '')) or return _match_error($rstr, "Closing delimiter was not found: $@");

  # Trailing modifiers. 'n' (no-capture, perl 5.22+) is included so that
  # /.../n is kept as one token instead of leaving a stray 'n' word behind.
  $$rstr =~ m/\G([msixpodualgcn]*)/gc;
  my $mod = $1;

  my $endpos = pos($$rstr);

  my $re = substr($$rstr, $startpos, $endpos - $startpos);
  # A slash pair spanning lines without /x is rejected as a regexp.
  if ($re =~ /\n/s and $mod !~ /x/) {
    return _match_error($rstr, "multiline without x");
  }
  return $re;
}

# Matches the delimited pattern and trailing modifiers of a regexp operator
# (_scan_re is called with the operator name 'm/qr').
#
# Arguments:
#   $c    - scanner context, handed on to _scan_re
#   $rstr - reference to the source string; scanning starts at pos($$rstr)
#
# Returns the text from the position on entry through the trailing
# modifiers and leaves pos($$rstr) right after it. On failure $@ is set
# through _match_error and nothing is returned.
sub _match_regexp {
  my ($self, $c, $rstr) = @_;
  my $startpos = pos($$rstr) || 0;

  # '#' only works when it comes just after the op,
  # without prepending spaces
  $$rstr =~ m/\G(?:\s(?:$re_comment))?\s*/gcs;

  unless ($$rstr =~ m/\G(\S)/gc) {
    return _match_error($rstr, "No block delimiter found");
  }
  my ($ldel, $rdel) = ($1, $1);

  # An opening bracket is closed by its partner; any other delimiter
  # closes itself.
  if ($ldel =~ /[[(<{]/) {
    $rdel =~ tr/[({</])}>/;
  }

  my $re_shortcut = _gen_re_regexp_shortcut($ldel, $rdel);
  $$rstr =~ m{\G$re_shortcut}gcs or  # shortcut
  defined($self->_scan_re($c, $rstr, $ldel, $rdel, 'm/qr')) or return _match_error($rstr, "Closing delimiter was not found: $@");

  # strictly speaking, qr// doesn't support gc.
  # 'n' (no-capture, perl 5.22+) is accepted so that it stays part of
  # this token.
  $$rstr =~ m/\G[msixpodualgcn]*/gc;
  my $endpos = pos($$rstr);

  return substr($$rstr, $startpos, $endpos - $startpos);
}

# Matches both parts and the trailing modifiers of a substitution
# (_scan_re and _scan_re2 are called with the operator name 's').
#
# Arguments:
#   $c    - scanner context, handed on to _scan_re / _scan_re2
#   $rstr - reference to the source string; scanning starts at pos($$rstr)
#
# Returns the text from the position on entry through the trailing
# modifiers and leaves pos($$rstr) right after it. Returns nothing on
# failure; $@ is set through _match_error when the delimiter is missing or
# the first part cannot be closed.
sub _match_substitute {
  my ($self, $c, $rstr) = @_;
  my $startpos = pos($$rstr) || 0;

  # '#' only works when it comes just after the op,
  # without prepending spaces
  $$rstr =~ m/\G(?:\s(?:$re_comment))?\s*/gcs;

  unless ($$rstr =~ m/\G(\S)/gc) {
    return _match_error($rstr, "No block delimiter found");
  }
  my ($ldel1, $rdel1) = ($1, $1);

  # An opening bracket is closed by its partner; any other delimiter
  # closes itself.
  if ($ldel1 =~ /[[(<{]/) {
    $rdel1 =~ tr/[({</])}>/;
  }

  # The shortcut regexp is not attempted when the delimiter is a backslash.
  my $re_shortcut = _gen_re_regexp_shortcut($ldel1, $rdel1);
  ($ldel1 ne '\\' and $$rstr =~ m{\G$re_shortcut}gcs) or  # shortcut
  defined($self->_scan_re($c, $rstr, $ldel1, $rdel1, 's')) or return _match_error($rstr, "Closing delimiter was not found: $@");
  # latter (replacement) part
  defined($self->_scan_re2($c, $rstr, $ldel1, 's')) or return;
  # Trailing modifiers; 'n' (no-capture, perl 5.22+) is accepted next to
  # the substitution-only 'e' and 'r'.
  $$rstr =~ m/\G[msixpodualgcern]*/gc;
  my $endpos = pos($$rstr);

  return substr($$rstr, $startpos, $endpos - $startpos);
}

# Matches the two delimited lists and the trailing modifiers of a
# transliteration, starting at pos($$rstr).
#
# Returns the text from the position on entry through the modifiers and
# leaves pos($$rstr) right after it. Returns nothing on failure; $@ is set
# (via _match_error) only when a delimiter is missing.
sub _match_transliterate {
  my ($self, $c, $rstr) = @_;
  my $token_start = pos($$rstr) || 0;

  # A '#' directly after the op is the delimiter itself; only when
  # whitespace precedes it is $re_comment given a chance to skip it.
  $$rstr =~ m/\G(?:\s(?:$re_comment))?\s*/gcs;

  $$rstr =~ m/\G(\S)/gc
    or return _match_error($rstr, "No block delimiter found");
  my $open1 = $1;

  # tr maps an opening bracket to its partner and leaves every other
  # character alone, so $close1 differs from $open1 only for brackets.
  (my $close1 = $open1) =~ tr/[({</])}>/;
  my $re_first = _gen_re_str_in_delims_with_end_delim($close1);
  $$rstr =~ /\G$re_first/gcs or return;

  # Without brackets the second list reuses the first delimiter; with
  # brackets it brings an opening delimiter of its own.
  my $open2 = $open1;
  if ($close1 ne $open1) {
    $$rstr =~ /\G(?:$re_comment)/gcs;
    $$rstr =~ /\G\s*(\S)/gc
      or return _match_error($rstr, "Missing second block");
    $open2 = $1;
  }

  (my $close2 = $open2) =~ tr/[({</])}>/;
  my $re_second = _gen_re_str_in_delims_with_end_delim($close2);
  $$rstr =~ /\G$re_second/gcs or return;

  # trailing modifiers
  $$rstr =~ m/\G[cdsr]*/gc;

  return substr($$rstr, $token_start, pos($$rstr) - $token_start);
}

# Matches a here-document whose "<<" operator starts at pos($$rstr).
#
# On success the body and its terminator line are cut out of $$rstr,
# pos($$rstr) is put right after the label so the rest of the introducing
# line is scanned next, and an array reference is returned:
#   [ body, operator text ("<<" through the label), terminator line ]
# Perl 5.026 is registered on $c when the "<<~" form is used.
# Returns nothing when no label or no terminator is found; in the latter
# case $@ is set through _match_error.
sub _match_heredoc {
  my ($self, $c, $rstr) = @_;

  my $op_start = pos($$rstr) || 0;

  # "<<~" lets the terminator be preceded by whitespace.
  $$rstr =~ m{\G(?:<<(~)?\s*)}gc;
  my $indent_re = $1 ? "\\s*" : "";

  my $label;
  if ($$rstr =~ m{\G\\?([A-Za-z_]\w*)}gc) {
    $label = $1;
  } else {
    $$rstr =~ m{
        \G ' ($re_str_in_single_quotes) '
      | \G " ($re_str_in_double_quotes) "
      | \G ` ($re_str_in_backticks) `
    }gcsx or return;
    $label = $+;
  }
  $label =~ s/\\(.)/$1/g;
  my $label_end = pos($$rstr);

  # The body begins on the line after the operator. Step back onto that
  # line's newline so an empty body (terminator on the very next line)
  # still satisfies the leading "\n" of the search below.
  $$rstr =~ m{\G.*\n}gc;
  my $body_start = pos($$rstr);
  pos($$rstr) = $body_start - 1;

  $$rstr =~ m{\G.*?\n$indent_re(?=\Q$label\E\n)}gcs
    or return _match_error($rstr, qq{Missing here doc terminator ('$label')});
  my $term_start = pos($$rstr);
  $$rstr =~ m{\G\Q$label\E\n}gc;
  my $term_end = pos($$rstr);

  my @parts = (
    substr($$rstr, $body_start, $term_start - $body_start),
    substr($$rstr, $op_start, $label_end - $op_start),
    substr($$rstr, $term_start, $term_end - $term_start),
  );

  # Remove body and terminator from the source, then resume after the label.
  substr($$rstr, $body_start, $term_end - $body_start, '');
  pos($$rstr) = $label_end;

  $c->add_perl('5.026', '<<~') if $indent_re;

  return \@parts;
}

# Scans a regexp-like quoted body in $$rstr from the current pos() through
# its closing delimiter, and returns the scanned text.
#
# Arguments:
#   $self - scanner object (used for the nested _scan/_scan_re2 calls)
#   $c    - context; {stack} is consulted for the enclosing bracket and is
#           restored here if a speculative ${...} scan fails
#   $rstr - reference to the source string; its pos() is advanced
#   $ldel - opening delimiter (NOTE(review): presumably already consumed by
#           the caller, since @nesting starts out holding it -- confirm)
#   $rdel - closing delimiter
#   $op   - operator name; 's' makes the sub look ahead for the replacement
#           part, and a false value enables the outer-delimiter check
#
# Returns the substring from the starting pos() to just after the closing
# delimiter. On failure returns nothing: via _match_error (which sets $@)
# for unbalanced input, or with a bare return when an embedded (?{ ... })
# block cannot be scanned.
sub _scan_re {
  my ($self, $c, $rstr, $ldel, $rdel, $op) = @_;
  my $startpos = pos($$rstr) || 0;

  _debug(" L $ldel R $rdel") if DEBUG_RE;

  # Remember the innermost bracket currently open on the context stack.
  # The chained assignment copies it into both variables; tr/// then only
  # rewrites $outer_closing_delimiter into the matching closing bracket.
  my ($outer_opening_delimiter, $outer_closing_delimiter);
  if (@{$c->{stack}}) {
    ($outer_closing_delimiter = $outer_opening_delimiter = $c->{stack}[-1][0]) =~ tr/[({</])}>/;
  }

  # @nesting tracks every opener still waiting for its closer; the scan is
  # complete once it becomes empty.
  my @nesting = ($ldel);
  my $multiline = 0;   # set once a newline has been seen inside the body
  my $saw_sharp = 0;   # set when a '#' was seen that was not consumed as a comment
  my $prev;            # not referenced anywhere else in this sub
  my ($p, $c1);
  while (defined($p = pos($$rstr))) {
    $c1 = substr($$rstr, $p, 1);
    if ($c1 eq "\n") {
      # Skip the newline plus any following whitespace; a new line also
      # cancels a pending '#'.
      $$rstr =~ m{\G\n\s*}gcs;
      $multiline = 1;
      $saw_sharp = 0;
      # _debug("CRLF") if DEBUG_RE;
      next;
    }
    if ($c1 eq ' ' or $c1 eq "\t") {
      $$rstr =~ m{\G\s*}gc;
      # _debug("WHITESPACE") if DEBUG_RE;
      next;
    }
    if ($c1 eq '#' and $rdel ne '#') {
      # In a multiline body, a '#' run that reaches the end of the line
      # without containing the closing delimiter is consumed as a comment.
      # Otherwise only the '#' itself is skipped and the fact is recorded.
      if ($multiline and $$rstr =~ m{\G(#[^\Q$rdel\E]*?)\n}gcs) {
        _debug(" comment $1") if DEBUG_RE
      } else {
        pos($$rstr) = $p + 1;
        $saw_sharp = 1;
        _debug(" saw #") if DEBUG_RE;
      }
      next;
    }

    # A backslash escapes the next character (unless backslash itself is the
    # closing delimiter).
    if ($c1 eq '\\' and $rdel ne '\\') {
      if ($$rstr =~ m/\G(\\.)/gcs) {
        _debug(" escaped $1") if DEBUG_RE;
        next;
      }
    }

    _debug(" looking @nesting: $c1") if DEBUG_RE;

    if ($c1 eq '[') {
      # character class may have other (ignorable) delimiters
      # First try a lone POSIX-style class such as [[:alpha:]] ...
      if ($$rstr =~ m/\G(\[\[:\w+?:\]\])/gcs) {
        _debug(" character class $1") if DEBUG_RE;
        next;
      }
      # ... then a bracketed class with optional backslash escapes.
      if ($$rstr =~ m/\G(\[[^\\\]]]*?(\\.[^\\\]]]*)*\])/gcs) {
        _debug(" character class: $1") if DEBUG_RE;
        next;
      }
    }

    if ($c1 eq $rdel) {
      pos($$rstr) = $p + 1;
      if ($saw_sharp) {
        # A '#' was seen earlier on this line, so this delimiter may be
        # part of a comment. Look ahead to decide, then rewind to $tmp_pos.
        my $tmp_pos = $p + 1;
        if ($op eq 's') {
          # For s///, a real end of the pattern must be followed by a
          # scannable replacement part.
          _debug(" looking for latter part") if DEBUG_RE;
          my $latter = $self->_scan_re2($c, $rstr, $ldel, $op);
          if (!defined $latter) {
            pos($$rstr) = $tmp_pos;
            next;
          }
          _debug(" latter: $latter") if DEBUG_RE;
        }
        # Trailing modifier letters that include 'x' are taken as proof
        # that this delimiter really ends the regexp: all nesting is
        # dropped and the scan stops right after the delimiter.
        if ($$rstr =~ m/\G[a-wyz]*x/) {
          # looks like an end of block
          _debug(" end of block $rdel (after #)") if DEBUG_RE;
          @nesting = ();
          pos($$rstr) = $tmp_pos;
          last;
        }
        pos($$rstr) = $tmp_pos;
        if ($multiline) {
          next; # part of a comment
        }
      }
      _debug(" end of block $rdel") if DEBUG_RE;
      # For bracket-style delimiters, translate the closer back into its
      # opener, then unwind @nesting down to (and including) that opener.
      my $expected = $rdel;
      if ($ldel ne $rdel) {
        $expected =~ tr/)}]>/({[</;
      }
      while(my $nested = pop @nesting) {
        last if $nested eq $expected;
      }
      last unless @nesting;
      next;
    } elsif ($c1 eq $ldel) {
      pos($$rstr) = $p + 1;
      if ($multiline and $saw_sharp) {
        # Not counted as a nested opener here; control falls through to the
        # remaining checks with pos() already moved past the delimiter.
      } else {
        _debug(" block $ldel") if DEBUG_RE;
        push @nesting, $ldel;
        next;
      }
    }

    if ($c1 eq '{') {
      # quantifier shouldn't be nested
      if ($$rstr =~ m/\G(\{[0-9]+(?:,(?:[0-9]+)?)?})/gcs) {
        _debug(" quantifier $1") if DEBUG_RE;
        next;
      }
    }

    if ($c1 eq '(') {
      my $c2 = substr($$rstr, $p + 1, 1);
      if ($c2 eq '?' and !($multiline and $saw_sharp)) {
        # code
        # "(?{" or "(??{": record the paren, then let the general scanner
        # consume the embedded block; give up entirely if that fails.
        if ($$rstr =~ m/\G((\()\?+?)(?=\{)/gc) {
          _debug(" code $1") if DEBUG_RE;
          push @nesting, $2;
          unless (eval { $self->_scan($c, $rstr, F_EXPECTS_BRACKET); 1 }) {
            _debug("scan failed") if DEBUG_RE;
            return;
          }
          next;
        }
        # comment
        # "(?# ... )" is skipped as a whole, honouring backslash escapes.
        if ($$rstr =~ m{\G(\(\?\#[^\\\)]*(?:\\.[^\\\)]*)*\))}gcs) {
          _debug(" comment $1") if DEBUG_RE;
          next;
        }
      }

      # grouping may have (ignorable) <>
      if ($$rstr =~ m/\G((\()(?:<[!=]|<\w+?>|>)?)/gc) {
        _debug(" group $1") if DEBUG_RE;
        push @nesting, $2;
        next;
      }
    }

    # maybe variables (maybe not)
    # Try to scan "${ ... }" as code; if that fails, rewind pos() and put
    # back the copy of the context stack taken before the attempt.
    if ($c1 eq '$' and substr($$rstr, $p + 1, 1) eq '{') {
      my @tmp_stack = @{$c->{stack}};
      next if eval { $self->_scan($c, $rstr, F_EXPECTS_BRACKET); 1 };
      pos($$rstr) = $p;
      $c->{stack} = \@tmp_stack;
    }

    if ($c1 eq ')') {
      if (@nesting and $nesting[-1] eq '(') {
        _debug(" end of group $c1") if DEBUG_RE;
        pop @nesting;
        pos($$rstr) = $p + 1;
        next;
      } else {
        # die "unnested @nesting" unless $saw_sharp;
      }
    }

    # for //, see if an outer closing delimiter is found first (ie. see if it was actually a /)
    if (!$op) {
      # Track nested occurrences of the enclosing bracket type so that a
      # balanced pair inside the body is not mistaken for the outer closer.
      if ($outer_opening_delimiter and $c1 eq $outer_opening_delimiter) {
        push @nesting, $c1;
        pos($$rstr) = $p + 1;
        next;
      }

      if ($outer_closing_delimiter and $c1 eq $outer_closing_delimiter) {
        if (@nesting and $nesting[-1] eq $outer_opening_delimiter) {
          pop @nesting;
          pos($$rstr) = $p + 1;
          next;
        }

        # An unbalanced outer closer is reported as an error.
        return _match_error($rstr, "Outer closing delimiter: $outer_closing_delimiter is found");
      }
    }

    # Anything else: swallow a run of word characters or one character.
    if ($$rstr =~ m/\G(\w+|.)/gcs) {
      _debug(" rest $1") if DEBUG_RE;
      next;
    }
    # Nothing more can be matched at this position, so stop scanning.
    last;
  }
  # Leaving the loop with openers still pending means the body never closed.
  if ($#nesting>=0) {
    return _match_error($rstr, "Unmatched opening bracket(s): ". join("..",@nesting)."..");
  }

  my $endpos = pos($$rstr);

  return substr($$rstr, $startpos, $endpos - $startpos);
}


# Scans the second part of a two-part quotelike operator (e.g. the
# replacement of s///), starting at the current pos() of $$rstr.
#
# When the first part used a bracket-style delimiter, the second part
# brings its own opening delimiter, which is located after an optional
# comment and whitespace. Returns the scanned substring; returns nothing
# on failure (through _match_error where a message applies).
sub _scan_re2 {
  my ($self, $c, $rstr, $ldel, $op) = @_;
  my $from = pos($$rstr);

  if ($ldel =~ /[[(<{]/) {
    # skip a comment (if any) sitting between the two parts
    $$rstr =~ /\G(?:$re_comment)/gcs;

    # the next non-space character opens the second part
    if ($$rstr =~ /\G\s*(\S)/gc) {
      $ldel = $1;
    } else {
      return _match_error($rstr, "Missing second block for quotelike $op");
    }
  }

  if ($ldel =~ /[[(<{]/) {
    # bracket-style second part: balance the delimiters by hand
    my ($rdel, $re_skip) = _gen_rdel_and_re_skip($ldel);
    my @nest = $ldel;
    while (defined(my $at = pos($$rstr))) {
      my $ch = substr($$rstr, $at, 1);
      if ($ch eq '\\') {
        # step over the backslash and the character it escapes
        pos($$rstr) = $at + 2;
      } elsif ($ch eq $ldel) {
        pos($$rstr) = $at + 1;
        push @nest, $ldel;
      } elsif ($ch eq $rdel) {
        pos($$rstr) = $at + 1;
        pop @nest;
        last unless @nest;
      } else {
        # consume a run of uninteresting characters, or stop if none
        $$rstr =~ m{\G$re_skip}gc or last;
      }
    }
    return _match_error($rstr, "nesting mismatch: @nest") if @nest;
  } else {
    # same delimiter on both sides: one regexp covers the whole part
    my $re = _gen_re_str_in_delims_with_end_delim($ldel);
    return unless $$rstr =~ /\G$re/gcs;
  }

  return substr($$rstr, $from, pos($$rstr) - $from);
}

# Handles a `use` statement.
#
#   $c      - context object that collects the requirements
#   $rstr   - reference to the source string being scanned
#   $tokens - tokens of the statement, starting with 'use' itself
#
# Records a perl version or module requirement, switches the scanner to
# UTF-8 when the module enables it, applies registered source filters and
# sub keywords, and finally runs any parser callback for the module.
sub _use {
  my ($c, $rstr, $tokens) = @_;
  _debug("USE TOKENS: ".(Data::Dump::dump($tokens))) if DEBUG;
  shift @$tokens; # drop the leading 'use'

  # TODO: see if the token is WORD or not?
  my $name_token = shift @$tokens or return;
  my $name = $name_token->[0];
  return unless defined $name and !ref($name) and $name ne '';

  # use 5.xxx / use v5.xx: a requirement on perl itself
  if ($name =~ /\Av?5/) {
    $c->add(perl => $name);
    return;
  }
  # use v6: flag the source as Perl 6 and stop scanning
  if ($name =~ /\Av6/) {
    $c->{perl6} = 1;
    $c->{ended} = 1;
    return;
  }

  if ($c->enables_utf8($name)) {
    $c->add($name => 0);
    $c->{utf8} = 1;
    unless ($c->{decoded}) {
      # decode the source once, then ask for a rescan from the beginning
      $c->{decoded} = 1;
      _debug("UTF8 IS ON") if DEBUG;
      utf8::decode($$rstr);
      pos($$rstr) = 0;
      $c->{ended} = $c->{redo} = 1;
    }
  }

  if (is_module_name($name)) {
    # Peek at the next token through a local copy, so that @$tokens
    # itself is left untouched unless a version is really consumed.
    my $next_token = $tokens->[0];
    my $next_desc = $next_token->[1];
    my $has_version = ($next_desc and ($next_desc eq 'NUMBER' or $next_desc eq 'VERSION_STRING'));
    $c->add($name => $has_version ? $next_token->[0] : 0);
    shift @$tokens if $has_version;

    if (exists $sub_keywords{$name}) {
      $c->register_sub_keywords(@{$sub_keywords{$name}});
      $c->prototype_re(qr{\G(\((?:[^\\\(\)]*(?:\\.[^\\\(\)]*)*)\))});
    }
    if (exists $filter_modules{$name}) {
      # the filter may move pos(); put it back afterwards
      my $saved_pos = pos($$rstr);
      my $needs_redo = $filter_modules{$name}->($rstr);
      pos($$rstr) = $saved_pos;
      $c->{ended} = $c->{redo} = 1 if $needs_redo;
    }
  }

  # Pick the callback to run: the module's own, or the Moose(::Role) one
  # for modules whose names look like members of the Moose family.
  my $callback_for;
  if ($c->has_callback_for(use => $name)) {
    $callback_for = $name;
  } elsif ($name =~ /\b(?:Mo[ou]se?X?|MooX?|Elk|Antlers|Role)\b/) {
    my $module = $name =~ /Role/ ? 'Moose::Role' : 'Moose';
    $callback_for = $module if $c->has_callback_for(use => $module);
  }
  if (defined $callback_for) {
    eval { $c->run_callback_for(use => $callback_for, $tokens) };
    warn "Callback Error: $@" if $@;
  }

  if (exists $unsupported_packages{$name}) {
    $c->{found_unsupported_package} = $name;
  }
}

# Handles a `require` statement.
#
#   $c      - context object that collects the requirements
#   $rstr   - reference to the source string being scanned (unused here)
#   $tokens - tokens of the statement, starting with 'require' itself
#
# A perl version or a module name is recorded through add_conditional.
# A name given as a nested token (an array reference) is treated as a
# file path: *.pl files are ignored and Foo/Bar.pm becomes Foo::Bar.
sub _require {
  my ($c, $rstr, $tokens) = @_;
  _debug("REQUIRE TOKENS: ".(Data::Dump::dump($tokens))) if DEBUG;
  shift @$tokens; # drop the leading 'require'

  # TODO: see if the token is WORD or not?
  my $name_token = shift @$tokens or return;
  my $name = $name_token->[0];
  if (ref $name) {
    $name = $name->[0];
    return if $name =~ /\.pl$/i;

    # turn a path into a package name
    for ($name) {
      s|/|::|g;
      s|\.pm$||i;
    }
  }
  return unless defined $name and $name ne '';

  # require 5.xxx / require v5.xx: a requirement on perl itself
  if ($name =~ /\Av?5/) {
    $c->add_conditional(perl => $name);
    return;
  }
  # require v6: flag the source as Perl 6 and stop scanning
  if ($name =~ /\Av6/) {
    $c->{perl6} = 1;
    $c->{ended} = 1;
    return;
  }
  if (is_module_name($name)) {
    $c->add_conditional($name => 0);
    return;
  }
}

# Handles a `no` statement.
#
#   $c      - context object that collects the results
#   $rstr   - reference to the source string being scanned (unused here)
#   $tokens - tokens of the statement, starting with 'no' itself
#
# Records the perl version or module (with an optional version) through
# add_no, turns the utf8 flag off for `no utf8`, and runs any parser
# callback registered for the module.
sub _no {
  my ($c, $rstr, $tokens) = @_;
  _debug("NO TOKENS: ".(Data::Dump::dump($tokens))) if DEBUG;
  shift @$tokens; # drop the leading 'no'

  # TODO: see if the token is WORD or not?
  my $name_token = shift @$tokens or return;
  my $name = $name_token->[0];
  return unless defined $name and !ref($name) and $name ne '';

  # no 5.xxx / no v5.xx: recorded against perl itself
  if ($name =~ /\Av?5/) {
    $c->add_no(perl => $name);
    return;
  }
  # no v6: flag the source as Perl 6 and stop scanning
  if ($name =~ /\Av6/) {
    $c->{perl6} = 1;
    $c->{ended} = 1;
    return;
  }

  $c->{utf8} = 0 if $name eq 'utf8';

  if (is_module_name($name)) {
    # Peek at the next token through a local copy, so that @$tokens
    # itself is left untouched unless a version is really consumed.
    my $next_token = $tokens->[0];
    my $next_desc = $next_token->[1];
    my $has_version = ($next_desc and ($next_desc eq 'NUMBER' or $next_desc eq 'VERSION_STRING'));
    $c->add_no($name => $has_version ? $next_token->[0] : 0);
    shift @$tokens if $has_version;
  }

  if ($c->has_callback_for(no => $name)) {
    eval { $c->run_callback_for(no => $name, $tokens) };
    warn "Callback Error: $@" if $@;
    return;
  }
}

1;

__END__

=encoding utf-8

=head1 NAME

Perl::PrereqScanner::NotQuiteLite - a tool to scan your Perl code for its prerequisites

=head1 SYNOPSIS

  use Perl::PrereqScanner::NotQuiteLite;
  my $scanner = Perl::PrereqScanner::NotQuiteLite->new(
    parsers => [qw/:installed -UniversalVersion/],
    suggests => 1,
    perl_minimum_version => 1,
  );
  my $context = $scanner->scan_file('path/to/file');
  my $requirements = $context->requires;
  my $recommends = $context->recommends;
  my $suggestions  = $context->suggests; # requirements in evals
  my $noes = $context->noes;

=head1 DESCRIPTION

Perl::PrereqScanner::NotQuiteLite is yet another prerequisites
scanner. It passes almost all the scanning tests for
L<Perl::PrereqScanner> and L<Module::ExtractUse> (ie. except for
a few dubious ones), and runs slightly faster than PPI-based
Perl::PrereqScanner. However, it doesn't run as fast as
L<Perl::PrereqScanner::Lite> (which uses an XS lexer).

Perl::PrereqScanner::NotQuiteLite also recognizes C<eval>.
Prerequisites in C<eval> are not considered as requirements, but you
can collect them as suggestions.

Conditional requirements or requirements loaded in a block are
treated as recommends. Modules named in C<no> statements are stored
separately (since 0.94). You may or may not need to merge them into
requires.

Perl::PrereqScanner::NotQuiteLite can also recognize some of
the new language features such as C<say>, subroutine signatures,
and postfix dereferences, to improve the minimum perl requirement
(since 0.9905).

=head1 METHODS

=head2 new

creates a scanner object. Options are:

=over 4

=item parsers

By default, Perl::PrereqScanner::NotQuiteLite only recognizes
modules loaded directly by C<use>, C<require>, C<no> statements,
plus modules loaded by a few common modules such as C<base>,
C<parent>, C<if> (that are in the Perl core), and by two keywords
exported by the L<Moose> family (C<extends> and C<with>).

If you need more, you can pass extra parser names to the scanner,
or C<:bundled>, which loads and registers all the parsers bundled
with this distribution. If you have your own parsers, you can
specify C<:installed> to load and register all the installed parsers.

You can also pass a project-specific parser (that lies outside the
C<Perl::PrereqScanner::NotQuiteLite::Parser> namespace) by
prepending C<+> to the name.

  use Perl::PrereqScanner::NotQuiteLite;
  my $scanner = Perl::PrereqScanner::NotQuiteLite->new(
    parsers => [qw/+PrereqParser::For::MyProject/],
  );

If you don't want to load a specific parser for some reason,
prepend C<-> to the parser name.

=item suggests

Perl::PrereqScanner::NotQuiteLite ignores C<use>-like statements in
C<eval> by default. If you set this option to true,
Perl::PrereqScanner::NotQuiteLite also parses statements in C<eval>,
and records requirements as suggestions.

=item recommends

Perl::PrereqScanner::NotQuiteLite ignores C<require>-like
statements in a block by default. If you set this option to true,
Perl::PrereqScanner::NotQuiteLite also records requirements in
a block as recommendations.

=item perl_minimum_version

If you set this option to true, Perl::PrereqScanner::NotQuiteLite
adds a specific version of perl as a requirement when it finds
some of the new perl language features.

=back

=head2 scan_file

takes a path to a file and returns a ::Context object.

=head2 scan_string

takes a string, scans it, and returns a ::Context object.

=head1 SEE ALSO

L<Perl::PrereqScanner>, L<Perl::PrereqScanner::Lite>, L<Module::ExtractUse>

L<Perl::PrereqScanner::NotQuiteLite::App> to scan a whole distribution.

L<scan-perl-prereqs-nqlite> is a command line interface of the above.

=head1 AUTHOR

Kenichi Ishigaki, E<lt>ishigaki@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by Kenichi Ishigaki.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut