class PuppetLint::Lexer

Internal: The puppet-lint lexer. Converts your manifest into its tokenised form.

Constants

APP_MANAGEMENT_TOKENS

Internal: A Hash whose keys are Strings representing reserved keywords in the Puppet DSL when Application Management is enabled. From github.com/puppetlabs/puppet/blob/master/lib/puppet/pops/parser/lexer2.rb#L142-L159 or thereabouts. Currently unused.

FORMATTING_TOKENS

Internal: A Hash whose keys are Symbols representing token types which are considered to be formatting tokens (i.e. tokens that don't contain code).

KEYWORDS

Internal: A Hash whose keys are Strings representing reserved keywords in the Puppet DSL. From github.com/puppetlabs/puppet/blob/master/lib/puppet/pops/parser/lexer2.rb#L116-L137 or thereabouts

KNOWN_TOKENS
LINE_END_RE
NAME_RE

Internal: An Array of Arrays containing tokens that can be described by a single regular expression. Each sub-Array contains 2 elements, the name of the token as a Symbol and a regular expression describing the value of the token.

REGEX_PREV_TOKENS

Internal: A Hash whose keys are Symbols representing token types which a regular expression can follow.

WHITESPACE_RE

\t == tab, \v == vertical tab, \f == form feed, \p{Zs} == ASCII + Unicode non-linebreaking whitespace

Public Class Methods

heredoc_queue() click to toggle source
# File lib/puppet-lint/lexer.rb, line 40
# Internal: Access the class-level heredoc queue, creating the Array on
# first use. The queue lives on the class so every lexer instance shares
# it.
#
# Returns an Array of heredoc tag Strings awaiting consumption.
def self.heredoc_queue
  @heredoc_queue = [] unless @heredoc_queue
  @heredoc_queue
end
new() click to toggle source
# File lib/puppet-lint/lexer.rb, line 35
# Public: Create a new lexer, starting the position tracking used for
# error reporting at the first line and first column of the input.
def initialize
  @line_no, @column = 1, 1
end

Public Instance Methods

heredoc_queue() click to toggle source
# File lib/puppet-lint/lexer.rb, line 44
# Internal: Instance-level convenience wrapper around the shared
# class-level heredoc queue.
#
# Returns the class-level Array of pending heredoc tags.
def heredoc_queue
  owner = self.class
  owner.heredoc_queue
end
slurp_string(string) click to toggle source
# File lib/puppet-lint/lexer.rb, line 310
# Internal: Consume the remainder of a double quoted string, stopping at
# an unescaped closing quote or at the start of an interpolated
# expression (${).
#
# string - The String to scan, starting just after the opening quote.
#
# Raises LexerError if no closing quote is found.
#
# NOTE(review): this extract appears truncated — the definition ends
# after the until loop with no closing `end`, and the character classes
# below look mangled by documentation extraction (`[^\]` was probably
# `[^\\]`). Verify against lib/puppet-lint/lexer.rb before relying on it.
def slurp_string(string)
  # Match either the start of an interpolation (${) or a closing double
  # quote preceded by an even number of backslashes (i.e. not escaped).
  dq_str_regexp = %r{(\$\{|(\A|[^\])(\\)*")}m
  scanner = StringScanner.new(string)
  contents = scanner.scan_until(dq_str_regexp)

  # scan_until returns nil when neither terminator was found, meaning
  # the string is unterminated.
  if scanner.matched.nil?
    raise LexerError.new(@line_no, @column, 'Double quoted string missing closing quote')
  end

  # Keep consuming past interpolated ${...} sections until the match
  # that stopped the scanner is the closing quote itself.
  until scanner.matched.end_with?('"')
    contents += scanner.scan_until(%r{\}}m)
    contents += scanner.scan_until(dq_str_regexp)
  end
tokenise(code) click to toggle source

Internal: Convert a Puppet manifest into tokens.

code - The Puppet manifest to be tokenised as a String.

Returns an Array of PuppetLint::Lexer::Token objects. Raises PuppetLint::LexerError if it encounters unexpected characters (usually the result of syntax errors).

# File lib/puppet-lint/lexer.rb, line 190
# Internal: Convert a Puppet manifest into tokens.
#
# code - The Puppet manifest to be tokenised as a String.
#
# Returns an Array of PuppetLint::Lexer::Token objects.
# Raises PuppetLint::LexerError if it encounters unexpected characters
# (usually the result of syntax errors).
#
# NOTE(review): several character classes below contain `[^\]`, which is
# not a valid Ruby regexp — most likely `[^\\]` mangled by documentation
# extraction. Confirm against lib/puppet-lint/lexer.rb.
def tokenise(code)
  i = 0  # offset of the start of the not-yet-tokenised input

  while i < code.size
    # The unconsumed tail of the manifest.
    chunk = code[i..-1]

    found = false

    # First try the tokens describable by a single regular expression.
    KNOWN_TOKENS.each do |type, regex|
      value = chunk[regex, 1]
      next if value.nil?

      length = value.size
      # Promote NAMEs that are reserved words to their keyword token.
      tokens << if type == :NAME && KEYWORDS.include?(value)
                  new_token(value.upcase.to_sym, value)
                else
                  new_token(type, value)
                end
      i += length
      found = true
      break
    end

    next if found

    # Fall back to the more involved token forms.
    if var_name = chunk[%r{\A\$((::)?(\w+(-\w+)*::)*\w+(-\w+)*(\[.+?\])*)}, 1]
      # $variable — the +1 accounts for the leading dollar sign.
      length = var_name.size + 1
      tokens << new_token(:VARIABLE, var_name)

    elsif chunk =~ %r{\A'.*?'}m
      # Single quoted string: scan from just past the opening quote to
      # the first unescaped closing quote.
      str_content = StringScanner.new(code[i + 1..-1]).scan_until(%r{(\A|[^\])(\\)*'}m)
      length = str_content.size + 1
      tokens << new_token(:SSTRING, str_content[0..-2])

    elsif chunk.start_with?('"')
      # Double quoted string: may contain ${} interpolation, which is
      # expanded into its own token stream by interpolate_string.
      str_contents = slurp_string(code[i + 1..-1])
      lines_parsed = code[0..i].split(LINE_END_RE)
      interpolate_string(str_contents, lines_parsed.count, lines_parsed.last.length)
      length = str_contents.size + 1

    elsif heredoc_name = chunk[%r{\A@\(("?.+?"?(:.+?)?(/.*?)?)\)}, 1]
      # Heredoc opener @("TAG":syntax/escapes) — the body is consumed
      # later, when the end of the current line is reached.
      heredoc_queue << heredoc_name
      tokens << new_token(:HEREDOC_OPEN, heredoc_name)
      length = heredoc_name.size + 3

    elsif comment = chunk[%r{\A(#[^\r\n]*)#{LINE_END_RE}?}, 1]
      # Rest-of-line # comment; the marker is stripped from the value.
      length = comment.size
      comment.sub!(%r{#}, '')
      tokens << new_token(:COMMENT, comment)

    elsif slash_comment = chunk[%r{\A(//[^\r\n]*)#{LINE_END_RE}?}, 1]
      # Rest-of-line // comment.
      length = slash_comment.size
      slash_comment.sub!(%r{//}, '')
      tokens << new_token(:SLASH_COMMENT, slash_comment)

    elsif mlcomment = chunk[%r{\A(/\*.*?\*/)}m, 1]
      # /* ... */ comment: keep the raw text, but strip the comment
      # markers and leading asterisks from the token value.
      length = mlcomment.size
      mlcomment_raw = mlcomment.dup
      mlcomment.sub!(%r{\A/\* ?}, '')
      mlcomment.sub!(%r{ ?\*/\Z}, '')
      mlcomment.gsub!(%r{^ *\*}, '')
      tokens << new_token(:MLCOMMENT, mlcomment, :raw => mlcomment_raw)

    elsif chunk.match(%r{\A/.*?/}) && possible_regex?
      # A / only starts a regex literal in positions where one may
      # legally appear (possible_regex? inspects the previous token).
      str_content = StringScanner.new(code[i + 1..-1]).scan_until(%r{(\A|[^\])(\\)*/}m)
      length = str_content.size + 1
      tokens << new_token(:REGEX, str_content[0..-2])

    elsif eolindent = chunk[%r{\A(#{LINE_END_RE}#{WHITESPACE_RE}+)}m, 1]
      # Line break followed by indentation.
      eol = eolindent[%r{\A(#{LINE_END_RE})}m, 1]
      tokens << new_token(:NEWLINE, eol)
      length = eol.size

      if heredoc_queue.empty?
        indent = eolindent[%r{\A#{LINE_END_RE}+(#{WHITESPACE_RE}+)}m, 1]
        tokens << new_token(:INDENT, indent)
        length += indent.size
      else
        # A heredoc was opened on the line that just ended: its body
        # runs from here to the line bearing the closing tag.
        heredoc_tag = heredoc_queue.shift
        heredoc_name = heredoc_tag[%r{\A"?(.+?)"?(:.+?)?(/.*)?\Z}, 1]
        str_contents = StringScanner.new(code[(i + length)..-1]).scan_until(%r{\|?\s*-?\s*#{heredoc_name}})
        interpolate_heredoc(str_contents, heredoc_tag)
        length += str_contents.size
      end

    elsif whitespace = chunk[%r{\A(#{WHITESPACE_RE}+)}, 1]
      length = whitespace.size
      tokens << new_token(:WHITESPACE, whitespace)

    elsif eol = chunk[%r{\A(#{LINE_END_RE})}, 1]
      # Bare line break with no following indentation.
      length = eol.size
      tokens << new_token(:NEWLINE, eol)

      unless heredoc_queue.empty?
        # Same heredoc-body handling as the indented-newline branch.
        heredoc_tag = heredoc_queue.shift
        heredoc_name = heredoc_tag[%r{\A"?(.+?)"?(:.+?)?(/.*)?\Z}, 1]
        str_contents = StringScanner.new(code[(i + length)..-1]).scan_until(%r{\|?\s*-?\s*#{heredoc_name}})
        _ = code[0..(i + length)].split(LINE_END_RE)
        interpolate_heredoc(str_contents, heredoc_tag)
        length += str_contents.size
      end

    elsif chunk.start_with?('/')
      # A / that did not start a comment or regex is division.
      length = 1
      tokens << new_token(:DIV, '/')

    elsif chunk.start_with?('@')
      # An @ that did not open a heredoc.
      length = 1
      tokens << new_token(:AT, '@')

    else
      raise PuppetLint::LexerError.new(@line_no, @column)
    end

    i += length
  end

  tokens
end
tokens() click to toggle source

Internal: Access the internal token storage.

Returns an Array of PuppetLint::Lexer::Token objects.

# File lib/puppet-lint/lexer.rb, line 179
# Internal: Access the internal token storage, creating the Array on
# first use.
#
# Returns an Array of PuppetLint::Lexer::Token objects.
def tokens
  @tokens = [] unless @tokens
  @tokens
end