Skip to content

Commit 501df76

Browse files
author
Quintus
committed
Lua scanner for CodeRay. Meta-commit.
This commit is a super-commit containing all the subcommits for implementing the Lua scanner.
1 parent 359db45 commit 501df76

File tree

3 files changed

+271
-0
lines changed

3 files changed

+271
-0
lines changed

lib/coderay/scanners/lua.rb

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Scanner for the Lua[http://lua.org] programming language.
4+
#
5+
# The language’s complete syntax is defined in
6+
# {the Lua manual}[http://www.lua.org/manual/5.2/manual.html],
7+
# which is what this scanner tries to conform to.
8+
class CodeRay::Scanners::Lua < CodeRay::Scanners::Scanner
9+
10+
register_for :lua
11+
file_extension "lua"
12+
title "Lua"
13+
14+
# Keywords used in Lua. Frozen so the shared word list cannot be mutated
# by accident.
KEYWORDS = %w[
  and break do else elseif end
  for function goto if in
  local not or repeat return
  then until while
].freeze
20+
21+
# Constants set by the Lua core. Frozen so the shared word list cannot be
# mutated by accident.
PREDEFINED_CONSTANTS = %w[false true nil].freeze
23+
24+
# The expressions contained in this array are part of Lua's `basic'
# library. Although it is not strictly necessary to load that library,
# doing so is highly recommended, and anyone who does not would have to
# provide their own implementations of some of these expressions. They are
# neither keywords nor constants, but they are almost always predefined, so
# they get tagged as `predefined' rather than anything else.
#
# This list excludes values of the form `_UPPERCASE' because the Lua manual
# reserves such identifiers for Lua anyway; the scanner highlights them
# directly, without the need for specific identifiers to be listed here.
#
# Frozen so the shared word list cannot be mutated by accident.
PREDEFINED_EXPRESSIONS = %w[
  assert collectgarbage dofile error getmetatable
  ipairs load loadfile next pairs pcall print
  rawequal rawget rawlen rawset select setmetatable
  tonumber tostring type xpcall
].freeze
41+
42+
# Automatic token kind selection for normal words: keywords, predefined
# constants and predefined (basic library) expressions each map to their
# own token kind.
# NOTE(review): the :ident argument is presumably the kind returned for any
# word that is not listed -- confirm against CodeRay::WordList.
IDENT_KIND = CodeRay::WordList.new(:ident)
  .add(KEYWORDS, :keyword)
  .add(PREDEFINED_CONSTANTS, :predefined_constant)
  .add(PREDEFINED_EXPRESSIONS, :predefined)
47+
48+
protected
49+
50+
# Scanner initialization.
#
# Puts the state machine into its starting configuration: the :initial
# state, no open table braces, and no pending long-bracket level or string
# delimiter.
def setup
  @state = :initial
  @brace_depth = 0
  # Set by the initial state when a long bracket or a quoted string opens;
  # initialised here so they never linger from earlier use.
  @num_equals = nil
  @start_delim = nil
end
55+
56+
# CodeRay entry hook. Starts parsing.
#
# Stores the encoder and options, then repeatedly dispatches to the
# handle_state_* method named after the current state until the input is
# exhausted. Any group that is still open at that point (an unterminated
# quoted string, a long bracket opened right at the end of input, unclosed
# table braces) is closed so that every begin_group sent to the encoder is
# paired with an end_group.
#
# encoder:: receives text_token / begin_group / end_group calls.
# options:: stored in @options; not otherwise inspected here.
#
# Returns the encoder.
def scan_tokens(encoder, options)
  @encoder = encoder
  @options = options

  send(:"handle_state_#{@state}") until eos?

  # A string or long bracket is always the innermost open group, so it is
  # closed before any enclosing tables.
  open_group =
    case @state
    when :string, :long_string then :string
    when :long_comment then :comment
    end
  if open_group
    @encoder.end_group(open_group)
    @state = :initial
  end

  open_tables = @brace_depth.to_i
  if open_tables > 0
    open_tables.times { @encoder.end_group(:table) }
    @brace_depth = 0
    @state = :initial if @state == :table
  end

  @encoder
end
65+
66+
# Handles the default state, in which arbitrary Lua code is expected.
#
# Consumes exactly one token from the input, sends it to the encoder and,
# where the token opens a construct that needs special treatment (long
# comment, long string, quoted string, table, or one of the keywords
# `function', `goto', `local'), switches to the matching state.
def handle_state_initial
  if match = scan(/\-\-\[\=*\[/) # --[[ long (possibly multiline) comment ]]
    @num_equals = match.count("=") # Number must match for comment end
    @encoder.begin_group(:comment)
    @encoder.text_token(match, :delimiter)
    @state = :long_comment

  elsif match = scan(/--.*?$/) # --Lua comment
    @encoder.text_token(match, :comment)

  elsif match = scan(/\[=*\[/) # [[ long (possibly multiline) string ]]
    @num_equals = match.count("=") # Number must match for string end
    @encoder.begin_group(:string)
    @encoder.text_token(match, :delimiter)
    @state = :long_string

  elsif match = scan(/::\s*[a-zA-Z_][a-zA-Z0-9_]*\s*::/) # ::goto_label:: (a single-character name is a valid label)
    @encoder.text_token(match, :label)

  elsif match = scan(/_[A-Z]+(?![a-zA-Z0-9_])/) # _UPPERCASE are names reserved for Lua; the lookahead keeps e.g. `_Gfoo' a plain identifier
    @encoder.text_token(match, :predefined)

  elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # Normal letters (or letters followed by digits)
    kind = IDENT_KIND[match]

    # Extra highlighting for entities following certain keywords
    if kind == :keyword
      case match
      when "function" then @state = :function_expected
      when "goto"     then @state = :goto_label_expected
      when "local"    then @state = :local_var_expected
      end
    end

    @encoder.text_token(match, kind)

  elsif match = scan(/\{/) # Opening table brace {
    @encoder.begin_group(:table)
    @encoder.text_token(match, @brace_depth >= 1 ? :inline_delimiter : :delimiter)
    @brace_depth += 1
    @state = :table

  elsif match = scan(/\}/) # Closing table brace }
    if @brace_depth == 0 # Mismatched brace: no table group is open, so none may be closed
      @encoder.text_token(match, :error)
    elsif @brace_depth == 1 # Closes the outermost table
      @brace_depth = 0
      @encoder.text_token(match, :delimiter)
      @encoder.end_group(:table)
    else # Closes a nested table; the enclosing table continues
      @brace_depth -= 1
      @encoder.text_token(match, :inline_delimiter)
      @encoder.end_group(:table)
      @state = :table
    end

  elsif match = scan(/["']/) # String delimiters " and '
    @encoder.begin_group(:string)
    @encoder.text_token(match, :delimiter)
    @start_delim = match
    @state = :string

  #                       ↓Prefix                hex number ←|→ decimal number
  elsif match = scan(/-? (?:0x\h* \. \h+ (?:p[+\-]?\d+)? | \d*\.\d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
    @encoder.text_token(match, :float)

  #                       ↓Prefix         hex number ←|→ decimal number
  elsif match = scan(/-? (?:0x\h+ (?:p[+\-]?\d+)? | \d+ (?:e[+\-]?\d+)?)/ix) # hexadecimal constants have no E power, decimal ones no P power
    @encoder.text_token(match, :integer)

  elsif match = scan(/[\+\-\*\/%^\#=~<>\(\)\[\]:;,] | \.(?!\d)/x) # Operators
    @encoder.text_token(match, :operator)

  elsif match = scan(/\s+/) # Space
    @encoder.text_token(match, :space)

  else # Invalid stuff. Note that Lua doesn't accept multibyte chars outside of strings, hence these are also errors.
    @encoder.text_token(getch, :error)
  end

  # It may be that we're scanning a full-blown subexpression of a table
  # (tables can contain full expressions in parts).
  # If this is the case, return to :table scanning state.
  @state = :table if @state == :initial && @brace_depth >= 1
end
150+
151+
# Handles the state entered right after the `function' keyword.
#
# Consumes one token: a parenthesised parameter list (anonymous function),
# a `tbl.' / `tbl:' style prefix, the function name itself, or whitespace.
# Everything except a prefix or whitespace returns to the :initial state.
def handle_state_function_expected
  if match = scan(/\(.*?\)/m) # x = function() # "Anonymous" function without explicit name
    @encoder.text_token(match, :operator)
    @state = :initial
  elsif match = scan(/[a-zA-Z_] (?:[a-zA-Z0-9_\.] (?!\.\d))* [\.\:]/x) # function tbl.subtbl.foo() | function tbl:foo() # Colon only allowed as last separator
    # Only the prefix was consumed; stay in this state so the trailing
    # name is picked up as the function name.
    @encoder.text_token(match, :ident)
  elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/) # function foo()
    @encoder.text_token(match, :function)
    @state = :initial
  elsif match = scan(/\s+/) # Between the `function' keyword and the ident may be any amount of whitespace
    @encoder.text_token(match, :space)
  else
    @encoder.text_token(getch, :error)
    @state = :initial
  end
end
167+
168+
# Handles the state entered right after the `goto' keyword.
#
# Consumes one token: the label name (then returns to :initial),
# whitespace (stays in this state), or an unexpected character, which is
# flagged as an error and also returns to :initial so that a later,
# unrelated identifier is not mistaken for the label.
def handle_state_goto_label_expected
  if match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
    @encoder.text_token(match, :label)
    @state = :initial
  elsif match = scan(/\s+/) # Between the `goto' keyword and the label may be any amount of whitespace
    @encoder.text_token(match, :space)
  else
    @encoder.text_token(getch, :error)
    @state = :initial
  end
end
178+
179+
# Handles the state entered right after the `local' keyword.
#
# Consumes one token of a local declaration: the `function' keyword
# (switching to :function_expected), a variable name, a separating comma,
# the `=' that starts the initialiser, or whitespace. A line break, the
# `=' sign, or anything that cannot belong to the name list ends the
# declaration and returns to :initial.
def handle_state_local_var_expected
  if match = scan(/function(?![a-zA-Z0-9_])/) # local function ... (but not a name that merely starts with `function')
    @encoder.text_token(match, :keyword)
    @state = :function_expected
  elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]*/)
    @encoder.text_token(match, :local_variable)
  elsif match = scan(/,/)
    @encoder.text_token(match, :operator)
  elsif match = scan(/=/)
    @encoder.text_token(match, :operator)
    # After encountering the equal sign, arbitrary expressions are
    # allowed again, so just return to the main state for further
    # parsing.
    @state = :initial
  elsif match = scan(/\s*\n/) # Whitespace up to and including a line break ends the declaration
    @encoder.text_token(match, :space)
    @state = :initial
  elsif match = scan(/\s+/)
    @encoder.text_token(match, :space)
  else
    # Not part of a name list (e.g. `;' or the start of a comment). This
    # clause does not advance the scan pointer; the :initial state will
    # consume and classify the character.
    @state = :initial
  end
end
202+
203+
# Handles the inside of a long comment (--[[ ... ]], --[==[ ... ]==], ...).
#
# Consumes everything up to and including the closing long bracket whose
# number of `=' signs equals @num_equals. If no such bracket exists, the
# remaining input is flagged as an error. Either way the :comment group is
# closed and scanning returns to :initial.
def handle_state_long_comment
  if match = scan(/.*?(?=\]={#{@num_equals}}\])/m)
    # An empty comment such as --[[]] has no content; don't emit a
    # zero-length token for it.
    @encoder.text_token(match, :content) unless match.empty?

    delim = scan(/\]={#{@num_equals}}\]/)
    @encoder.text_token(delim, :delimiter)
  else # No terminator found till EOF
    @encoder.text_token(rest, :error)
    terminate
  end
  @encoder.end_group(:comment)
  @state = :initial
end
216+
217+
# Handles the inside of a long string ([[ ... ]], [==[ ... ]==], ...).
#
# Consumes everything up to and including the closing long bracket whose
# number of `=' signs equals @num_equals. If no such bracket exists, the
# remaining input is flagged as an error. Either way the :string group is
# closed and scanning returns to :initial.
def handle_state_long_string
  if match = scan(/.*?(?=\]={#{@num_equals}}\])/m) # Long strings do not interpret any escape sequences
    # An empty string such as [[]] has no content; don't emit a
    # zero-length token for it.
    @encoder.text_token(match, :content) unless match.empty?

    delim = scan(/\]={#{@num_equals}}\]/)
    @encoder.text_token(delim, :delimiter)
  else # No terminator found till EOF
    @encoder.text_token(rest, :error)
    terminate
  end
  @encoder.end_group(:string)
  @state = :initial
end
230+
231+
# Handles the inside of a quoted string ("..." or '...').
#
# Consumes one token: a run of plain content, an escape sequence, the
# closing delimiter (the same character that opened the string, kept in
# @start_delim), an unescaped newline (an error that also ends the
# string), or any other stray character (an error).
def handle_state_string
  if match = scan(/[^\\#{@start_delim}\n]+/) # Everything except \ and the start delimiter character is string content (newlines are only allowed if preceded by \ or \z)
    @encoder.text_token(match, :content)
  elsif match = scan(/\\(?:['"abfnrtv\\]|z\s*|x\h\h|\d{1,3}|\n)/m)
    @encoder.text_token(match, :char)
  elsif match = scan(Regexp.compile(@start_delim))
    @encoder.text_token(match, :delimiter)
    @encoder.end_group(:string)
    @state = :initial
  elsif match = scan(/\n/) # Lua forbids unescaped newlines in normal non-long strings
    @encoder.text_token("\\n\n", :error) # Visually appealing error indicator--otherwise users may wonder whether the highlighter cannot highlight multiline strings
    @encoder.end_group(:string)
    @state = :initial
  else
    @encoder.text_token(getch, :error)
  end
end
248+
249+
# Handles the inside of a table constructor ({ ... }).
#
# Consumes a field separator, a `key =' pair, or whitespace. Anything else
# is a value expression, which is delegated to the :initial state.
def handle_state_table
  if match = scan(/[,;]/)
    @encoder.text_token(match, :operator)
  elsif match = scan(/[a-zA-Z_][a-zA-Z0-9_]* (?=\s*=(?!=))/x) # key = value; a following `==' is a comparison, not a key
    @encoder.text_token(match, :key)
    if (space = scan(/\s+/))
      @encoder.text_token(space, :space)
    end
    @encoder.text_token(scan(/=/), :operator)
    @state = :initial
  elsif match = scan(/\s+/m)
    @encoder.text_token(match, :space)
  else
    # Note this clause doesn't advance the scan pointer, it's a kind of
    # "retry with other options" (the :initial state then of course
    # advances the pointer).
    @state = :initial
  end
end
266+
267+
end

lib/coderay/styles/alpha.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ class Alpha < Style
116116
.symbol .content { color:#A60 }
117117
.symbol .delimiter { color:#630 }
118118
.symbol { color:#A60 }
119+
.table .content { color:#808 }
120+
.table .delimiter { color:#40A}
121+
.table { background-color:hsla(200,100%,50%,0.06); }
119122
.tag { color:#070 }
120123
.type { color:#339; font-weight:bold }
121124
.value { color: #088; }

lib/coderay/token_kinds.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ module CodeRay
6363
:shell => 'shell',
6464
:string => 'string',
6565
:symbol => 'symbol',
66+
:table => 'table',
6667
:tag => 'tag',
6768
:type => 'type',
6869
:value => 'value',

0 commit comments

Comments
 (0)