Skip to content

Commit f3073bd

Browse files
committed
Scala scanner. Register the scanner under the :scala symbol.
1 parent 9ede776 commit f3073bd

File tree

2 files changed

+595
-0
lines changed

2 files changed

+595
-0
lines changed

lib/coderay/scanners/scala.rb

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
module CodeRay
  module Scanners

    # Scanner for Scala.
    #
    # Tokenizes Scala source using a two-state machine (:initial and :string)
    # and is registered under the :scala symbol. The word lists below combine
    # Java-style keywords, types and modifiers with Scala-specific words such
    # as +object+, +match+, +val+, +var+, +def+, +trait+ and +implicit+.
    class Scala < Scanner

      register_for :scala

      # Loaded lazily from scanners/scala/builtin_types.
      autoload :BuiltinTypes, CodeRay.coderay_path('scanners', 'scala', 'builtin_types')

      # Java keyword reference:
      # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
      # The list also carries Scala words (object, match, with, val, var,
      # override, def, implicit, type, trait).
      KEYWORDS = %w[
        case catch continue default do else object match with val var
        finally for if instanceof import new package override def
        return switch throw try typeof while implicit type trait
        debugger export
      ]  # :nodoc:
      RESERVED = %w[ const goto ]  # :nodoc:
      CONSTANTS = %w[ false null true ]  # :nodoc:
      MAGIC_VARIABLES = %w[ this super ]  # :nodoc:
      TYPES = %w[
        boolean byte char class double enum float int interface long
        short void
      ] << '[]'  # :nodoc: because int[] should be highlighted as a type
      DIRECTIVES = %w[
        abstract extends final implements native private protected public
        static strictfp synchronized throws transient volatile
      ]  # :nodoc:

      # Maps a scanned word to its token kind; unknown words are :ident.
      # Later #add calls take precedence for words present in several lists.
      IDENT_KIND = WordList.new(:ident).
        add(KEYWORDS, :keyword).
        add(RESERVED, :reserved).
        add(CONSTANTS, :predefined_constant).
        add(MAGIC_VARIABLES, :local_variable).
        add(TYPES, :type).
        add(BuiltinTypes::List, :predefined_type).
        add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
        add(DIRECTIVES, :directive)  # :nodoc:

      ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:
      # Plain (unescaped, non-terminating) string content, keyed by delimiter.
      STRING_CONTENT_PATTERN = {
        "'" => /[^\\']+/,
        '"' => /[^\\"]+/,
        '/' => /[^\\\/]+/,
      }  # :nodoc:
      IDENT = /[a-zA-Z_][A-Za-z_0-9]*/  # :nodoc:

    protected

      # Scans the whole input and feeds the resulting tokens to +encoder+.
      #
      # encoder:: receives text_token / begin_group / end_group calls.
      # options:: accepted for the Scanner interface; not read by this method.
      #
      # Returns +encoder+.
      def scan_tokens encoder, options

        state = :initial
        string_delimiter = nil
        # false, or the token kind (:include / :namespace) to use for the
        # dotted name that follows an +import+ / +package+ keyword.
        package_name_expected = false
        # true right after class / trait / object: the next identifier is
        # emitted as :class.
        class_name_follows = false
        # true when the previous non-space, non-comment token was '.', so a
        # following word is a member name and must not be treated as a keyword.
        last_token_dot = false

        until eos?

          case state

          when :initial

            if match = scan(/ \s+ | \\\n /x)
              # Scala statements normally end at a newline, not at ';'.
              # Without this reset every identifier after the first import
              # or package clause would be emitted as a package name.
              package_name_expected = false if match.include?("\n")
              encoder.text_token match, :space
              next  # keep last_token_dot across whitespace

            elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
              encoder.text_token match, :comment
              next  # keep last_token_dot across comments

            elsif package_name_expected && match = scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
              encoder.text_token match, package_name_expected

            elsif match = scan(/ #{IDENT} | \[\] /ox)
              kind = IDENT_KIND[match]
              if last_token_dot
                kind = :ident
              elsif class_name_follows
                kind = :class
                class_name_follows = false
              else
                case match
                when 'import'
                  package_name_expected = :include
                when 'package'
                  package_name_expected = :namespace
                when 'class', 'trait', 'object'
                  # "case class Foo" is covered too: 'case' and 'class'
                  # arrive as two separate tokens.
                  class_name_follows = true
                end
              end
              encoder.text_token match, kind

            elsif match = scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
              encoder.text_token match, :operator

            elsif match = scan(/;/)
              package_name_expected = false
              encoder.text_token match, :operator

            elsif match = scan(/\{/)
              class_name_follows = false
              encoder.text_token match, :operator

            elsif check(/[\d.]/)
              # A '.' can only get here when a digit follows it; every other
              # '.' was consumed by the operator branch above.
              if match = scan(/0[xX][0-9A-Fa-f]+/)
                encoder.text_token match, :hex
              elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
                encoder.text_token match, :octal
              elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
                encoder.text_token match, :float
              elsif match = scan(/\d+[lL]?/)
                encoder.text_token match, :integer
              end

            elsif match = scan(/["']/)
              state = :string
              encoder.begin_group state
              string_delimiter = match
              encoder.text_token match, :delimiter

            elsif match = scan(/ @ #{IDENT} /ox)
              encoder.text_token match, :annotation

            else
              encoder.text_token getch, :error

            end

          when :string
            if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
              encoder.text_token match, :content
            elsif match = scan(/["'\/]/)
              # The content pattern already swallowed any quote character
              # other than the active delimiter, so this closes the string.
              encoder.text_token match, :delimiter
              encoder.end_group state
              state = :initial
              string_delimiter = nil
            elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
                encoder.text_token match, :content
              else
                encoder.text_token match, :char
              end
            elsif match = scan(/\\./m)
              encoder.text_token match, :content
            elsif match = scan(/ \\ | $ /x)
              encoder.end_group state
              state = :initial
              encoder.text_token match, :error
            else
              raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
            end

          else
            raise_inspect 'Unknown state', encoder

          end

          last_token_dot = match == '.'

        end

        # Close a string group left open by an unterminated literal.
        if state == :string
          encoder.end_group state
        end

        encoder
      end

    end

  end
end

0 commit comments

Comments
 (0)