Skip to content

Commit abb92f3

Browse files
committed
New: *Go Encoder*
Draft version, copy from c
1 parent a4b8b09 commit abb92f3

File tree

2 files changed

+214
-18
lines changed

2 files changed

+214
-18
lines changed

lib/coderay/helpers/file_type.rb

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
module CodeRay
2-
2+
33
# = FileType
44
#
55
# A simple filetype recognizer.
@@ -8,18 +8,18 @@ module CodeRay
88
#
99
# # determine the type of the given file
1010
# lang = FileType[file_name]
11-
#
11+
#
1212
# # return :text if the file type is unknown
1313
# lang = FileType.fetch file_name, :text
14-
#
14+
#
1515
# # try the shebang line, too
1616
# lang = FileType.fetch file_name, :text, true
1717
module FileType
18-
18+
1919
UnknownFileType = Class.new Exception
20-
20+
2121
class << self
22-
22+
2323
# Try to determine the file type of the file.
2424
#
2525
# +filename+ is a relative or absolute path to a file.
@@ -30,7 +30,7 @@ def [] filename, read_shebang = false
3030
name = File.basename filename
3131
ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot
3232
ext2 = filename.to_s[/\.(.*)/, 1] # from first dot
33-
33+
3434
type =
3535
TypeFromExt[ext] ||
3636
TypeFromExt[ext.downcase] ||
@@ -39,10 +39,10 @@ def [] filename, read_shebang = false
3939
TypeFromName[name] ||
4040
TypeFromName[name.downcase]
4141
type ||= shebang(filename) if read_shebang
42-
42+
4343
type
4444
end
45-
45+
4646
# This works like Hash#fetch.
4747
#
4848
# If the filetype cannot be found, the +default+ value
@@ -51,7 +51,7 @@ def fetch filename, default = nil, read_shebang = false
5151
if default && block_given?
5252
warn 'Block supersedes default value argument; use either.'
5353
end
54-
54+
5555
if type = self[filename, read_shebang]
5656
type
5757
else
@@ -60,9 +60,9 @@ def fetch filename, default = nil, read_shebang = false
6060
raise UnknownFileType, 'Could not determine type of %p.' % filename
6161
end
6262
end
63-
63+
6464
protected
65-
65+
6666
def shebang filename
6767
return unless File.exist? filename
6868
File.open filename, 'r' do |f|
@@ -73,9 +73,9 @@ def shebang filename
7373
end
7474
end
7575
end
76-
76+
7777
end
78-
78+
7979
TypeFromExt = {
8080
'c' => :c,
8181
'cfc' => :xml,
@@ -86,6 +86,7 @@ def shebang filename
8686
'dpr' => :delphi,
8787
'erb' => :erb,
8888
'gemspec' => :ruby,
89+
'go' => :go,
8990
'groovy' => :groovy,
9091
'gvy' => :groovy,
9192
'h' => :c,
@@ -128,16 +129,16 @@ def shebang filename
128129
for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
129130
TypeFromExt[cpp_alias] = :cpp
130131
end
131-
132+
132133
TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/
133-
134+
134135
TypeFromName = {
135136
'Capfile' => :ruby,
136137
'Rakefile' => :ruby,
137138
'Rantfile' => :ruby,
138139
'Gemfile' => :ruby,
139140
}
140-
141+
141142
end
142-
143+
143144
end

lib/coderay/scanners/go.rb

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
module CodeRay
module Scanners

  # Scanner for the Go programming language.
  #
  # The scanner started out as a copy of the C scanner. The C-only parts
  # (preprocessor lines, wide-character prefixes, numeric suffixes and
  # backslash line continuation) have been removed, and Go-specific lexical
  # elements (raw string literals, the := operator, Unicode escapes in rune
  # literals) are handled according to http://golang.org/ref/spec.
  class Go < Scanner

    register_for :go
    file_extension 'go'

    # http://golang.org/ref/spec#Keywords
    KEYWORDS = [
      'break', 'default', 'func', 'interface', 'select',
      'case', 'defer', 'go', 'map', 'struct',
      'chan', 'else', 'goto', 'package', 'switch',
      'const', 'fallthrough', 'if', 'range', 'type',
      'continue', 'for', 'import', 'return', 'var',
    ]  # :nodoc:

    # http://golang.org/ref/spec#Types
    PREDEFINED_TYPES = [
      'bool',
      'uint8', 'uint16', 'uint32', 'uint64',
      'int8', 'int16', 'int32', 'int64',
      'float32', 'float64',
      'complex64', 'complex128',
      'byte', 'rune', 'string', 'error',
      'uint', 'int', 'uintptr',
    ]  # :nodoc:

    PREDEFINED_CONSTANTS = [
      'nil', 'iota',
      'true', 'false',
    ]  # :nodoc:

    # Go has no preprocessor directives. The constant is kept (empty) only so
    # that code referring to Go::DIRECTIVES keeps working.
    DIRECTIVES = []  # :nodoc:

    # Maps an identifier to its token kind; anything not listed is :ident.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(PREDEFINED_TYPES, :predefined_type).
      add(PREDEFINED_CONSTANTS, :predefined_constant)  # :nodoc:

    # What may follow a backslash inside rune and interpreted string literals.
    ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x  # :nodoc:

  protected

    # Tokenizes the scanner's input and sends the tokens to +encoder+.
    #
    # +options+ is accepted for interface compatibility and is not used.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      # True where an identifier directly followed by ':' is a label:
      # at the start of the input, after ';', '{' or '}', and after the ':'
      # that closes a case/default clause.
      label_expected = true
      # True between a 'case'/'default' keyword and the next operator token.
      case_expected = false

      until eos?

        case state

        when :initial

          if match = scan(/\s+/)
            encoder.text_token match, :space

          # Line comments end at the newline (no backslash continuation in
          # Go); an unterminated block comment runs to the end of the input.
          elsif match = scan(%r! // [^\n]* | /\* (?: .*? \*/ | .* ) !mx)
            encoder.text_token match, :comment

          elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
            label_expected = match =~ /[;\{\}]/
            if case_expected
              label_expected = true if match == ':'
              case_expected = false
            end
            encoder.text_token match, :operator

          elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
            kind = IDENT_KIND[match]
            # "name:" is a label, but "name:=" is a short variable declaration.
            if kind == :ident && label_expected && scan(/:(?!=)/)
              kind = :label
              match << matched
            else
              label_expected = false
              if kind == :keyword && (match == 'case' || match == 'default')
                case_expected = true
              end
            end
            encoder.text_token match, kind

          elsif match = scan(/"/)
            encoder.begin_group :string
            encoder.text_token match, :delimiter
            state = :string

          # Raw string literal: everything up to the next backtick is taken
          # verbatim, including newlines and backslashes. A missing closing
          # backtick extends the literal to the end of the input.
          elsif scan(/ ` ([^`]*) (`)? /x)
            encoder.begin_group :string
            encoder.text_token '`', :delimiter
            encoder.text_token self[1], :content unless self[1].empty?
            encoder.text_token self[2], :delimiter if self[2]
            encoder.end_group :string
            label_expected = false

          # Rune literal.
          elsif match = scan(/ ' (?: [^\'\n\\] | \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) )? '? /ox)
            label_expected = false
            encoder.text_token match, :char

          elsif match = scan(/0[xX][0-9A-Fa-f]+/)
            label_expected = false
            encoder.text_token match, :hex

          # Floats are tried before octals and integers so that a literal such
          # as 12.5 is emitted as one token instead of being split.
          elsif match = scan(/ (?: \d+\.\d* | \.\d+ ) (?: [eE][+-]?\d+ )? | \d+ [eE][+-]?\d+ /x)
            label_expected = false
            encoder.text_token match, :float

          elsif match = scan(/0[0-7]+(?![89])/)
            label_expected = false
            encoder.text_token match, :octal

          elsif match = scan(/\d+/)
            label_expected = false
            encoder.text_token match, :integer

          else
            encoder.text_token getch, :error

          end

        when :string
          if match = scan(/[^\\\n"]+/)
            encoder.text_token match, :content
          elsif match = scan(/"/)
            encoder.text_token match, :delimiter
            encoder.end_group :string
            state = :initial
            label_expected = false
          elsif match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char
          elsif match = scan(/ \\ | $ /x)
            # Invalid escape or end of line inside the literal: close the
            # string group and report the error.
            encoder.end_group :string
            encoder.text_token match, :error
            state = :initial
            label_expected = false
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder
          end

        else
          raise_inspect 'Unknown state', encoder

        end

      end

      # Close a string group left open by input that ended inside a string.
      if state == :string
        encoder.end_group :string
      end

      encoder
    end

  end

end
end

0 commit comments

Comments
 (0)