Skip to content

Commit 11807f1

Browse files
committed
add Bash scanner from Petr Kovář and Steven Penny (for testing) [GH-19]
1 parent c91e35e commit 11807f1

File tree

1 file changed

+291
-0
lines changed

1 file changed

+291
-0
lines changed

lib/coderay/scanners/bash.rb

+291
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
# Scanner for Bash
2+
# Author: Petr Kovar <pejuko@gmail.com>
3+
4+
module CodeRay module Scanners
5+
6+
class Bash < Scanner
7+
8+
# Scanner metadata. These class-level helpers are not defined in this file
# (presumably inherited from the Scanner base class — verify).
register_for :bash
file_extension 'sh'
title 'bash script'
11+
12+
RESERVED_WORDS = %w(
13+
! [[ ]] case do done elif else esac fi for function if in select then time until while { }
14+
)
15+
16+
COMMANDS = %w(
17+
: . break cd continue eval exec exit export getopts hash pwd
18+
readonly return shift test [ ] times trap umask unset
19+
)
20+
21+
BASH_COMMANDS = %w(
22+
alias bind builtin caller command declare echo enable help let
23+
local logout printf read set shopt source type typeset ulimit unalias
24+
)
25+
26+
PROGRAMS = %w(
27+
awk bash bunzip2 bzcat bzip2 cat chgrp chmod chown cp cut date dd df dir dmesg du ed egrep
28+
false fgrep findmnt fusermount gawk grep groups gunzip gzip hostname install keyctl kill less
29+
ln loadkeys login ls lsblk lsinitcpio lsmod mbchk mkdir mkfifo mknod more mount mountpoint mv
30+
netstat pidof ping ping6 ps pwd readlink red rm rmdir sed sh shred sleep stty su sudo sync tar
31+
touch tput tr traceroute traceroute6 true umount uname uncompress vdir zcat
32+
)
33+
34+
VARIABLES = %w(
35+
CDPATH HOME IFS MAIL MAILPATH OPTARG OPTIND PATH PS1 PS2
36+
)
37+
38+
BASH_VARIABLES = %w(
39+
BASH BASH_ARGC BASH_ARGV BASH_COMMAND BASH_ENV BASH_EXECUTION_STRING
40+
BASH_LINENO BASH_REMATCH BASH_SOURCE BASH_SUBSHELL BASH_VERSINFO
41+
BASH_VERSINFO[0] BASH_VERSINFO[1] BASH_VERSINFO[2] BASH_VERSINFO[3]
42+
BASH_VERSINFO[4] BASH_VERSINFO[5] BASH_VERSION COLUMNS COMP_CWORD
43+
COMP_LINE COMP_POINT COMP_WORDBREAKS COMP_WORDS COMPREPLAY DIRSTACK
44+
EMACS EUID FCEDIT FIGNORE FUNCNAME GLOBIGNORE GROUPS histchars HISTCMD
45+
HISTCONTROL HISTFILE HISTFILESIZE HISTIGNORE HISTSIZE HISTTIMEFORMAT
46+
HOSTFILE HOSTNAME HOSTTYPE IGNOREEOF INPUTRC LANG LC_ALL LC_COLLATE
47+
LC_CTYPE LC_MESSAGE LC_NUMERIC LINENNO LINES MACHTYPE MAILCHECK OLDPWD
48+
OPTERR OSTYPE PIPESTATUS POSIXLY_CORRECT PPID PROMPT_COMMAND PS3 PS4 PWD
49+
RANDOM REPLAY SECONDS SHELL SHELLOPTS SHLVL TIMEFORMAT TMOUT TMPDIR UID
50+
)
51+
52+
# Matches the special parameters $#, $?, $0-$9, $*, $@, $-, $$, $! and $_,
# with optional braces (e.g. ${#}).
# "_" only counts when no identifier character follows, so that an ordinary
# variable such as $_foo is not split into "$_" plus "foo".
PRE_CONSTANTS = / \$\{? (?: \# | \? | \d | \* | @ | - | \$ | \! | _ (?![A-Za-z_0-9]) ) \}? /x
53+
54+
# Word => token kind lookup used throughout the scanner.
# NOTE(review): :ident is what WordList.new is given here; the scanner code
# treats it as the "not in any list" answer — confirm against WordList.
# PROGRAMS is intentionally left out (its add(PROGRAMS, :method) call is disabled).
IDENT_KIND = WordList.new(:ident).
  add(RESERVED_WORDS, :reserved).
  add(COMMANDS, :method).
  add(BASH_COMMANDS, :method).
  add(VARIABLES, :predefined).
  add(BASH_VARIABLES, :predefined)

# Expose the current state (:initial or :quote) and the active string terminator.
attr_reader :state, :quote
63+
64+
# Forwards all arguments to the parent constructor, then sets up the
# tokenizer state used by scan_tokens:
#   @state             :initial (shell code) or :quote (inside a string / here-document)
#   @quote             terminator of the string being scanned, nil outside strings
#   @shell             true while inside a backtick command substitution
#   @brace_shell       number of open "$(" substitutions in :initial state
#   @quote_brace_shell number of open "$(" substitutions inside a string
def initialize(*args)
  super(*args)
  @state = :initial
  @quote = nil
  @shell = false
  @brace_shell = 0
  @quote_brace_shell = 0
end
72+
73+
# Tokenizes the remaining input and reports every token to +encoder+.
#
# Two states, kept in @state, drive the loop:
#   :initial - ordinary shell code
#   :quote   - inside a double-quoted string or a here-document; @quote holds
#              the text that terminates it
#
# encoder:: receives text_token(text, kind), begin_group(kind), end_group(kind)
# options:: handed to handle_error for input no rule recognizes
#
# Returns +encoder+. A string still open at the end of input gets its group closed.
def scan_tokens encoder, options
  # Number of "$((" still open; used to tell an arithmetic "))" apart from
  # two command substitutions closing back to back.
  arith_depth = 0

  until eos?
    kind = match = nil

    if match = scan(/\n/)
      encoder.text_token(match, :plain)
      next
    end

    if @state == :initial
      if match = scan(/\A#!.*/)
        kind = :directive
      elsif match = scan(/\s*#.*/)
        kind = :comment
      elsif match = scan(/.#/)
        kind = :ident
      elsif match = scan(/(?:\. |source ).*/)
        kind = :reserved
      elsif match = scan(/(?:\\.|,)/)
        kind = :plain
      elsif match = scan(/;/)
        kind = :delimiter
      elsif match = scan(/"/)
        @state = :quote
        @quote = match
        encoder.begin_group :string
        encoder.text_token(match, :delimiter)
        next
      elsif match = scan(/<<</)
        # Here-string operator; must be tried before the here-document rule,
        # which would otherwise read "<" as the terminator.
        kind = :bin
      elsif match = scan(/<<\S+/)
        @state = :quote
        # The terminator is the bare word: drop the "-" of "<<-" and any
        # quotes around it. Fall back to everything after "<<" otherwise.
        @quote = match[/\A<<-?(["']?)([^"'\s]+)\1\z/, 2] || match[2..-1]
        encoder.begin_group :string
        encoder.text_token(match, :delimiter)
        next
      elsif match = scan(/`/)
        if @shell
          encoder.text_token(match, :delimiter)
          encoder.end_group :shell
        else
          encoder.begin_group :shell
          encoder.text_token(match, :delimiter)
        end
        @shell = !@shell
        next
      elsif match = scan(/'[^']*'?/)
        kind = :string
      elsif match = scan(/(?: >> | << | >\& | \&(?!\&) | > | < | \|(?!\|) )/x)
        # Longest operators first; "&&" and "||" are left for the operator rule below.
        kind = :bin
      elsif match = scan(/\d+[\.-](?:\d+[\.-]?)+/)
        # versions, dates, and hyphen delimited numbers (this also covers plain decimals)
        kind = :float
      elsif match = scan(/\d+/)
        kind = :integer
      elsif match = scan(/\$\(\(/)
        arith_depth += 1
        kind = :global_variable
      elsif (arith_depth > 0 || @brace_shell < 2) && (match = scan(/\)\)/))
        # "))" ends an arithmetic expression, except when no "$((" is open and
        # at least two "$(" are: then it is two substitutions closing, which
        # the ")" rule below handles one at a time.
        arith_depth -= 1 if arith_depth > 0
        kind = :global_variable
      elsif match = scan(/ \$\{ [^\}]+ \} /x)
        var = match[2..-2]
        if var =~ /\[.*\]/
          encoder.text_token("${", :instance_variable)
          match_array(var, encoder)
          encoder.text_token("}", :instance_variable)
          next
        end
        kind = IDENT_KIND[var]
        kind = :instance_variable if kind == :ident
      elsif match = scan(/ \$\( /x)
        @brace_shell += 1
        encoder.begin_group :shell
        encoder.text_token(match, :delimiter)
        next
      elsif match = scan(/ \) /x)
        if @brace_shell > 0
          encoder.text_token(match, :delimiter)
          encoder.end_group :shell
          @brace_shell -= 1
          next
        end
        # A ")" that closes no "$(" is an ordinary operator.
        kind = :operator
      elsif match = scan(PRE_CONSTANTS)
        kind = :predefined_constant
      elsif match = scan(/[A-Za-z_][A-Za-z_0-9]*\+?=/)
        # Assignment at the start of a word: consume only "name=" / "name+=",
        # so whatever follows is tokenized normally instead of being dropped.
        op = match[/\+?=\z/]
        pre = match[0, match.size - op.size]
        kind = IDENT_KIND[pre]
        kind = :instance_variable if kind == :ident
        encoder.text_token(pre, kind)
        encoder.text_token(op, :operator)
        next
      elsif match = scan(/[^\s'"]*[A-Za-z_][A-Za-z_0-9]*\+?=/)
        # "name=" preceded by other characters, e.g. "--prefix=".
        kind = :plain
      elsif match = scan(/[A-Za-z_]+\[[A-Za-z_\@\*\d]+\]/)
        # array
        match_array(match, encoder)
        next
      elsif match = scan(/ \$[A-Za-z_][A-Za-z_0-9]* /x)
        kind = IDENT_KIND[match[1..-1]]
        kind = :instance_variable if kind == :ident
      elsif match = scan(/read \S+/)
        parts = /read(\s+)(\S+)/.match(match)
        encoder.text_token('read', :method)
        encoder.text_token(parts[1], :space)
        encoder.text_token(parts[2], :instance_variable)
        next
      elsif match = scan(/[\!\:\[\]\{\}]/)
        kind = :reserved
      elsif match = scan(/ [A-Za-z_][A-Za-z_\d]*;? /x)
        word = match.chomp(';')
        kind = IDENT_KIND[word]
        if match.end_with?(';')
          encoder.text_token(word, kind)
          encoder.text_token(';', :delimiter)
          next
        end
      elsif match = scan(/(?: = | - | \+ | \{ | \} | \( | \) | && | \|\| | ;; | ! )/x)
        kind = :operator
      elsif match = scan(/\s+/)
        kind = :space
      elsif match = scan(/[^ \$"'`\d]/)
        kind = :plain
      elsif match = scan(/.+/)
        # this shouldn't be :reserved for highlighting bad matches
        match, kind = handle_error(match, options)
      end
    elsif @state == :quote
      # @quote is arbitrary text (a here-document word), so escape it before
      # using it in a pattern.
      terminator = Regexp.escape(@quote)
      terminator_head = Regexp.escape(@quote[0, 1])

      if match = scan(/\\.?/)
        kind = :content
      elsif match = scan(/#{terminator}/)
        encoder.text_token(match, :delimiter)
        encoder.end_group :string
        @quote = nil
        @state = :initial
        next
      elsif match = scan(PRE_CONSTANTS)
        kind = :predefined_constant
      elsif match = scan(/ (?: \$\(\(.*?\)\) ) /x)
        kind = :global_variable
      elsif match = scan(/ \$\( /x)
        encoder.begin_group :shell
        encoder.text_token(match, :delimiter)
        @quote_brace_shell += 1
        next
      elsif match = scan(/\)/)
        if @quote_brace_shell > 0
          encoder.text_token(match, :delimiter)
          encoder.end_group :shell
          @quote_brace_shell -= 1
          next
        else
          kind = :content
        end
      elsif match = scan(/ \$ (?: (?: \{ [^\}]* \}) | (?: [A-Za-z_0-9]+ ) ) /x)
        parts = /(\$\{?)([^\}]*)(\}?)/.match(match)
        pre = parts[1]
        var = parts[2]
        post = parts[3]
        if var =~ /\[.*?\]/
          encoder.text_token(pre, :instance_variable)
          match_array(var, encoder)
          encoder.text_token(post, :instance_variable)
          next
        end
        # Look the bare name up; the "$" / braces are not part of the word lists.
        kind = IDENT_KIND[var]
        kind = :instance_variable if kind == :ident
      elsif match = scan(/[^\)\$#{terminator_head}\\]+/)
        # Plain text up to the next character that could start something special.
        kind = :content
      elsif match = scan(/#{terminator_head}/)
        # First character of the terminator that does not begin the terminator.
        kind = :content
      elsif match = scan(/\$/)
        # A "$" that introduces nothing is literal text.
        kind = :content
      else
        # this shouldn't be
        match = scan(/.+/)
        match, kind = handle_error(match, options)
      end
    end

    match ||= matched
    encoder.text_token(match, kind)
  end

  encoder.end_group :string if @state == :quote

  encoder
end
266+
267+
268+
# Emits the tokens for an array reference such as "name[key]".
#
# match::   text containing "name[key]", possibly with other text around it
#           (e.g. "#arr[@]" or "arr[0]:-x" taken from inside "${...}")
# encoder:: receives the text_token calls
#
# The name is looked up in IDENT_KIND (:ident becomes :instance_variable);
# the brackets are :operator and the subscript is :key. Text before or after
# the reference is emitted as :instance_variable so nothing is dropped. If
# +match+ holds no array reference at all it is emitted whole.
def match_array(match, encoder)
  parts = /([A-Za-z_][A-Za-z_0-9]*)\[(.*?)\]/.match(match.to_s)
  unless parts
    encoder.text_token(match, :instance_variable) unless match.to_s.empty?
    return
  end

  var = parts[1]
  key = parts[2]
  kind = IDENT_KIND[var]
  kind = :instance_variable if kind == :ident

  # Empty strings are skipped rather than emitted as empty tokens.
  encoder.text_token(parts.pre_match, :instance_variable) unless parts.pre_match.empty?
  encoder.text_token(var, kind)
  encoder.text_token("[", :operator)
  encoder.text_token(key, :key) unless key.empty?
  encoder.text_token("]", :operator)
  encoder.text_token(parts.post_match, :instance_variable) unless parts.post_match.empty?
end
279+
280+
# Decides how text that no scanner rule recognized is reported.
#
# match::   the unrecognized text
# options:: Hash; :ignore_errors (default true) selects the behavior.
#           nil is treated like an empty Hash.
#
# Returns a [text, kind] pair: the text unchanged as :plain when errors are
# ignored, otherwise the text wrapped in ">>>>>" / "<<<<<" as :error.
def handle_error(match, options)
  if (options || {}).fetch(:ignore_errors, true)
    [match, :plain]
  else
    [">>>>>#{match}<<<<<", :error]
  end
end
288+
289+
end
290+
end
291+
end

0 commit comments

Comments
 (0)