# Simple and brutish tokenizer.
# We only use it to test for recursive calls (at the moment).
# The way it works:
# 1. Walk the source line by line.
# 2. Run each line through a bunch of regexes.
# 3. If a regex matches, extract a dict of values from its named groups, then
# 4. check if there is a custom parser method for that token type:
#    - If yes, send the dict there.
#    - If no, append the dict to the current token list.
# The current token list is the top level by default, but can be changed. For
# example, when a function is found, it sets its "body" array as
# `_current_scope`, which makes every subsequent line get appended to that body.
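#
# A minimal usage sketch (the source snippet below is a hypothetical example):
#
#     var source := "func countdown():\n\tcountdown()"
#     var tokenizer := MiniGDScriptTokenizer.new(source)
#     print(tokenizer.find_any_recursive_function())  # Prints "countdown".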
class_name MiniGDScriptTokenizer

const TOKEN_FUNC_DECLARATION := "function_declaration"
const TOKEN_FUNC_CALL := "function_call"

var _lines := PoolStringArray()
var _index := 0
var _current_line := ""
var tokens := []
var _current_scope := []
var indent_regex := RegEx.new()
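# One pattern per token type; after `_init()` runs, the values below are
# replaced by compiled RegEx objects. Named groups become keys on the extracted
# token dict: `func_name` and `args` for declarations, `func_name` and `params`
# for calls.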
var _available_tokens := {
	TOKEN_FUNC_DECLARATION: "^func\\s+(?<func_name>[a-zA-Z_].*?)(?:\\(\\s*(?:(?<args>[^)]+)[,)])*|\\):)",
	TOKEN_FUNC_CALL: "\\t.*?\\s?(?<func_name>[a-zA-Z_][a-zA-Z0-9_]+)\\(\\s*(?<params>.*?)\\s*\\)"
}


func _init(text: String) -> void:
	_lines = text.split("\n")
	indent_regex.compile('^(\\s|\\t)')
	for token_type in _available_tokens:
		var pattern: String = _available_tokens[token_type]
		var regex := RegEx.new()
		regex.compile(pattern)
		_available_tokens[token_type] = regex
	_current_scope = tokens
	tokenize()
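

# Parses a matched function declaration: splits the raw `args` capture into a
# list of parameter dicts, then opens a new scope so subsequent indented lines
# get appended to this function's body.
# As a hand-worked illustration (not generated output), the line
# `func move(delta: float, speed: float = 10.0):` yields a token shaped like:
#
#     {
#         "type": "function_declaration",
#         "func_name": "move",
#         "args": [
#             {"name": "delta", "type": "float", "default": "", "required": true},
#             {"name": "speed", "type": "float", "default": "10.0", "required": false}
#         ],
#         "body": []
#     }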
func _process_function_declaration(token: Dictionary) -> void:
	var parameters_list: PoolStringArray = token.get("args", "").split(",")
	var parameters := []
	for tuple_str in parameters_list:
		var tuple: PoolStringArray = tuple_str.split(":")
		var param := {
			"name": "",
			"type": "",
			"default": "",
			"required": true
		}
		param.name = tuple[0].strip_edges()
		if tuple.size() > 1:
			var type := tuple[1].strip_edges().split("=")
			param.type = type[0].strip_edges()
			if type.size() > 1:
				param.default = type[1].strip_edges()
				param.required = false
		if param.name != "":
			parameters.append(param)
	token["args"] = parameters
	var body := []
	token["body"] = body
	_current_scope = body
	tokens.append(token)
	_index += 1


func _test_regex(type: String, regex: RegEx, line: String) -> bool:
	var m := regex.search(line)
	if m == null or m.names.size() == 0:
		return false
	var token := {
		"type": type
	}
	for group_name in m.names:
		token[group_name] = m.get_string(group_name)
	var further_process_method_name := "_process_%s" % [type]
	if has_method(further_process_method_name):
		# The custom parser method is responsible for advancing `_index`.
		call(further_process_method_name, token)
	else:
		_current_scope.append(token)
		_index += 1
	return true


func _tokenize_line(line: String) -> bool:
	for token_type in _available_tokens:
		var regex := _available_tokens[token_type] as RegEx
		var found := _test_regex(token_type, regex, line)
		if found:
			return true
	return false


func tokenize() -> void:
	_index = 0
	var size := _lines.size()
	while _index < size:
		_current_line = _lines[_index]
		# Skip comments early; we don't care about them.
		if _current_line.begins_with("#"):
			_index += 1
			continue
		var is_indented := indent_regex.search(_current_line)
		# Any line at the root level, apart from a comment, resets the context.
		if is_indented == null:
			_current_scope = tokens
		var found := _tokenize_line(_current_line)
		if not found:
			# No token matched, so we advance `_index` ourselves.
			_index += 1


###############################################################################
#
# Analysis Utilities
#

# If there is a recursive function, this function returns its name; otherwise,
# it returns an empty string.
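# Note that only direct recursion is detected (a function calling itself in
# its own body); mutually recursive functions, where `a()` calls `b()` and
# `b()` calls `a()`, go unnoticed.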
func find_any_recursive_function() -> String:
	for token in tokens:
		if token.type == TOKEN_FUNC_DECLARATION:
			for sub_token in token.body:
				if sub_token.type == TOKEN_FUNC_CALL:
					if sub_token.func_name == token.func_name:
						return token.func_name
	return ""


func _to_string() -> String:
	return JSON.print(tokens, " ")