From c5b3cf9f43ca4cf7d262ea901fe2479dcd19b3a9 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sat, 21 Jul 2012 15:23:41 -0400 Subject: [PATCH 01/31] Adding asm reader program, currently just reads and spits out assembly files (minus comments)... will be the basis for an optimizer --- asm-reader.sch | 291 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 asm-reader.sch diff --git a/asm-reader.sch b/asm-reader.sch new file mode 100644 index 0000000..5f6bb39 --- /dev/null +++ b/asm-reader.sch @@ -0,0 +1,291 @@ +(define is-upcase-letter? (lambda (c) (and (char>=? c #\A) (char<=? c #\Z)))) +(define char-is-digit? (lambda (c) (if (char>=? c #\0) (char<=? c #\9) #f))) +(define char-is-hexdigit? (lambda (c) (if (or (and (char>=? c #\0) (char<=? c #\9)) + (and (char>=? c #\a) (char<=? c #\f)) + (and (char>=? c #\A) (char<=? c #\F))) + #t + #f))) +(define is-space? (lambda (c) (if (eof-object? c) #t + (if (char=? c #\space) #t + (if (char=? c #\tab) #t + (if (char=? c #\newline) #t #f)))))) +(define char->digit + (lambda (c) + (if (char=? c #\0) 0 + (if (char=? c #\1) 1 + (if (char=? c #\2) 2 + (if (char=? c #\3) 3 + (if (char=? c #\4) 4 + (if (char=? c #\5) 5 + (if (char=? c #\6) 6 + (if (char=? c #\7) 7 + (if (char=? c #\8) 8 + (if (char=? c #\9) 9 #f)))))))))))) +(define char->hexdigit + (lambda (c) + (if (char-is-digit? c) (char->digit c) + (if (or (char=? c #\a) (char=? c #\A)) 10 + (if (or (char=? c #\b) (char=? c #\B)) 11 + (if (or (char=? c #\c) (char=? c #\C)) 12 + (if (or (char=? c #\d) (char=? c #\D)) 13 + (if (or (char=? c #\e) (char=? c #\E)) 14 + (if (or (char=? c #\f) (char=? c #\F)) 15 #f))))))))) + +(define asm-read-num + (lambda (c) + (letrec ((dec-helper (lambda (acc sign c) + (if (char-is-digit? c) + (dec-helper (+ (* 10 acc) (char->digit c)) + sign + (read-char)) + (cons (* acc sign) (read-char))))) + (hex-helper (lambda (acc c) + (if (char-is-hexdigit? c) + (hex-helper (+ (* 16 acc) (char->hexdigit c)) + (read-char)) + (cons acc (read-char)))))) + (if (char=? c #\0) + (let ((next (read-char))) + (if (or (char=? next #\x) (char=? next #\X)) + (hex-helper 0 (read-char)) + (dec-helper 0 1 next))) + (if (char=? #\+ c) + (dec-helper 0 1 (read-char)) + (if (char=? #\- c) + (dec-helper 0 -1 (read-char)) + (dec-helper 0 1 c))))))) + +(define asm-read-identifier + (lambda (c) + (letrec ((helper (lambda (acc c) + (if (or (and (char<=? #\a c) (char<=? c #\z)) + (and (char<=? #\A c) (char<=? c #\Z)) + (and (char<=? #\0 c) (char<=? c #\9)) + (char=? c #\_)) + (helper (cons c acc) (read-char)) + (cons (list->string (reverse acc)) c))))) + (if (or (and (char<=? #\a c) (char<=? c #\z)) + (and (char<=? #\A c) (char<=? c #\Z)) + (char=? c #\_)) + (helper (list c) (read-char)) + (begin + (display "ERROR: reading identifier, expected [a-zA-Z_]. Got '") + (display c) + (display "'\n") + (quit)))))) + +(define asm-reader-state-number + (lambda (c) + (let ((n (asm-read-num c))) + (cons (cons "number" (car n)) (cdr n))))) + +(define asm-reader-state-pointer + (lambda (c) + (let ((base (asm-read-num c))) + (if (char=? (cdr base) #\,) + (let ((offset (asm-read-num (cdr base)))) + (cons (list "pointer" (car base) (car offset)) + (cdr offset))) + (begin + (display "Error reading pointer literal, expected ',' got: ") + (display (cdr base)) + (quit)))))) + +(define asm-reader-state-vm-const + (lambda (c) + (let ((n (asm-read-num c))) + (cons (cons "vm-const" (car n)) (cdr n))))) + +(define asm-reader-state-lang-const + (lambda (c) + (let ((n (asm-read-num c))) + (cons (cons "lang-const" (car n)) (cdr n))))) + +(define asm-reader-state-string + (lambda (c) + (letrec ((state-unescaped + (lambda (acc c) + (if (eof-object? c) + (begin + (display "ERROR: EOF Encountered while scanning string\n") + (quit)) + (if (char=? #\" c) + (cons (reverse acc) (read-char)) + (if (char=? #\\ c) + (state-escaped (cons c acc) (read-char)) + (state-unescaped (cons c acc) (read-char))))))) + (state-escaped + (lambda (acc c) + (if (eof-object? c) + (begin + (display "ERROR: EOF Encountered while scanning string\n") + (quit)) + (state-unescaped (cons c acc) (read-char)))))) + (let ((r (state-unescaped nil c))) + (cons (cons "string" (list->string (car r))) (cdr r)))))) + +(define asm-reader-state-char + (lambda (c) + (let ((the-char (if (char=? c #\\) + (let ((cc (read-char))) + (if (char=? cc #\t) #\tab + (if (char=? cc #\n) #\newline + (if (char=? cc #\') #\' + (if (char=? cc #\") #\" + (if (char=? cc #\\) #\\ + (begin (display "Unknown escaped sequence: \\") + (display cc) + (quit)))))))) + c))) + (let ((close-q (read-char))) + (if (char=? close-q #\') + (cons (cons "char" the-char) (read-char)) + (begin + (display "ERROR: while reading character. Expected close ' got: ") + (display close-q) + (quit))))))) + +(define asm-reader-state-comment + (lambda (c) + (if (char=? c #\newline) + (asm-reader-state-entrance (read-char)) + (asm-reader-state-comment (read-char))))) + +(define asm-reader-state-semicolon + (lambda (c) + (if (char=? c #\;) + (asm-reader-state-comment (read-char)) + (begin + (display "ERROR: expected semicolon, got '") + (display c) + (display "'\n") + (quit))))) + +(define asm-reader-state-definition + (lambda (c) + (let ((ident (asm-read-identifier c))) + (if (char=? (cdr ident) #\,) + (let ((size (asm-read-num (read-char)))) + (cons (cons "definition" (cons (car ident) (car size))) + (cdr size))) + (cons (cons "definition" (cons (car ident) nil)) + (cdr ident)))))) + +(define asm-reader-state-reference + (lambda (c) + (let ((ident (asm-read-identifier c))) + (cons (cons "reference" (car ident)) + (cdr ident))))) + + +(define asm-reader-state-instruction + (lambda (c) + (letrec ((helper (lambda (acc c) + (if (is-upcase-letter? c) + (helper (cons c acc) (read-char)) + (cons (reverse acc) c))))) + (let ((i (helper (list c) (read-char)))) + (cons (cons "instruction" (list->string (car i))) (cdr i)))))) + +(define asm-reader-state-entrance + (lambda (c) + (if (eof-object? c) #f + (if (is-space? c) + (asm-reader-state-entrance (read-char)) + (if (char=? c #\n) + (asm-reader-state-number (read-char)) + (if (char=? c #\p) + (asm-reader-state-pointer (read-char)) + (if (char=? c #\v) + (asm-reader-state-vm-const (read-char)) + (if (char=? c #\l) + (asm-reader-state-lang-const (read-char)) + (if (char=? c #\') + (asm-reader-state-char (read-char)) + (if (char=? c #\;) + (asm-reader-state-semicolon (read-char)) + (if (char=? c #\#) + (asm-reader-state-comment (read-char)) + (if (char=? c #\") + (asm-reader-state-string (read-char)) + (if (char=? c #\:) + (asm-reader-state-definition (read-char)) + (if (char=? c #\@) + (asm-reader-state-reference (read-char)) + (asm-reader-state-instruction c))))))))))))))) + + + +(define look-ahead #f) +(define asm-next-token + (lambda () + (if (not look-ahead) + (begin + (set! look-ahead (read-char)) + (asm-next-token)) + (let ((z (asm-reader-state-entrance look-ahead))) + (if z + (begin + (set! look-ahead (cdr z)) + (car z)) + z))))) + +(define display-char-asm + (lambda (c) + (display #\') + (display + (if (char=? c #\') "\\'" + (if (char=? c #\tab) "\\t" + (if (char=? c #\newline) "\\n" + (if (char=? c #\\) "\\\\" c))))) + (display #\'))) + +(define print-asm-tok + (lambda (asm-tok) + (let ((typ (car asm-tok)) + (val (cdr asm-tok))) + (if (string=? typ "number") + (begin + (display #\n) + (display val)) + (if (string=? typ "vm-const") + (begin (display #\v) (display val)) + (if (string=? typ "lang-const") + (begin (display #\l) (display val)) + (if (string=? typ "pointer") + (begin + (display #\p) + (display (car val)) + (display #\,) + (display (cdr val))) + (if (string=? typ "string") + (begin (display #\") (display val) (display #\")) + (if (string=? typ "char") + (display-char-asm val) + (if (string=? typ "reference") + (begin (display #\@) (display val)) + (if (string=? typ "definition") + (begin (display #\:) + (display (car val)) + (if (not (null? (cdr val))) + (begin (display #\,) (display (cdr val))) #f)) + (if (string=? typ "instruction") + (display val) + (begin + (display "Unknown token type: ") + (display typ) + (quit)))))))))))))) + +(define go + (lambda () + (let ((tok (asm-next-token))) + (if tok + (begin + ;; (display tok) + ;; (newline) + (print-asm-tok tok) + (newline) + (go)) nil)))) + +(go) + \ No newline at end of file From 8cb24c9c65ad6cd3b1953b05b9aaf1c05152c132 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 23 Nov 2014 09:43:38 -0500 Subject: [PATCH 02/31] implementing some peephole optimizations (renamed asm-reader to peephole). some tweaks to lib and compiler to support --- lib.sch | 9 +- asm-reader.sch => peephole.sch | 240 ++++++++++++++++++++++++++++++--- schemer.sch | 71 +++++++--- 3 files changed, 285 insertions(+), 35 deletions(-) rename asm-reader.sch => peephole.sch (53%) diff --git a/lib.sch b/lib.sch index eaf8b31..5a052fd 100644 --- a/lib.sch +++ b/lib.sch @@ -28,6 +28,8 @@ (define cdar (lambda (l) (cdr (car l)))) (define cddr (lambda (l) (cdr (cdr l)))) (define caddr (lambda (l) (car (cddr l)))) +(define cdddr (lambda (l) (cdr (cddr l)))) +(define cadddr (lambda (l) (car (cdddr l)))) (define >= (lambda (n1 n2) (if (= n2 n1) #t (< n2 n1)))) (define char<=? (lambda (c1 c2) (if (char=? c1 c2) #t (char? (lambda (c1 c2) (charstring-helper n nil) (string-append "-" (number->string-helper (* n -1) nil)))))) - \ No newline at end of file + +(define assoc + (lambda (key alist) + (if (null? alist) #f + (if (string=? key (caar alist)) (car alist) + (assoc key (cdr alist)))))) diff --git a/asm-reader.sch b/peephole.sch similarity index 53% rename from asm-reader.sch rename to peephole.sch index 5f6bb39..1e03772 100644 --- a/asm-reader.sch +++ b/peephole.sch @@ -1,3 +1,49 @@ +(define ins-push "PUSH") +(define ins-pop "POP") +(define ins-swap "SWAP") +(define ins-dup "DUP") +(define ins-rot "ROT") +(define ins-call "CALL") +(define ins-ret "RET") +(define ins-jmp "JMP") +(define ins-jtrue "JTRUE") +(define ins-end "END") +(define ins-add "ADD") +(define ins-sub "SUB") +(define ins-eq "EQ") +(define ins-lt "LT") +(define ins-stor "STOR") +(define ins-load "LOAD") +(define ins-aloc "ALOC") +(define ins-rdrr "RDRR") +(define ins-wtrr "WTRR") +(define ins-isnum "ISNUM") +(define ins-isptr "ISPTR") +(define ins-mul "MUL") +(define ins-div "DIV") +(define ins-mod "MOD") +(define ins-shl "SHL") +(define ins-shr "SHR") +(define ins-bor "BOR") +(define ins-band "BAND") +(define ins-getc "GETC") +(define ins-dump "DUMP") +(define ins-pint "PINT") +(define ins-pchr "PCHR") +(define ins-islconst "ISLCONST") +(define ins-ischr "ISCHR") +(define ins-isins "ISINS") + +(define tag-instruction "instruction") +(define tag-number "number") +(define tag-pointer "pointer") +(define tag-vm-const "vm-const") +(define tag-lang-const "lang-const") +(define tag-string "string") +(define tag-char "char") +(define tag-definition "definition") +(define tag-reference "reference") + (define is-upcase-letter? (lambda (c) (and (char>=? c #\A) (char<=? c #\Z)))) (define char-is-digit? (lambda (c) (if (char>=? c #\0) (char<=? c #\9) #f))) (define char-is-hexdigit? (lambda (c) (if (or (and (char>=? c #\0) (char<=? c #\9)) @@ -77,14 +123,14 @@ (define asm-reader-state-number (lambda (c) (let ((n (asm-read-num c))) - (cons (cons "number" (car n)) (cdr n))))) + (cons (cons tag-number (car n)) (cdr n))))) (define asm-reader-state-pointer (lambda (c) (let ((base (asm-read-num c))) (if (char=? (cdr base) #\,) (let ((offset (asm-read-num (cdr base)))) - (cons (list "pointer" (car base) (car offset)) + (cons (list tag-pointer (car base) (car offset)) (cdr offset))) (begin (display "Error reading pointer literal, expected ',' got: ") @@ -94,12 +140,12 @@ (define asm-reader-state-vm-const (lambda (c) (let ((n (asm-read-num c))) - (cons (cons "vm-const" (car n)) (cdr n))))) + (cons (cons tag-vm-const (car n)) (cdr n))))) (define asm-reader-state-lang-const (lambda (c) (let ((n (asm-read-num c))) - (cons (cons "lang-const" (car n)) (cdr n))))) + (cons (cons tag-lang-const (car n)) (cdr n))))) (define asm-reader-state-string (lambda (c) @@ -121,8 +167,8 @@ (display "ERROR: EOF Encountered while scanning string\n") (quit)) (state-unescaped (cons c acc) (read-char)))))) - (let ((r (state-unescaped nil c))) - (cons (cons "string" (list->string (car r))) (cdr r)))))) + (let ((r (state-unescaped '() c))) + (cons (cons tag-string (list->string (car r))) (cdr r)))))) (define asm-reader-state-char (lambda (c) @@ -139,7 +185,7 @@ c))) (let ((close-q (read-char))) (if (char=? close-q #\') - (cons (cons "char" the-char) (read-char)) + (cons (cons tag-char the-char) (read-char)) (begin (display "ERROR: while reading character. Expected close ' got: ") (display close-q) @@ -166,15 +212,15 @@ (let ((ident (asm-read-identifier c))) (if (char=? (cdr ident) #\,) (let ((size (asm-read-num (read-char)))) - (cons (cons "definition" (cons (car ident) (car size))) + (cons (cons tag-definition (cons (car ident) (car size))) (cdr size))) - (cons (cons "definition" (cons (car ident) nil)) + (cons (cons tag-definition (cons (car ident) '())) (cdr ident)))))) (define asm-reader-state-reference (lambda (c) (let ((ident (asm-read-identifier c))) - (cons (cons "reference" (car ident)) + (cons (cons tag-reference (car ident)) (cdr ident))))) @@ -185,7 +231,7 @@ (helper (cons c acc) (read-char)) (cons (reverse acc) c))))) (let ((i (helper (list c) (read-char)))) - (cons (cons "instruction" (list->string (car i))) (cdr i)))))) + (cons (cons tag-instruction (list->string (car i))) (cdr i)))))) (define asm-reader-state-entrance (lambda (c) @@ -276,16 +322,172 @@ (display typ) (quit)))))))))))))) +(define print-basic-block + (lambda (l) + (if (null? l) #t (begin + (print-asm-tok (car l)) + (newline) + (print-basic-block (cdr l)))))) + +(define is-jmp-instruction? + (lambda (tok) + (and (string=? (car tok) tag-instruction) + (let ((val (cdr tok))) + (or (string=? val ins-jmp) + (string=? val ins-jtrue) + (string=? val ins-call) + (string=? val ins-ret) + (string=? val ins-end)))))) + +(define is-mem-instr? + (lambda (tok) + (and (string=? (car tok) tag-instruction) + (let ((val (cdr tok))) + (or (string=? val ins-aloc) + (string=? val ins-stor)))))) + +(define is-side-effecting-instr? + (lambda (tok) + (and (string=? (car tok) tag-instruction) + (let ((val (cdr tok))) + (or (string=? val ins-aloc) + (string=? val ins-stor)))))) + +(define is-number? (lambda (tok) (string=? (car tok) tag-number))) + +(define ins-has-immediate? + (lambda (ins) + (and (string=? (car ins) tag-instruction) + (string=? (cdr ins) ins-push)))) + +(define is-definition? + (lambda (tok) + (string=? (car tok) tag-definition))) + +(define asm-read-basic-block + (lambda () + (letrec ((helper (lambda (acc) + (let ((tok (asm-next-token))) + (if (not tok) + (reverse acc) + (if (or (is-jmp-instruction? tok) + (is-definition? tok)) + (reverse (cons tok acc)) + (helper (cons tok acc)))))))) + (helper '())))) + +(define is-push-instr? + (lambda (tok) (and (string=? (car tok) tag-instruction) + (string=? (cdr tok) ins-push)))) +(define is-pop-instr? + (lambda (tok) (and (string=? (car tok) tag-instruction) + (string=? (cdr tok) ins-pop)))) +(define is-swap-instr? + (lambda (tok) (and (string=? (car tok) tag-instruction) + (string=? (cdr tok) ins-swap)))) + +(define is-binop-instr? + (lambda (tok) (and (string=? (car tok) tag-instruction) + (or (string=? (cdr tok) ins-add) + (string=? (cdr tok) ins-sub) + (string=? (cdr tok) ins-eq) + (string=? (cdr tok) ins-lt) + (string=? (cdr tok) ins-mul) + (string=? (cdr tok) ins-div) + (string=? (cdr tok) ins-mod) + (string=? (cdr tok) ins-shl) + (string=? (cdr tok) ins-shr) + (string=? (cdr tok) ins-bor) + (string=? (cdr tok) ins-band) + )))) + +(define split + (letrec ((helper (lambda (l n a) + (if (= n 0) (cons (reverse a) l) + (helper (cdr l) (- n 1) (cons (car l) a)))))) + (lambda (l n) (helper l n '())))) + +(define fold (lambda (f s l) (if (null? l) s (fold f (f s (car l)) (cdr l))))) + +(define peephole + (let ((optimizers (list + ;; (push x pop) -> () + (list 3 (lambda (xs) (and (is-push-instr? (car xs)) + (is-pop-instr? (caddr xs)))) + (lambda (xs ys) ys)) + ;; (swap swap) -> () + (list 2 (lambda (xs) (and (is-swap-instr? (car xs)) + (is-swap-instr? (cadr xs)))) + (lambda (xs ys) ys)) + ;; (binop pop) -> (pop pop) + (list 2 (lambda (xs) (and (is-binop-instr? (car xs)) + (is-pop-instr? (cadr xs)))) + (lambda (xs ys) (cons (cons tag-instruction ins-pop) + (cons (cons tag-instruction ins-pop) ys)))) + ;; (push x push y add) + (list 5 (lambda (xs) (and (is-push-instr? (car xs)) + (is-number? (cadr xs)) + (is-push-instr? (caddr xs)) + (is-number? (cadddr xs)) + (is-binop-instr? (cadr (cdddr xs))))) + (lambda (xs ys) + (let ((n0 (cdr (cadr xs))) + (n1 (cdr (cadddr xs))) + (op (cdr (cadr (cdddr xs))))) + (append + (list (cons tag-instruction ins-push) + (cons tag-number + ((cdr (assoc op (list (cons ins-add +) + (cons ins-sub -) + (cons ins-eq =) + (cons ins-lt <) + (cons ins-mul *) + (cons ins-div quotient) + (cons ins-mod remainder) + (cons ins-shl ash) + (cons ins-shr (lambda (a b) (ash a (- 0 b)))) + (cons ins-bor logior) + (cons ins-band logand)))) n0 n1))) + ys))))))) + (letrec ((helper + (lambda (changed bb acc) + (if (= (length bb) 0) (cons changed (reverse acc)) + (let ((changed-block (fold (lambda (changed-block optimizer) + (let ((changed (car changed-block)) + (bb (cdr changed-block)) + (len (car optimizer)) + (check (cadr optimizer)) + (repl (caddr optimizer))) + (if (>= (length bb) len) + (let ((s (split bb len))) + (let ((xs (car s)) + (ys (cdr s))) + (if (check xs) + (cons #t (repl xs ys)) + (cons changed bb)))) + (cons changed bb)))) + (cons changed bb) + optimizers))) + (let ((changed (car changed-block)) + (bb (cdr changed-block))) + (if (null? bb) (cons changed (reverse acc)) + (helper changed (cdr bb) (cons (car bb) acc))))))))) + (lambda (bb) + (let ((changed-block (helper #f bb '()))) + (if (car changed-block) + (peephole (cdr changed-block)) + (cdr changed-block))))))) + + (define go (lambda () - (let ((tok (asm-next-token))) - (if tok + (let ((bb (peephole (asm-read-basic-block)))) + (if (not (null? bb)) (begin - ;; (display tok) - ;; (newline) - (print-asm-tok tok) - (newline) - (go)) nil)))) + (display ";; basic block\n") + (print-basic-block bb) + (go)) + '())))) (go) - \ No newline at end of file + diff --git a/schemer.sch b/schemer.sch index c7f9656..3c1f604 100644 --- a/schemer.sch +++ b/schemer.sch @@ -65,18 +65,19 @@ (define ins-wtrr "WTRR") (define ins-isnum "ISNUM") (define ins-isptr "ISPTR") +(define ins-shl "SHL") +(define ins-shr "SHR") +(define ins-mul "MUL") +(define ins-bor "BOR") +(define ins-band "BAND") ;; The instructions are all used at most once in the compiler so its ;; cheaper to include them as string literals then to clutter up the ;; environment with them. -;; (define ins-mul "MUL") + ;; (define ins-div "DIV") ;; (define ins-mod "MOD") -;; (define ins-shl "SHL") -;; (define ins-shr "SHR") -;; (define ins-bor "BOR") -;; (define ins-band "BAND") ;; (define ins-getc "GETC") ;; (define ins-dump "DUMP") ;; (define ins-pint "PINT") @@ -148,13 +149,9 @@ ; search the list of builtin forms for a ; particular form (define find-special - (lambda (f) - (letrec ((helper (lambda (ss) - (if (null? ss) #f - (if (string=? f (car (car ss))) - (cdr (car ss)) - (helper (cdr ss))))))) - (helper (special-forms))))) + (lambda (f) + (let ((x (assoc f (special-forms)))) + (and x (cdr x))))) ; The top-level-env is a list containing the list of symbols ; defined by the compiler at the top level. @@ -202,7 +199,12 @@ ("-" "subtract") ("*" "multiply") ("%" "modulo") - ("/" "divide") + ("/" "divide") + ("quotient" "divide") + ("remainder" "modulo") + ("ash" "arithmetic_shift") + ("logior" "logior") + ("logand" "logand") ("print-char" "print_char") ("print-num" "print_num") ("string?" "string_q") @@ -357,7 +359,10 @@ (define u-call-cdr (lambda () (assembly-cdr))) ; same with cdr. (define u-call-cons ; cons is really big (13 instructions)! we'll never inline it - (lambda () (append-instructions ins-push (asm-label-reference "__u_cons") ins-call))) + (lambda () + ;; (assembly-cons) + (append-instructions ins-push (asm-label-reference "__u_cons") ins-call) + )) (define u-call-set-car (lambda () (assembly-set-car))) (define u-call-set-cdr (lambda () (assembly-set-cdr))) @@ -1084,7 +1089,43 @@ (append-instruction "MOD") (assembly-funret)) - ; equality comparison + (let ((shl-label (fresh-label)) + (out-label (fresh-label))) + (assembly-builtin-header "arithmetic_shift") + (assembly-get-arg 0) + (assembly-get-arg 1) + (append-instructions + ins-dup + ins-push (asm-number 0) + ins-lt + ins-push (asm-label-reference shl-label) + ins-jtrue + ins-push (asm-number -1) + ins-mul + ins-shr + ins-push (asm-label-reference out-label) + ins-jmp + (asm-label-definition shl-label) + ins-shl + (asm-label-definition out-label) + ) + (assembly-funret)) + + (begin + (assembly-builtin-header "logior") + (assembly-get-arg 0) + (assembly-get-arg 1) + (append-instruction ins-bor) + (assembly-funret)) + + (begin + (assembly-builtin-header "logand") + (assembly-get-arg 0) + (assembly-get-arg 1) + (append-instruction ins-band) + (assembly-funret)) + + ; equality comparison (begin (assembly-builtin-header "equal") (assembly-get-arg 0) From 0b3aac47910f7ce825f7661715a22444c922749f Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 23 Nov 2014 09:44:31 -0500 Subject: [PATCH 03/31] changing some unsigned int casts to uword in interpreter.c (64 bit fixes?) --- interpreter.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/interpreter.c b/interpreter.c index e3df001..6e9ab80 100644 --- a/interpreter.c +++ b/interpreter.c @@ -68,11 +68,11 @@ different VM types. The corresponding packers are defined in the interpreter.h header file. */ -#define PTR_TARGET(x) (((unsigned int)x) & PTR_TARGET_MASK) -#define PTR_SIZE(x) ((((unsigned int)x) >> PTR_TARGET_BITS) & PTR_SIZE_MASK) +#define PTR_TARGET(x) (((uword)x) & PTR_TARGET_MASK) +#define PTR_SIZE(x) ((((uword)x) >> PTR_TARGET_BITS) & PTR_SIZE_MASK) #define NUM_TO_NATIVE(x) ((typeof(x))((((int)x) << FLAG_BITS) >> FLAG_BITS)) #define CHAR_TO_NATIVE(x) ((char)((x) & 0xff)) -#define CELL_TYPE(x) (((unsigned int)x) & FLAG_MASK) +#define CELL_TYPE(x) (((uword)x) & FLAG_MASK) #define IS_NUM(x) (CELL_TYPE(x) == NUM) #define IS_PTR(x) (CELL_TYPE(x) == PTR) #define IS_LCONST(x) (CELL_TYPE(x) == LCONST) @@ -590,40 +590,40 @@ static inline void inspector(void){ int main(int argc, char *argv[]) { /* The long awaited array of instructions! */ - static long instructions[] = { - [I_PUSH] = (long)&&PUSH, [I_POP] = (long)&&POP, - [I_SWAP] = (long)&&SWAP, [I_DUP] = (long)&&DUP, - [I_ROT] = (long)&&ROT, + static void* instructions[] = { + [I_PUSH] = &&PUSH, [I_POP] = &&POP, + [I_SWAP] = &&SWAP, [I_DUP] = &&DUP, + [I_ROT] = &&ROT, /* Control flow */ - [I_CALL] = (long)&&CALL, - [I_RET] = (long)&&RET, [I_JMP] = (long)&&JMP, - [I_JTRUE] = (long)&&JTRUE, [I_END] = (long)&&END, + [I_CALL] = &&CALL, + [I_RET] = &&RET, [I_JMP] = &&JMP, + [I_JTRUE] = &&JTRUE, [I_END] = &&END, /* Arithmetic */ - [I_ADD] = (long)&&PLUS, [I_MUL] = (long)&&MUL, - [I_SUB] = (long)&&SUB, [I_DIV] = (long)&&DIV, - [I_MOD] = (long)&&MOD, [I_SHL] = (long)&&SHL, - [I_SHR] = (long)&&SHR, [I_BOR] = (long)&&BOR, - [I_BAND] = (long)&&BAND, + [I_ADD] = &&PLUS, [I_MUL] = &&MUL, + [I_SUB] = &&SUB, [I_DIV] = &&DIV, + [I_MOD] = &&MOD, [I_SHL] = &&SHL, + [I_SHR] = &&SHR, [I_BOR] = &&BOR, + [I_BAND] = &&BAND, /* Comparison */ - [I_EQ] = (long)&&EQ, [I_LT] = (long)&<, + [I_EQ] = &&EQ, [I_LT] = &<, /* Reading and writing memory */ - [I_STOR] = (long)&&STOR, [I_LOAD] = (long)&&LOAD, [I_ALOC] = (long)&&ALOC, + [I_STOR] = &&STOR, [I_LOAD] = &&LOAD, [I_ALOC] = &&ALOC, /* I/0 */ - [I_GETC] = (long)&&GETC, [I_DUMP] = (long)&&DUMP, - [I_PINT] = (long)&&PINT, [I_PCHR] = (long)&&PCHR, + [I_GETC] = &&GETC, [I_DUMP] = &&DUMP, + [I_PINT] = &&PINT, [I_PCHR] = &&PCHR, /* Root Register manipulation */ - [I_RDRR] = (long)&&RDRR, [I_WTRR] = (long)&&WTRR, + [I_RDRR] = &&RDRR, [I_WTRR] = &&WTRR, /* Type checking */ - [I_ISNUM] = (long)&&ISNUM, [I_ISLCONST] = (long)&&ISLCONST, - [I_ISPTR] = (long)&&ISPTR, [I_ISBOOL] = (long)&&ISBOOL, - [I_ISCHR] = (long)&&ISCHR, [I_ISINS] = (long)&&ISINS + [I_ISNUM] = &&ISNUM, [I_ISLCONST] = &&ISLCONST, + [I_ISPTR] = &&ISPTR, [I_ISBOOL] = &&ISBOOL, + [I_ISCHR] = &&ISCHR, [I_ISINS] = &&ISINS }; /* We first do some basic startup stuff to load the program */ From 29485b38ccd48a38d027183a76da933e9ba08b2d Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 23 Nov 2014 12:48:36 -0500 Subject: [PATCH 04/31] simplify matching of instr sequences and add peephole optimization for consecutive references to first level env --- peephole.sch | 86 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 69 insertions(+), 17 deletions(-) diff --git a/peephole.sch b/peephole.sch index 1e03772..e80b836 100644 --- a/peephole.sch +++ b/peephole.sch @@ -409,6 +409,37 @@ (define fold (lambda (f s l) (if (null? l) s (fold f (f s (car l)) (cdr l))))) +(define matches + (lambda (pat seq) + (if (null? pat) #t + (if (null? seq) #f + (and ((car pat) (car seq)) + (matches (cdr pat) (cdr seq))))))) + +(define mkinstr (lambda (i) (cons tag-instruction i))) +(define mknum (lambda (n) (cons tag-number n))) + +(define inliners + (list (cons ins-add +) + (cons ins-sub -) + (cons ins-eq =) + (cons ins-lt <) + (cons ins-mul *) + (cons ins-div quotient) + (cons ins-mod remainder) + (cons ins-shl ash) + (cons ins-shr (lambda (a b) (ash a (- 0 b)))) + (cons ins-bor logior) + (cons ins-band logand))) + +(define is-rdrr-instr? (lambda (tok) + (and (string=? tag-instruction (car tok) ) + (string=? ins-rdrr (cdr tok))))) + +(define is-load-instr? (lambda (tok) + (and (string=? tag-instruction (car tok) ) + (string=? ins-load (cdr tok))))) + (define peephole (let ((optimizers (list ;; (push x pop) -> () @@ -425,11 +456,11 @@ (lambda (xs ys) (cons (cons tag-instruction ins-pop) (cons (cons tag-instruction ins-pop) ys)))) ;; (push x push y add) - (list 5 (lambda (xs) (and (is-push-instr? (car xs)) - (is-number? (cadr xs)) - (is-push-instr? (caddr xs)) - (is-number? (cadddr xs)) - (is-binop-instr? (cadr (cdddr xs))))) + (list 5 (lambda (xs) (matches (list is-push-instr? + is-number? + is-push-instr? + is-number? + is-binop-instr?) xs)) (lambda (xs ys) (let ((n0 (cdr (cadr xs))) (n1 (cdr (cadddr xs))) @@ -437,18 +468,39 @@ (append (list (cons tag-instruction ins-push) (cons tag-number - ((cdr (assoc op (list (cons ins-add +) - (cons ins-sub -) - (cons ins-eq =) - (cons ins-lt <) - (cons ins-mul *) - (cons ins-div quotient) - (cons ins-mod remainder) - (cons ins-shl ash) - (cons ins-shr (lambda (a b) (ash a (- 0 b)))) - (cons ins-bor logior) - (cons ins-band logand)))) n0 n1))) - ys))))))) + ((cdr (assoc op inliners)) n0 n1))) + ys)))) + ;; accessing two local variables in a row + ;; (RDRR PUSH n0 LOAD PUSH x LOAD RDRR PUSH n0 LOAD) -> + ;; (RDRR PUSH n0 LOAD DUP PUSH x LOAD SWAP) + (list 11 (lambda (xs) + (matches (list is-rdrr-instr? + is-push-instr? + (lambda (x) (and (is-number? x) + (= (cdr x) 0))) + is-load-instr? + is-push-instr? + is-number? + is-load-instr? + is-rdrr-instr? + is-push-instr? + (lambda (x) (and (is-number? x) + (= (cdr x) 0))) + is-load-instr?) xs)) + (lambda (xs ys) (let ((off (car (cddr (cdddr xs))))) + (append + (list (mkinstr ins-rdrr) + (mkinstr ins-push) + (mknum 0) + (mkinstr ins-load) + (mkinstr ins-dup) + (mkinstr ins-push) + off + (mkinstr ins-load) + (mkinstr ins-swap)) + ys))) + )))) + (letrec ((helper (lambda (changed bb acc) (if (= (length bb) 0) (cons changed (reverse acc)) From 2760152d216bc32b0447fded24202af0b640903f Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 23 Nov 2014 13:02:05 -0500 Subject: [PATCH 05/31] add makefile target for safe-interpreter and add peephole.bytecode to default targets --- Makefile | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index eb366a2..d04c0fb 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ # CFLAGS=-g CFLAGS ?= -O3 -all : schemer.bytecode interpreter trace-interpreter assembler +all : schemer.bytecode peephole.bytecode interpreter trace-interpreter safe-interpreter assembler interpreter : interpreter.o gcc ${CFLAGS} -o interpreter interpreter.o @@ -27,10 +27,16 @@ interpreter.o : interpreter.c interpreter.h gcc ${CFLAGS} -c interpreter.c trace-interpreter : trace-interpreter.o - gcc ${CFLAGS} -o trace-interpreter -D__TRACE__ trace-interpreter.o + gcc ${CFLAGS} -o trace-interpreter trace-interpreter.o trace-interpreter.o : interpreter.c interpreter.h - gcc -c ${CFLAGS} -o trace-interpreter.o -D__TRACE__ interpreter.c + gcc -c ${CFLAGS} -o trace-interpreter.o -D__CHECK_INS__ -D__TRACE__ interpreter.c + +safe-interpreter : safe-interpreter.o + gcc ${CFLAGS} -o safe-interpreter safe-interpreter.o + +safe-interpreter.o : interpreter.c interpreter.h + gcc -c ${CFLAGS} -o safe-interpreter.o -D__CHECK_INS__ interpreter.c assembler : assembler.yy.o gcc ${CFLAGS} -o assembler assembler.yy.o @@ -66,4 +72,4 @@ clean : rm -f *~ assembler.yy.c *.o *.asm *-bootstrap* distclean : clean - rm -f interpreter trace-interpreter assembler *.asm *.bytecode \ No newline at end of file + rm -f interpreter trace-interpreter safe-interpreter assembler *.asm *.bytecode From e16c61c426211abde92688f7cca88079aa341618 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 23 Nov 2014 13:03:43 -0500 Subject: [PATCH 06/31] whitespace fixups in peephole --- peephole.sch | 58 ++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/peephole.sch b/peephole.sch index e80b836..b68cd8a 100644 --- a/peephole.sch +++ b/peephole.sch @@ -442,34 +442,34 @@ (define peephole (let ((optimizers (list - ;; (push x pop) -> () + ;; (push x pop) -> () (list 3 (lambda (xs) (and (is-push-instr? (car xs)) - (is-pop-instr? (caddr xs)))) - (lambda (xs ys) ys)) + (is-pop-instr? (caddr xs)))) + (lambda (xs ys) ys)) ;; (swap swap) -> () (list 2 (lambda (xs) (and (is-swap-instr? (car xs)) - (is-swap-instr? (cadr xs)))) - (lambda (xs ys) ys)) + (is-swap-instr? (cadr xs)))) + (lambda (xs ys) ys)) ;; (binop pop) -> (pop pop) (list 2 (lambda (xs) (and (is-binop-instr? (car xs)) - (is-pop-instr? (cadr xs)))) - (lambda (xs ys) (cons (cons tag-instruction ins-pop) - (cons (cons tag-instruction ins-pop) ys)))) + (is-pop-instr? (cadr xs)))) + (lambda (xs ys) (cons (cons tag-instruction ins-pop) + (cons (cons tag-instruction ins-pop) ys)))) ;; (push x push y add) (list 5 (lambda (xs) (matches (list is-push-instr? - is-number? - is-push-instr? - is-number? - is-binop-instr?) xs)) - (lambda (xs ys) - (let ((n0 (cdr (cadr xs))) - (n1 (cdr (cadddr xs))) - (op (cdr (cadr (cdddr xs))))) - (append - (list (cons tag-instruction ins-push) - (cons tag-number - ((cdr (assoc op inliners)) n0 n1))) - ys)))) + is-number? + is-push-instr? + is-number? + is-binop-instr?) xs)) + (lambda (xs ys) + (let ((n0 (cdr (cadr xs))) + (n1 (cdr (cadddr xs))) + (op (cdr (cadr (cdddr xs))))) + (append + (list (cons tag-instruction ins-push) + (cons tag-number + ((cdr (assoc op inliners)) n0 n1))) + ys)))) ;; accessing two local variables in a row ;; (RDRR PUSH n0 LOAD PUSH x LOAD RDRR PUSH n0 LOAD) -> ;; (RDRR PUSH n0 LOAD DUP PUSH x LOAD SWAP) @@ -490,14 +490,14 @@ (lambda (xs ys) (let ((off (car (cddr (cdddr xs))))) (append (list (mkinstr ins-rdrr) - (mkinstr ins-push) - (mknum 0) - (mkinstr ins-load) - (mkinstr ins-dup) - (mkinstr ins-push) - off - (mkinstr ins-load) - (mkinstr ins-swap)) + (mkinstr ins-push) + (mknum 0) + (mkinstr ins-load) + (mkinstr ins-dup) + (mkinstr ins-push) + off + (mkinstr ins-load) + (mkinstr ins-swap)) ys))) )))) From 6e4fdac50afb912ead728d17830378dc100c4688 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Tue, 25 Nov 2014 16:45:46 -0500 Subject: [PATCH 07/31] restructuring some of peephole.sch to support concise descriptions of each instruction (possibly broken) --- peephole.sch | 178 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 110 insertions(+), 68 deletions(-) diff --git a/peephole.sch b/peephole.sch index b68cd8a..18333c8 100644 --- a/peephole.sch +++ b/peephole.sch @@ -34,6 +34,108 @@ (define ins-ischr "ISCHR") (define ins-isins "ISINS") +(define instruction-infos + (list + ;; MNEMONIC BBTERM CONSUMES DELTA HAS-IMM STACKONLY + (list ins-push #f 0 1 #t #t) + (list ins-pop #f 1 -1 #f #t) + (list ins-swap #f 2 0 #f #t) + (list ins-dup #f 0 1 #f #t) + (list ins-rot #f 3 0 #f #t) + (list ins-add #f 2 -1 #f #t) + (list ins-sub #f 2 -1 #f #t) + (list ins-eq #f 2 -1 #f #t) + (list ins-lt #f 2 -1 #f #t) + (list ins-mul #f 2 -1 #f #t) + (list ins-div #f 2 -1 #f #t) + (list ins-mod #f 2 -1 #f #t) + (list ins-shl #f 2 -1 #f #t) + (list ins-shr #f 2 -1 #f #t) + (list ins-bor #f 2 -1 #f #t) + (list ins-band #f 2 -1 #f #t) + (list ins-getc #f 0 1 #f #t) + (list ins-dump #f 0 0 #f #t) + (list ins-pint #f 0 1 #f #t) + (list ins-pchr #f 0 1 #f #t) + + (list ins-stor #f 3 -2 #f #f) + (list ins-load #f 2 -1 #f #f) + (list ins-aloc #f 1 0 #f #f) + (list ins-rdrr #f 0 1 #f #f) + (list ins-wtrr #f 1 -1 #f #f) + (list ins-isnum #f 1 0 #f #t) + (list ins-isptr #f 1 0 #f #t) + (list ins-islconst #f 1 0 #f #t) + (list ins-ischr #f 1 0 #f #t) + (list ins-isins #f 1 0 #f #t) + + (list ins-call #t) + (list ins-ret #t) + (list ins-jmp #t) + (list ins-jtrue #t) + (list ins-end #t))) + + +(define is-instr? (lambda (tok) (string=? (car tok) tag-instruction))) +(define is-number? (lambda (tok) (string=? (car tok) tag-number))) +(define is-definition? (lambda (tok) (string=? (car tok) tag-definition))) + +(define lookup-instr-info (tok) + (assoc (cdr tok) instruction-infos)) + +(define is-terminal-instr?? + (lambda (tok) + (and (is-instr? tok) + (cadr (lookup-instr-info tok))))) + +(define is-push-instr? + (lambda (tok) (and (is-instr? tok) + (string=? (cdr tok) ins-push)))) + +(define is-pop-instr? + (lambda (tok) (and (is-instr? tok) + (string=? (cdr tok) ins-pop)))) +(define is-swap-instr? + (lambda (tok) (and (is-instr? tok) + (string=? (cdr tok) ins-swap)))) + +(define is-rdrr-instr? (lambda (tok) + (and (is-instr? tok) + (string=? ins-rdrr (cdr tok))))) + +(define is-load-instr? (lambda (tok) + (and (is-instr? tok) + (string=? ins-load (cdr tok))))) + +(define is-binop-instr? + (lambda (tok) (and (string=? (car tok) tag-instruction) + (or (string=? (cdr tok) ins-add) + (string=? (cdr tok) ins-sub) + (string=? (cdr tok) ins-eq) + (string=? (cdr tok) ins-lt) + (string=? (cdr tok) ins-mul) + (string=? (cdr tok) ins-div) + (string=? (cdr tok) ins-mod) + (string=? (cdr tok) ins-shl) + (string=? (cdr tok) ins-shr) + (string=? (cdr tok) ins-bor) + (string=? (cdr tok) ins-band) + )))) + +(define is-stack-only-instr? + (lambda (tok) + (let ((info (assoc (cdr instr) instruction-infos))) + (and (is-instr? tok) + (not (cadr info)) + (cadddr (cddr info)))))) + +(define instr-has-immediate? + (lambda (ins) + (let ((info (assoc (cdr instr) instruction-infos))) + (and (is-instr? tok) + (not (cadr info)) + (caddr (cddr info)))))) + (define tag-instruction "instruction") (define tag-number "number") (define tag-pointer "pointer") @@ -329,77 +431,18 @@ (newline) (print-basic-block (cdr l)))))) -(define is-jmp-instruction? - (lambda (tok) - (and (string=? (car tok) tag-instruction) - (let ((val (cdr tok))) - (or (string=? val ins-jmp) - (string=? val ins-jtrue) - (string=? val ins-call) - (string=? val ins-ret) - (string=? val ins-end)))))) - -(define is-mem-instr? - (lambda (tok) - (and (string=? (car tok) tag-instruction) - (let ((val (cdr tok))) - (or (string=? val ins-aloc) - (string=? val ins-stor)))))) - -(define is-side-effecting-instr? - (lambda (tok) - (and (string=? (car tok) tag-instruction) - (let ((val (cdr tok))) - (or (string=? val ins-aloc) - (string=? val ins-stor)))))) - -(define is-number? (lambda (tok) (string=? (car tok) tag-number))) - -(define ins-has-immediate? - (lambda (ins) - (and (string=? (car ins) tag-instruction) - (string=? (cdr ins) ins-push)))) - -(define is-definition? - (lambda (tok) - (string=? (car tok) tag-definition))) - (define asm-read-basic-block (lambda () (letrec ((helper (lambda (acc) (let ((tok (asm-next-token))) (if (not tok) (reverse acc) - (if (or (is-jmp-instruction? tok) + (if (or (is-terminal-instr? tok) (is-definition? tok)) (reverse (cons tok acc)) (helper (cons tok acc)))))))) (helper '())))) -(define is-push-instr? - (lambda (tok) (and (string=? (car tok) tag-instruction) - (string=? (cdr tok) ins-push)))) -(define is-pop-instr? - (lambda (tok) (and (string=? (car tok) tag-instruction) - (string=? (cdr tok) ins-pop)))) -(define is-swap-instr? - (lambda (tok) (and (string=? (car tok) tag-instruction) - (string=? (cdr tok) ins-swap)))) - -(define is-binop-instr? - (lambda (tok) (and (string=? (car tok) tag-instruction) - (or (string=? (cdr tok) ins-add) - (string=? (cdr tok) ins-sub) - (string=? (cdr tok) ins-eq) - (string=? (cdr tok) ins-lt) - (string=? (cdr tok) ins-mul) - (string=? (cdr tok) ins-div) - (string=? (cdr tok) ins-mod) - (string=? (cdr tok) ins-shl) - (string=? (cdr tok) ins-shr) - (string=? (cdr tok) ins-bor) - (string=? (cdr tok) ins-band) - )))) (define split (letrec ((helper (lambda (l n a) @@ -407,7 +450,7 @@ (helper (cdr l) (- n 1) (cons (car l) a)))))) (lambda (l n) (helper l n '())))) -(define fold (lambda (f s l) (if (null? l) s (fold f (f s (car l)) (cdr l))))) +(define fold (lambda (f s l) (if (null? l) s (fold f (f s (car l)) (cdr l))))) (define matches (lambda (pat seq) @@ -432,13 +475,12 @@ (cons ins-bor logior) (cons ins-band logand))) -(define is-rdrr-instr? (lambda (tok) - (and (string=? tag-instruction (car tok) ) - (string=? ins-rdrr (cdr tok))))) - -(define is-load-instr? (lambda (tok) - (and (string=? tag-instruction (car tok) ) - (string=? ins-load (cdr tok))))) +(define find-consumer + (letrec ((helper (lambda (bb depth) + (if (null? bb) '() + (if (> (consumption-depth (car bb)) depth) bb + (helper (cdr bb) (+ depth (stack-delta (car bb))))))))) + (lambda (bb) (helper bb 0)))) (define peephole (let ((optimizers (list From f2edf9fa2d3c922cafa8b3fb14a12a1c48cadb2a Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 26 Nov 2014 13:10:17 -0500 Subject: [PATCH 08/31] fix gcc warnings: specify mode in assembler open call, check return value of write in assembler, use sizeof(word) when reading in interpreter --- assembler.l | 8 ++++++-- interpreter.c | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/assembler.l b/assembler.l index 256c31c..811aecf 100644 --- a/assembler.l +++ b/assembler.l @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) { int failure = 0; int i; - int output_filedes = argc > 1 ? open(argv[1], O_WRONLY | O_CREAT) : STDOUT_FILENO; + int output_filedes = argc > 1 ? open(argv[1], O_WRONLY | O_CREAT, 0644) : STDOUT_FILENO; if(output_filedes < 0){ fprintf(stderr, "Failed to open output file. Dying\n"); exit(1); @@ -263,7 +263,11 @@ int main(int argc, char *argv[]) fprintf(stderr, "Dying From Failure\n"); exit(1); } - write(output_filedes, out_buffer, words*sizeof(word)); + if(write(output_filedes, out_buffer, words*sizeof(word)) < words*sizeof(word)){ + fprintf(stderr, "Error while writing!\n"); + close(output_filedes); + return 1; + } fprintf(stderr, "Read %d lines. Wrote %u words\n", __line_number, words); close(output_filedes); return 0; diff --git a/interpreter.c b/interpreter.c index 6e9ab80..63c3cfa 100644 --- a/interpreter.c +++ b/interpreter.c @@ -636,7 +636,7 @@ int main(int argc, char *argv[]) exit(1); } /* read in as much as we can! */ - nread = read(fd, memory, MEM_SIZE*sizeof(long)); + nread = read(fd, memory, MEM_SIZE*sizeof(word)); if(nread < 0){ printf("Read failed\n"); exit(1); From dfb654ba63a415e6544aac995937422d0cc28c1b Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 26 Nov 2014 13:52:13 -0500 Subject: [PATCH 09/31] use a word for heap offset in heap_check macro --- interpreter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interpreter.c b/interpreter.c index 63c3cfa..9b32e33 100644 --- a/interpreter.c +++ b/interpreter.c @@ -308,7 +308,7 @@ static inline void print_cell(FILE *stream, word c){ do the allocation, we crash. */ #define HEAP_CHECK(n) do{ \ - long __tmp = n; \ + word __tmp = n; \ if(unlikely(hp + __tmp >= heap_base + heap_size)){ \ heap_base = (heap_base == prog_end ? \ upper_heap : prog_end); \ From 538aa46c87473ba32cc7a0c690e062e47b443f1e Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 26 Nov 2014 14:48:55 -0500 Subject: [PATCH 10/31] fix compile-if routine. apparently, let bindings are evaluated in an unspecified order so some versions of guile were evaluating it as conditional, true case, false case, jump statements. This was causing problems. --- schemer.sch | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/schemer.sch b/schemer.sch index 3c1f604..9a52d8f 100644 --- a/schemer.sch +++ b/schemer.sch @@ -865,24 +865,24 @@ (conditional (car (cdr l))) (true-case (car (cdr (cdr l)))) (false-case (car (cdr (cdr (cdr l)))))) - (let ((r1 (compile-sexp conditional env #t)) - (x (append-instructions - ins-push false-value - ins-eq - ins-push (asm-label-reference false-label) - ins-jtrue)) - (r2 (compile-sexp true-case env rest)) - (y (append-instructions ins-push (asm-label-reference join-label) ins-jmp - (asm-label-definition false-label))) - (r3 (compile-sexp false-case env rest))) - (append-instruction (asm-label-definition join-label)) - (lambda () - (do-compile-task r1) - (do-compile-task r2) - (do-compile-task r3) - ) - ) - )))) + (let ((r1 (compile-sexp conditional env #t))) + (append-instructions + ins-push false-value + ins-eq + ins-push (asm-label-reference false-label) + ins-jtrue) + (let ((r2 (compile-sexp true-case env rest))) + (append-instructions ins-push (asm-label-reference join-label) ins-jmp + (asm-label-definition false-label)) + (let ((r3 (compile-sexp false-case env rest))) + (append-instruction (asm-label-definition join-label)) + (lambda () + (do-compile-task r1) + (do-compile-task r2) + (do-compile-task r3) + ) + ) + )))))) (define compile-quoted-sexp (lambda (s env rest) From ca4c51602573e0b2b0b63067a49a54b259e20c08 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 26 Nov 2014 14:49:09 -0500 Subject: [PATCH 11/31] fix define error in peephole.sch --- peephole.sch | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/peephole.sch b/peephole.sch index 18333c8..8c6fab7 100644 --- a/peephole.sch +++ b/peephole.sch @@ -80,8 +80,9 @@ (define is-number? (lambda (tok) (string=? (car tok) tag-number))) (define is-definition? (lambda (tok) (string=? (car tok) tag-definition))) -(define lookup-instr-info (tok) - (assoc (cdr tok) instruction-infos)) +(define lookup-instr-info + (lambda (tok) + (assoc (cdr tok) instruction-infos))) (define is-terminal-instr?? (lambda (tok) From ab58d1a89bf77d06668d11a44d6f8184042bbc1a Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 26 Nov 2014 16:56:38 -0500 Subject: [PATCH 12/31] removing some additional let statements with multiple side-effecting binders --- schemer.sch | 55 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/schemer.sch b/schemer.sch index 9a52d8f..e594fb5 100644 --- a/schemer.sch +++ b/schemer.sch @@ -709,11 +709,11 @@ (define compile-let-bindings (lambda (bs env) (if (null? bs) #f - (let ((r2 (compile-sexp (car (cdr (car bs))) env #t)) - (r1 (compile-let-bindings (cdr bs) env))) - (lambda () - (do-compile-task r1) - (do-compile-task r2)))))) + (let ((r2 (compile-sexp (car (cdr (car bs))) env #t))) + (let ((r1 (compile-let-bindings (cdr bs) env))) + (lambda () + (do-compile-task r1) + (do-compile-task r2))))))) (define compile-let (lambda (l env rest) @@ -887,14 +887,13 @@ (define compile-quoted-sexp (lambda (s env rest) (if (pair? s) - (let ((r2 (compile-quoted-sexp (car s) env #t)) - (r1 (compile-quoted-sexp (cdr s) env #t)) - ) - (u-call-cons) - (lambda () - (do-compile-task r1) - (do-compile-task r2) - #f)) + (let ((r2 (compile-quoted-sexp (car s) env #t))) + (let ((r1 (compile-quoted-sexp (cdr s) env #t))) + (u-call-cons) + (lambda () + (do-compile-task r1) + (do-compile-task r2) + #f))) (if (null? s) (begin (assembly-nil) @@ -909,27 +908,27 @@ (lambda (n l env) (if (null? l) (assembly-make-args n) - (let ((r2 (compile-sexp (car l) env #t)) - (r1 (compile-arguments n (cdr l) env))) - (lambda () - (do-compile-task r1) - (do-compile-task r2) - ))))) + (let ((r2 (compile-sexp (car l) env #t))) + (let ((r1 (compile-arguments n (cdr l) env))) + (lambda () + (do-compile-task r1) + (do-compile-task r2) + )))))) (define compile-list (lambda (l env rest) (let ((s (find-special (car l)))) (if s (s l env rest) - (let ((r1 (compile-arguments (length (cdr l)) (cdr l) env)) - (r2 (compile-sexp (car l) env #t))) - (if rest - (u-call-funcall) - (u-call-tailcall) - ) - (lambda () - (do-compile-task r1) - (do-compile-task r2))))))) + (let ((r1 (compile-arguments (length (cdr l)) (cdr l) env))) + (let ((r2 (compile-sexp (car l) env #t))) + (if rest + (u-call-funcall) + (u-call-tailcall) + ) + (lambda () + (do-compile-task r1) + (do-compile-task r2)))))))) (define compile-sexp (lambda (s env rest) From fc0f5dd6d29bb13866af10b8b9826eeea08952a6 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 26 Nov 2014 17:14:41 -0500 Subject: [PATCH 13/31] cleaning up handling of eof, true, and false in peephole --- peephole.sch | 91 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 35 deletions(-) diff --git a/peephole.sch b/peephole.sch index 8c6fab7..3fbb0a6 100644 --- a/peephole.sch +++ b/peephole.sch @@ -84,11 +84,19 @@ (lambda (tok) (assoc (cdr tok) instruction-infos))) -(define is-terminal-instr?? +(define is-terminal-instr? (lambda (tok) (and (is-instr? tok) (cadr (lookup-instr-info tok))))) +(define instr-consumption-depth + (lambda (tok) + (if (is-instr? tok) (caddr (lookup-instr-info tok)) 0))) + +(define instr-stack-delta + (lambda (tok) + (if (is-instr? tok) (cadddr (lookup-instr-info tok)) 0))) + (define is-push-instr? (lambda (tok) (and (is-instr? tok) (string=? (cdr tok) ins-push)))) @@ -125,14 +133,14 @@ (define is-stack-only-instr? (lambda (tok) - (let ((info (assoc (cdr instr) instruction-infos))) + (let ((info (assoc (cdr tok) instruction-infos))) (and (is-instr? tok) (not (cadr info)) (cadddr (cddr info)))))) (define instr-has-immediate? - (lambda (ins) - (let ((info (assoc (cdr instr) instruction-infos))) + (lambda (tok) + (let ((info (assoc (cdr tok) instruction-infos))) (and (is-instr? tok) (not (cadr info)) (caddr (cddr info)))))) @@ -144,6 +152,7 @@ (define tag-lang-const "lang-const") (define tag-string "string") (define tag-char "char") +(define tag-eof "eof") (define tag-definition "definition") (define tag-reference "reference") @@ -334,7 +343,15 @@ (helper (cons c acc) (read-char)) (cons (reverse acc) c))))) (let ((i (helper (list c) (read-char)))) - (cons (cons tag-instruction (list->string (car i))) (cdr i)))))) + (let ((str (list->string (car i)))) + (cons (if (string=? "FALSE" str) + (cons tag-vm-const #f) + (if (string=? "TRUE" str) + (cons tag-vm-const #t) + (if (string=? "EOF" str) + (cons tag-eof '()) + (cons tag-instruction str)))) + (cdr i))))))) (define asm-reader-state-entrance (lambda (c) @@ -393,37 +410,41 @@ (lambda (asm-tok) (let ((typ (car asm-tok)) (val (cdr asm-tok))) - (if (string=? typ "number") + (if (string=? typ tag-number) (begin (display #\n) (display val)) - (if (string=? typ "vm-const") - (begin (display #\v) (display val)) - (if (string=? typ "lang-const") - (begin (display #\l) (display val)) - (if (string=? typ "pointer") - (begin - (display #\p) - (display (car val)) - (display #\,) - (display (cdr val))) - (if (string=? typ "string") - (begin (display #\") (display val) (display #\")) - (if (string=? typ "char") - (display-char-asm val) - (if (string=? typ "reference") - (begin (display #\@) (display val)) - (if (string=? typ "definition") - (begin (display #\:) - (display (car val)) - (if (not (null? (cdr val))) - (begin (display #\,) (display (cdr val))) #f)) - (if (string=? typ "instruction") - (display val) - (begin - (display "Unknown token type: ") - (display typ) - (quit)))))))))))))) + (if (string=? typ tag-vm-const) + (if (equal? val #t) (display "FALSE") + (if (equal? val #f) (display "TRUE") + (begin (display #\v) (display val)))) + (if (string=? typ tag-eof) + (display "EOF") + (if (string=? typ tag-lang-const) + (begin (display #\l) (display val)) + (if (string=? typ "pointer") + (begin + (display #\p) + (display (car val)) + (display #\,) + (display (cdr val))) + (if (string=? typ "string") + (begin (display #\") (display val) (display #\")) + (if (string=? typ "char") + (display-char-asm val) + (if (string=? typ "reference") + (begin (display #\@) (display val)) + (if (string=? typ "definition") + (begin (display #\:) + (display (car val)) + (if (not (null? (cdr val))) + (begin (display #\,) (display (cdr val))) #f)) + (if (string=? typ "instruction") + (display val) + (begin + (display "Unknown token type: ") + (display typ) + (quit))))))))))))))) (define print-basic-block (lambda (l) @@ -479,8 +500,8 @@ (define find-consumer (letrec ((helper (lambda (bb depth) (if (null? bb) '() - (if (> (consumption-depth (car bb)) depth) bb - (helper (cdr bb) (+ depth (stack-delta (car bb))))))))) + (if (< (instr-consumption-depth (car bb)) depth) bb + (helper (cdr bb) (+ depth (instr-stack-delta (car bb))))))))) (lambda (bb) (helper bb 0)))) (define peephole From 4fd11549b91b3e4cebcd180c02047d966ec81b02 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 13 Apr 2022 08:04:29 -0400 Subject: [PATCH 14/31] Add instructions to output binary values and to coerce from an int to other types while outputing, this should allow the assembler to become self-hosting. --- interpreter.c | 36 +++++++++++++++++++++++++++++++++++- interpreter.h | 9 ++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/interpreter.c b/interpreter.c index 9b32e33..c7e5b00 100644 --- a/interpreter.c +++ b/interpreter.c @@ -623,7 +623,12 @@ int main(int argc, char *argv[]) /* Type checking */ [I_ISNUM] = &&ISNUM, [I_ISLCONST] = &&ISLCONST, [I_ISPTR] = &&ISPTR, [I_ISBOOL] = &&ISBOOL, - [I_ISCHR] = &&ISCHR, [I_ISINS] = &&ISINS + [I_ISCHR] = &&ISCHR, [I_ISINS] = &&ISINS, + + [I_PBIN] = &&PBIN, + [I_PBLCONSTI] = &&PBLCONSTI, + [I_PBVCONSTI] = &&PBVCONSTI, + [I_PBPTRI] = &&PBPTRI }; /* We first do some basic startup stuff to load the program */ @@ -894,6 +899,35 @@ int main(int argc, char *argv[]) INSTRUCTION(ISBOOL, STACK(0) = (IS_BOOL(STACK(0))) ? TRUE_VAL : FALSE_VAL); INSTRUCTION(ISCHR, STACK(0) = (IS_CHAR(STACK(0))) ? TRUE_VAL : FALSE_VAL); INSTRUCTION(ISINS, STACK(0) = (IS_INS(STACK(0))) ? TRUE_VAL : FALSE_VAL); + + INSTRUCTION(PBIN, do{ + word w = htonl(STACK(0)); + fwrite(&w, sizeof(w), 1, stdout); + ignore(STACK_POP()); + }while(0)); + INSTRUCTION(PBLCONSTI, do{ + ASSERT_TYPE(STACK(0), NUM); + word w = htonl(MAKE_LCONST(NUM_TO_NATIVE(STACK(0)))); + fwrite(&w, sizeof(w), 1, stdout); + ignore(STACK_POP()); + }while(0)); + INSTRUCTION(PBVCONSTI, do{ + ASSERT_TYPE(STACK(0), NUM); + word w = htonl(MAKE_VCONST(NUM_TO_NATIVE(STACK(0)))); + fwrite(&w, sizeof(w), 1, stdout); + ignore(STACK_POP()); + }while(0)); + INSTRUCTION(PBPTRI, do{ + ASSERT_TYPE(STACK(0), NUM); + ASSERT_TYPE(STACK(1), NUM); + word base = NUM_TO_NATIVE(STACK(0)); + word size = NUM_TO_NATIVE(STACK(1)); + word ptr = htonl(MAKE_PTR(base, size)); + fwrite(&ptr, sizeof(ptr), 1, stdout); + ignore(STACK_POP()); + ignore(STACK_POP()); + }while(0)); + return 0; } diff --git a/interpreter.h b/interpreter.h index 9f5acb8..e4d0324 100644 --- a/interpreter.h +++ b/interpreter.h @@ -102,7 +102,14 @@ typedef uint32_t uword; #define I_ISCHR INS(34) #define I_ISINS INS(35) -#define NR_INS 36 +/* Binary I/O */ +#define I_PBIN INS(36) +/* Binary I/O with type conversion from integer */ +#define I_PBLCONSTI INS(37) +#define I_PBVCONSTI INS(38) +#define I_PBPTRI INS(39) + +#define NR_INS 40 /* End instructions */ From dd12573438f93600fc2bb25c81dcc23c3d44d7df Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 1 May 2022 07:18:30 -0400 Subject: [PATCH 15/31] Add dump to invalid load/store --- interpreter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/interpreter.c b/interpreter.c index c7e5b00..f422124 100644 --- a/interpreter.c +++ b/interpreter.c @@ -841,6 +841,7 @@ int main(int argc, char *argv[]) NUM_TO_NATIVE(STACK(0)) >= PTR_SIZE(STACK(1))){ fprintf(stderr, "Invalid store: offset %"PRId32" out of bounds\n", NUM_TO_NATIVE(STACK(0))); + DO_DUMP(stderr); exit(1); } memory[PTR_TARGET(STACK(1)) + NUM_TO_NATIVE(STACK(0))] = STACK(2); @@ -857,6 +858,7 @@ int main(int argc, char *argv[]) NUM_TO_NATIVE(STACK(0)) > PTR_SIZE(STACK(1))){ fprintf(stderr, "Invalid load: offset %"PRId32" out of bounds\n", NUM_TO_NATIVE(STACK(0))); + DO_DUMP(stderr); exit(1); } STACK(1) = memory[PTR_TARGET(STACK(1)) + NUM_TO_NATIVE(STACK(0))]; From fa3164416cd3b5ccdd37134c2a1c7cc0650c44b6 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 1 May 2022 07:19:08 -0400 Subject: [PATCH 16/31] add assembler directives for binary output instructions --- assembler.l | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/assembler.l b/assembler.l index 811aecf..e3254eb 100644 --- a/assembler.l +++ b/assembler.l @@ -170,6 +170,11 @@ ISCHR {out_buffer[words++] = PACK_VM_CONST(I_ISCHR);} ISBOOL {out_buffer[words++] = PACK_VM_CONST(I_ISBOOL);} ISINS {out_buffer[words++] = PACK_VM_CONST(I_ISINS);} +PBIN {out_buffer[words++] = PACK_VM_CONST(I_PBIN);} +PBLCONSTI {out_buffer[words++] = PACK_VM_CONST(I_PBLCONSTI);} +PBVCONSTI {out_buffer[words++] = PACK_VM_CONST(I_PBVCONSTI);} +PBPTRI {out_buffer[words++] = PACK_VM_CONST(I_PBPTRI);} + TRUE {out_buffer[words++] = PACK_VM_CONST(TRUE_VAL);} FALSE {out_buffer[words++] = PACK_VM_CONST(FALSE_VAL);} EOF {out_buffer[words++] = PACK_CHAR(EOF);} From 2cf4a394cd12fdbc1f75559ece48a6cf85c2dbe5 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 1 May 2022 07:19:32 -0400 Subject: [PATCH 17/31] Add a string->number definition --- lib.sch | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/lib.sch b/lib.sch index 5a052fd..b4d6df1 100644 --- a/lib.sch +++ b/lib.sch @@ -170,6 +170,25 @@ (display d) (display "' is not a digit!\n") (quit)))))))))))))) + +(define char->digit + (lambda (d) + (if (char=? d #\0) 0 + (if (char=? d #\1) 1 + (if (char=? d #\2) 2 + (if (char=? d #\3 ) 3 + (if (char=? d #\4) 4 + (if (char=? d #\5) 5 + (if (char=? d #\6) 6 + (if (char=? d #\7) 7 + (if (char=? d #\8) 8 + (if (char=? d #\9 ) 9 + (begin + (display "Error: '") + (display d) + (display "' is not a digit!\n") + (quit)))))))))))))) + (define number->string-helper (lambda (n rest) (if (= n 0) @@ -183,6 +202,12 @@ (number->string-helper n nil) (string-append "-" (number->string-helper (* n -1) nil)))))) +(define string->number + (lambda (s) + (string-fold + (lambda (n c) + (+ (* n 10) (char->digit c)))))) + (define assoc (lambda (key alist) (if (null? alist) #f From 45a04791fd66d0410cca308ec019cc6c50383388 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sun, 1 May 2022 07:20:45 -0400 Subject: [PATCH 18/31] Add compilar intrinsics for binary output routines. --- schemer.sch | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/schemer.sch b/schemer.sch index e594fb5..220ee0e 100644 --- a/schemer.sch +++ b/schemer.sch @@ -226,7 +226,12 @@ ("vapply" "vapply") ("char=?" "equal") ;; for characters ("char Date: Wed, 4 May 2022 20:26:55 -0400 Subject: [PATCH 19/31] Remove string->number and char->digit from lib.sch --- lib.sch | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/lib.sch b/lib.sch index b4d6df1..28f5c3f 100644 --- a/lib.sch +++ b/lib.sch @@ -171,24 +171,6 @@ (display "' is not a digit!\n") (quit)))))))))))))) -(define char->digit - (lambda (d) - (if (char=? d #\0) 0 - (if (char=? d #\1) 1 - (if (char=? d #\2) 2 - (if (char=? d #\3 ) 3 - (if (char=? d #\4) 4 - (if (char=? d #\5) 5 - (if (char=? d #\6) 6 - (if (char=? d #\7) 7 - (if (char=? d #\8) 8 - (if (char=? d #\9 ) 9 - (begin - (display "Error: '") - (display d) - (display "' is not a digit!\n") - (quit)))))))))))))) - (define number->string-helper (lambda (n rest) (if (= n 0) @@ -202,12 +184,6 @@ (number->string-helper n nil) (string-append "-" (number->string-helper (* n -1) nil)))))) -(define string->number - (lambda (s) - (string-fold - (lambda (n c) - (+ (* n 10) (char->digit c)))))) - (define assoc (lambda (key alist) (if (null? alist) #f From f91e8cf60728779e3b0fd20e075e727a9d1c2659 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 4 May 2022 20:28:07 -0400 Subject: [PATCH 20/31] reorder some builtin definition in schemer.sch --- schemer.sch | 64 +++++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/schemer.sch b/schemer.sch index 220ee0e..db93f8b 100644 --- a/schemer.sch +++ b/schemer.sch @@ -1359,31 +1359,6 @@ ins-push (asm-label-reference "vector_fill") ins-jmp)) - (begin - (assembly-builtin-header "vector_ref") - (assembly-get-arg 0) - (assembly-get-arg 1) - (append-instructions - ins-push vector-elems-offset - ins-add - ins-load) - (assembly-funret)) - - (begin - (assembly-builtin-header "vapply") - (assembly-get-arg 1) - (assembly-get-arg 0)) ; fall thru into tailcall - (assembly-tailcall) ; do not move the definition of tailcall!! - - (assembly-funcall) - - (begin - (assembly-builtin-header "eof_object_q") - (assembly-get-arg 0) - (append-instructions - ins-push "EOF" ins-eq) - (assembly-funret)) - (begin (assembly-builtin-header "print_binary") (assembly-get-arg 0) @@ -1410,7 +1385,34 @@ (assembly-get-arg 0) (assembly-get-arg 1) (append-instructions "PBPTRI") + (assembly-funret) ) + + (begin + (assembly-builtin-header "vector_ref") + (assembly-get-arg 0) + (assembly-get-arg 1) + (append-instructions + ins-push vector-elems-offset + ins-add + ins-load) + (assembly-funret)) + + (begin + (assembly-builtin-header "vapply") + (assembly-get-arg 1) + (assembly-get-arg 0)) ; fall thru into tailcall + (assembly-tailcall) ; do not move the definition of tailcall!! + + (assembly-funcall) + + (begin + (assembly-builtin-header "eof_object_q") + (assembly-get-arg 0) + (append-instructions + ins-push "EOF" ins-eq) + (assembly-funret)) + )) @@ -1431,7 +1433,6 @@ (begin (append-instruction ins-end)))))) - ; Into the reader. ; ; The reader is pretty simple. @@ -1442,10 +1443,12 @@ ; read-list reads sexps until the returned 'next-char' is a close peren, ; then returns the list read, and the next non-whitespace character. ; reader utility functions -(define is-space? (lambda (c) (if (eof-object? c) #t - (if (char=? c #\space) #t - (if (char=? c #\tab) #t - (if (char=? c #\newline) #t #f)))))) +(define is-space? + (lambda (c) (if (eof-object? c) #t + (if (char=? c #\space) #t + (if (char=? c #\tab) #t + (if (char=? c #\newline) #t #f)))))) + (define is-delimiter? (lambda (c) (if (is-space? c) #t (if (char=? c #\() #t (if (char=? c #\)) #t @@ -1610,7 +1613,6 @@ (read-dotted-cdr) (cons (parse-token x) (read-list)))))))) - (define read-sexp (lambda () (parse-token (next-token)))) From 20a4b7e462707fb6ea0a510b54cc416610f351c4 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 4 May 2022 20:28:31 -0400 Subject: [PATCH 21/31] first pass at self-hosting assembler. --- assembler.sch | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 assembler.sch diff --git a/assembler.sch b/assembler.sch new file mode 100644 index 0000000..82bb1ed --- /dev/null +++ b/assembler.sch @@ -0,0 +1,165 @@ +(define vm-constants-alist (list (cons "PUSH" 0) + (cons "POP" 1) + (cons "SWAP" 2) + (cons "DUP" 3) + (cons "ROT" 4) + (cons "CALL" 5) + (cons "RET" 6) + (cons "JMP" 7) + (cons "JTRUE" 8) + (cons "END" 9) + (cons "ADD" 10) + (cons "MUL" 11) + (cons "SUB" 12) + (cons "DIV" 13) + (cons "MOD" 14) + (cons "SHL" 15) + (cons "SHR" 16) + (cons "BOR" 17) + (cons "BAND" 18) + (cons "EQ" 19) + (cons "LT" 20) + (cons "STOR" 21) + (cons "LOAD" 22) + (cons "ALOC" 23) + (cons "GETC" 24) + (cons "DUMP" 25) + (cons "PINT" 26) + (cons "PCHR" 27) + (cons "RDRR" 28) + (cons "WTRR" 29) + (cons "ISNUM" 30) + (cons "ISLCONST" 31) + (cons "ISPTR" 32) + (cons "ISBOOL" 33) + (cons "ISCHR" 34) + (cons "ISINS" 35) + (cons "PBIN" 36) + (cons "PBLCONSTI" 37) + (cons "PBVCONSTI" 38) + (cons "PBPTRI" 39) + (cons "TRUE" 128) + (cons "FALSE" 129))) + +(define append-vm-constant print-vconst) +(define append-language-constant print-lconst) +(define append-number print-binary) +(define append-pointer print-pointer) +(define append-character print-vconst) + +(define make-label-definition + (lambda (ins pc) + (letrec ((helper (lambda (i) + (if (>= i (string-length ins)) #f + (if (char=? (string-ref ins i) #\,) i + (helper (+ i 1))))))) + (let ((comma (helper 1))) + (if comma + (list (substring ins 1 comma) pc + (string->number (substring ins (+ comma 1) (string-length ins)))) + (list (substring ins 1 (string-length ins)) pc 0)))))) + +(define append-label-definition + (lambda (ref labels) + (let ((tuple (assoc ref labels))) + (let ((loc (cadr tuple)) + (sz (caddr tuple))) + (print-pointer loc sz))))) + +(define char->digit + (lambda (d) + (if (char=? d #\0) 0 + (if (char=? d #\1) 1 + (if (char=? d #\2) 2 + (if (char=? d #\3 ) 3 + (if (char=? d #\4) 4 + (if (char=? d #\5) 5 + (if (char=? d #\6) 6 + (if (char=? d #\7) 7 + (if (char=? d #\8) 8 + (if (char=? d #\9 ) 9 + (begin + (display "Error: '") + (display d) + (display "' is not a digit!\n") + (quit)))))))))))))) + +(define string->number + (lambda (s) + (string-fold + (lambda (c n) + (+ (* n 10) (char->digit c))) 0 s 0 (string-length s)))) + +(define assemble-instruction + (lambda (ins pc labels) + (display "assemble-instruction ")(display ins)(display "\n") + (let ((c (string-ref ins 0))) + (if (char=? c #\") + (append-character (string-ref ins 1)) + (if (char=? c #\n) + (append-number (string->number (substring ins 1 (string-length ins)))) + (if (char=? c #\l) + (append-language-constant (string->number (substring ins 1 (string-length ins)))) + (if (char=? c #\@) + (append-label-definition ins labels) + (append-vm-constant (cdr (assoc ins vm-constants-alist))) + ))))))) + +(define assemble-instructions + (lambda (instrs pc labels) + (if (null? instrs) #t + (begin + (assemble-instruction (car instrs) pc labels) + (assemble-instructions (cdr instrs (+ pc 1) labels)))))) + +(define discard-to-nl (lambda () (let ((r (read-char))) + (if (char=? r #\newline) #t + (discard-to-nl))))) +(define is-space? + (lambda (c) (if (eof-object? c) #t + (if (char=? c #\space) #t + (if (char=? c #\tab) #t + (if (char=? c #\newline) #t #f)))))) + +(define drop-chars-until (lambda (f) (let ((x (read-char))) (if (f x) x (drop-chars-until f))))) +(define next-non-ws (lambda () (drop-chars-until (lambda (z) (or (eof-object? z) + (not (is-space? z))))))) + + +(define read-to-ws + (lambda (c) + (letrec ((helper (lambda (acc) + (let ((r (read-char))) + (if (is-space? r) + (list->string (reverse acc)) + (helper (cons r acc))))))) + (helper (list c))))) + +(define read-assembly + (lambda (instrs pc labels) + (let ((c (next-non-ws))) + (if (eof-object? c) + (cons (reverse instrs) labels) + (if (char=? c #\;) + (begin + (drop-chars-until (lambda (z) (char=? z #\newline))) + (read-assembly instrs pc labels)) + (let ((tok (read-to-ws c))) + (if (char=? c #\:) + (let ((lbl (make-label-definition tok pc))) + (read-assembly instrs pc (cons lbl labels))) + (read-assembly (cons tok instrs) (+ pc 1) labels)))))))) + +(define assemble + (lambda () + (let ((instrs-labels (read-assembly '() 0 '()))) + (assemble-instructions (car instrs-labels) + (cdr instrs-labels))))) + +(assemble) +;; (display (read-assembly '() 0 '())) +;; (let ((xs (read-assembly '() 0 '()))) +;; (let ((instrs (car xs)) +;; (labels (cadr xs))) +;; (display instrs))) + From fa4d4f5b8b49d7c29adc207d0b7c0c7da0ee410d Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sat, 7 May 2022 13:54:19 -0400 Subject: [PATCH 22/31] Implement a basic breakpoint and single step functionality. --- interpreter.c | 127 ++++++++++++++++++++++++++++++++++++++++++++------ interpreter.h | 4 +- 2 files changed, 117 insertions(+), 14 deletions(-) diff --git a/interpreter.c b/interpreter.c index f422124..adf700a 100644 --- a/interpreter.c +++ b/interpreter.c @@ -123,6 +123,10 @@ static word rr = MAKE_NUM(0); /* the root regiseter can be pointer to a structure containing local variables. */ +static word callstack[256]; +static word* callstack_top = callstack; +static int single_step = 0; + /* Access the element at offset x from the top of the stack. (0 is top). @@ -196,14 +200,18 @@ static inline void print_cell(FILE *stream, word c){ */ #define DO_DUMP(stream) do{ \ int q=0; \ - fprintf(stream, "pc: %ld, hp: %ld sp: %p height: %ld\nstack:\n", \ - pc-memory, hp-memory, sp, STACK_HEIGHT()); \ + fprintf(stream, "pc: %ld, hp: %ld sp: %ld height: %ld\nstack:\n", \ + pc-memory, hp-memory, sp-memory, STACK_HEIGHT()); \ while(q < STACK_HEIGHT() ){ \ fprintf(stderr, "\t"); \ print_cell(stderr, STACK(q)); \ fprintf(stderr, "\n"); \ q++; \ } \ + fprintf(stream, "callstack:\n"); \ + for(q = 0;&callstack[q] < callstack_top;q++){ \ + fprintf(stderr, "\t%d\n", callstack[q]); \ + } \ }while(0) /* @@ -261,6 +269,7 @@ static inline void print_cell(FILE *stream, word c){ */ #define INSTRUCTION(n,x) \ n: \ + if(single_step) inspector(); \ x; \ NEXT @@ -550,9 +559,11 @@ void gc(word *new_heap) */ static inline void inspector(void){ int cmd; + fprintf(stderr, ">> "); + fflush(stderr); while( ((cmd = getchar()) != '\n')){ switch(cmd){ - case 'c': while(getchar() != '\n'); return; + case 'c': while(getchar() != '\n'); single_step = 0; return; case 'p': { int addr; scanf("%d", &addr); @@ -575,6 +586,15 @@ static inline void inspector(void){ print_cell(stderr, rr); fprintf(stderr, "\n"); } break; + case 'd': { + while(getchar()!='\n'); + DO_DUMP(stderr); + } break; + case 'n': { + while(getchar()!='\n'); + single_step = 1; + return; + } default: while(getchar()!='\n'); fprintf(stderr, "Unknown command '%c':\n" @@ -584,9 +604,17 @@ static inline void inspector(void){ "\tr\tprint root register\n", cmd); break; } + fprintf(stderr, ">> "); + fflush(stderr); } } +#define MAX_BREAKPOINTS 32 +struct breakpoint{ + word pc; + word instr; +}; + int main(int argc, char *argv[]) { /* The long awaited array of instructions! */ @@ -625,20 +653,60 @@ int main(int argc, char *argv[]) [I_ISPTR] = &&ISPTR, [I_ISBOOL] = &&ISBOOL, [I_ISCHR] = &&ISCHR, [I_ISINS] = &&ISINS, - [I_PBIN] = &&PBIN, + [I_PBIN] = &&PBIN, [I_PBLCONSTI] = &&PBLCONSTI, [I_PBVCONSTI] = &&PBVCONSTI, - [I_PBPTRI] = &&PBPTRI + [I_PBPTRI] = &&PBPTRI, + + [I_BRK] = &&BRK }; /* We first do some basic startup stuff to load the program */ - int fd; - int nread; - /* Read from a file or stdin */ - fd = argc > 1 ? open(argv[1], O_RDONLY) : STDIN_FILENO; + int fd=-1; + FILE *program_input_file=NULL; + int nread; + + struct breakpoint breakpoints[MAX_BREAKPOINTS]; + int nr_breakpoints = 0; + int tmp; + for(tmp = 1; tmp < argc; tmp++){ + if(argv[tmp][0] == '-' && argv[tmp][1] == 'b'){ + if(nr_breakpoints == MAX_BREAKPOINTS){ + fprintf(stderr, "Error: maximum breakpoints exceeded\n"); + exit(1); + } + if(argv[tmp][2] != '\0'){ + breakpoints[nr_breakpoints].pc = atoi(&argv[tmp][2]); + }else if(tmp + 1 < argc){ + tmp++; + breakpoints[nr_breakpoints].pc = atoi(argv[tmp]); + }else{ + fprintf(stderr, "Error: -b must be followed by program counter\n"); + exit(1); + } + nr_breakpoints++; + }else if(fd < 0){ + fd = open(argv[tmp], O_RDONLY); + if(fd < 0){ + fprintf(stderr, "Failed to open program file '%s'\n", argv[tmp]); + exit(1); + } + }else if(program_input_file == NULL){ + program_input_file = fopen(argv[tmp], "r"); + if(program_input_file == NULL){ + fprintf(stderr, "Failed to open program input file '%s'\n", argv[tmp]); + exit(1); + } + }else{ + fprintf(stderr, "Error: unexpected argument '%s'\n", argv[tmp]); + exit(1); + } + } if(fd < 0){ - printf("Open failed\n"); - exit(1); + fd = STDIN_FILENO; + } + if(program_input_file == NULL){ + program_input_file = stdin; } /* read in as much as we can! */ nread = read(fd, memory, MEM_SIZE*sizeof(word)); @@ -666,6 +734,17 @@ int main(int argc, char *argv[]) memory[nread-1] = ntohl(memory[nread-1]); } + /* + Apply breakpoints, replace program locations specified on the + commandline with BRK instructions. + */ + for(tmp = 0;tmp < nr_breakpoints;tmp++){ + if(breakpoints[tmp].pc < (prog_end - memory)){ + breakpoints[tmp].instr = memory[breakpoints[tmp].pc]; + memory[breakpoints[tmp].pc] = I_BRK; + } + } + /* pc points to the first cell of memory which is the first opcode to execute. All we have to do to kick things off is call the NEXT macro to jump to that instruction handler and increment the @@ -703,6 +782,12 @@ int main(int argc, char *argv[]) do{ word tmp = pc - memory; ASSERT_TYPE(STACK(0), PTR); + if(callstack_top < &callstack[255]){ + *callstack_top = pc - memory; + callstack_top++; + *callstack_top = PTR_TARGET(STACK(0)); + callstack_top++; + } pc = memory + PTR_TARGET(STACK(0)); STACK(0) = MAKE_PTR(tmp,0); }while(0) @@ -716,6 +801,10 @@ int main(int argc, char *argv[]) do{ ASSERT_TYPE(STACK(1), PTR); pc = memory + PTR_TARGET(STACK(1)); + if(callstack_top > callstack){ + callstack_top--; + callstack_top--; + } STACK(1) = STACK(0); ignore(STACK_POP()); }while(0) @@ -882,7 +971,7 @@ int main(int argc, char *argv[]) }while(0)); /* I/O */ - INSTRUCTION(GETC, STACK_PUSH(MAKE_CHAR(getchar()))); + INSTRUCTION(GETC, STACK_PUSH(MAKE_CHAR(fgetc(program_input_file)))); INSTRUCTION(DUMP, DO_DUMP(stdout)); INSTRUCTION(PINT, ASSERT_TYPE(STACK(0), NUM); printf("%"PRId32, NUM_TO_NATIVE(STACK(0))); @@ -929,7 +1018,19 @@ int main(int argc, char *argv[]) ignore(STACK_POP()); ignore(STACK_POP()); }while(0)); - + + INSTRUCTION(BRK, do{ + int brknum; + inspector(); + for(brknum = 0; brknum < nr_breakpoints; brknum++){ + if(breakpoints[brknum].pc == ((pc-1) - memory)){ + goto *instructions[breakpoints[brknum].instr]; + } + } + fprintf(stderr, "Error: failed to continue from breakpoint at %ld\n", pc - memory); + DO_DUMP(stderr); + exit(1); + }while(0)); return 0; } diff --git a/interpreter.h b/interpreter.h index e4d0324..e48f27d 100644 --- a/interpreter.h +++ b/interpreter.h @@ -109,7 +109,9 @@ typedef uint32_t uword; #define I_PBVCONSTI INS(38) #define I_PBPTRI INS(39) -#define NR_INS 40 +#define I_BRK INS(40) + +#define NR_INS 41 /* End instructions */ From 8ec54679ef121c90f23db1c2874e8d56613a3770 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sat, 7 May 2022 13:56:23 -0400 Subject: [PATCH 23/31] fix builtin implementations for binary output --- schemer.sch | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/schemer.sch b/schemer.sch index db93f8b..d5d0dd4 100644 --- a/schemer.sch +++ b/schemer.sch @@ -1362,29 +1362,33 @@ (begin (assembly-builtin-header "print_binary") (assembly-get-arg 0) - (append-instructions "PBIN") + (append-instructions "PBIN" + ins-push (asm-number 0)) (assembly-funret) ) (begin (assembly-builtin-header "print_lconst") (assembly-get-arg 0) - (append-instructions "PBLCONSTI") + (append-instructions "PBLCONSTI" + ins-push (asm-number 0)) (assembly-funret) ) (begin (assembly-builtin-header "print_vconst") (assembly-get-arg 0) - (append-instructions "PBVCONSTI") + (append-instructions "PBVCONSTI" + ins-push (asm-number 0)) (assembly-funret) ) (begin (assembly-builtin-header "print_pointer") - (assembly-get-arg 0) (assembly-get-arg 1) - (append-instructions "PBPTRI") + (assembly-get-arg 0) + (append-instructions "PBPTRI" + ins-push (asm-number 0)) (assembly-funret) ) From ea352492a650f38e239813a00e6b1ca536c8ff87 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sat, 7 May 2022 13:56:42 -0400 Subject: [PATCH 24/31] Scheme assembler seems to actually work now. --- assembler.sch | 104 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 26 deletions(-) diff --git a/assembler.sch b/assembler.sch index 82bb1ed..0dbb95e 100644 --- a/assembler.sch +++ b/assembler.sch @@ -41,11 +41,13 @@ (cons "TRUE" 128) (cons "FALSE" 129))) -(define append-vm-constant print-vconst) -(define append-language-constant print-lconst) -(define append-number print-binary) -(define append-pointer print-pointer) -(define append-character print-vconst) +(define append-vm-constant (lambda (x) (print-vconst x))) +(define append-language-constant (lambda (x) (print-lconst x))) +(define append-number (lambda (x) (print-binary x))) +(define append-pointer (lambda (x sz) (print-pointer x sz))) +(define append-character (lambda (x) (print-binary x))) +(define append-string (lambda (x) (string-fold (lambda (c acc) (append-character c)) #f x 0 (string-length x)))) +(define append-eof-char (lambda () (print-vconst (logior 8388608 255)))) (define make-label-definition (lambda (ins pc) @@ -59,7 +61,7 @@ (string->number (substring ins (+ comma 1) (string-length ins)))) (list (substring ins 1 (string-length ins)) pc 0)))))) -(define append-label-definition +(define append-label-reference (lambda (ref labels) (let ((tuple (assoc ref labels))) (let ((loc (cadr tuple)) @@ -86,31 +88,37 @@ (define string->number (lambda (s) - (string-fold - (lambda (c n) - (+ (* n 10) (char->digit c))) 0 s 0 (string-length s)))) + (let ((helper (lambda (s) + (string-fold + (lambda (c n) + (+ (* n 10) (char->digit c))) 0 s 0 (string-length s))))) + (if (char=? (string-ref s 0) #\-) + (* -1 (helper (substring s 1 (string-length s)))) + (helper s))))) (define assemble-instruction (lambda (ins pc labels) - (display "assemble-instruction ")(display ins)(display "\n") (let ((c (string-ref ins 0))) - (if (char=? c #\") - (append-character (string-ref ins 1)) - (if (char=? c #\n) - (append-number (string->number (substring ins 1 (string-length ins)))) - (if (char=? c #\l) - (append-language-constant (string->number (substring ins 1 (string-length ins)))) - (if (char=? c #\@) - (append-label-definition ins labels) - (append-vm-constant (cdr (assoc ins vm-constants-alist))) - ))))))) + (if (string=? ins "EOF") + (append-eof-char) + (if (char=? c #\') + (append-character (string-ref ins 1)) + (if (char=? c #\") + (append-string (substring ins 1 (string-length ins))) + (if (char=? c #\n) + (append-number (string->number (substring ins 1 (string-length ins)))) + (if (char=? c #\l) + (append-language-constant (string->number (substring ins 1 (string-length ins)))) + (if (char=? c #\@) + (append-label-reference (substring ins 1 (string-length ins)) labels) + (append-vm-constant (cdr (assoc ins vm-constants-alist)))))))))))) (define assemble-instructions (lambda (instrs pc labels) (if (null? instrs) #t (begin (assemble-instruction (car instrs) pc labels) - (assemble-instructions (cdr instrs (+ pc 1) labels)))))) + (assemble-instructions (cdr instrs) (+ pc 1) labels))))) (define discard-to-nl (lambda () (let ((r (read-char))) (if (char=? r #\newline) #t @@ -126,6 +134,42 @@ (not (is-space? z))))))) +(define read-string-literal-state-backslash + (lambda (acc) + (let ((c (read-char))) + (read-string-literal + (if (char=? c #\n) (cons #\newline acc) + (if (char=? c #\t) (cons #\tab acc) + (cons c acc))))))) + +(define read-string-literal + (lambda (acc) + (let ((c (read-char))) + (if (char=? c #\\) + (read-string-literal-state-backslash acc) + (if (char=? c #\") + (list->string (reverse acc)) + (read-string-literal (cons c acc))))))) + +(define read-char-literal-state-backslash + (lambda () + (let ((c (read-char)) + (closeq (read-char))) + (let ((x (if (char=? c #\n) #\newline + (if (char=? c #\t) #\tab + c)))) + (list->string (list #\' c #\')))))) + +(define read-char-literal + (lambda () + (let ((c (read-char))) + (if (char=? c #\\) + (read-char-literal-state-backslash) + (let ((x (read-char))) + (if (char=? x #\') + (list->string (list #\' c #\')) + (display "Error: expected '"))))))) + (define read-to-ws (lambda (c) (letrec ((helper (lambda (acc) @@ -144,16 +188,24 @@ (begin (drop-chars-until (lambda (z) (char=? z #\newline))) (read-assembly instrs pc labels)) - (let ((tok (read-to-ws c))) + (let ((tok (if (char=? c #\") + (read-string-literal (list c)) + (if (char=? c #\') + (read-char-literal) + (read-to-ws c))))) (if (char=? c #\:) - (let ((lbl (make-label-definition tok pc))) - (read-assembly instrs pc (cons lbl labels))) - (read-assembly (cons tok instrs) (+ pc 1) labels)))))))) + (let ((lbl (make-label-definition tok pc))) + (read-assembly instrs pc (cons lbl labels))) + (read-assembly (cons tok instrs) + (if (char=? c #\") + (+ pc (- (string-length tok) 1)) + (+ pc 1)) + labels)))))))) (define assemble (lambda () (let ((instrs-labels (read-assembly '() 0 '()))) - (assemble-instructions (car instrs-labels) + (assemble-instructions (car instrs-labels) 0 (cdr instrs-labels))))) (assemble) From 040033ee964d2ab817e7c1e292214ff9d195121c Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sat, 7 May 2022 14:26:11 -0400 Subject: [PATCH 25/31] fix newline and tab reading in scheme assembler. --- assembler.sch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembler.sch b/assembler.sch index 0dbb95e..b53289a 100644 --- a/assembler.sch +++ b/assembler.sch @@ -158,7 +158,7 @@ (let ((x (if (char=? c #\n) #\newline (if (char=? c #\t) #\tab c)))) - (list->string (list #\' c #\')))))) + (list->string (list #\' x #\')))))) (define read-char-literal (lambda () From e9a87c0cd9eec1920b49ce555a09c28a0cae6e21 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sat, 7 May 2022 14:26:47 -0400 Subject: [PATCH 26/31] in builtin definition of make-string, jump to the __vector_fill code body, not the cons box. --- schemer.sch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schemer.sch b/schemer.sch index d5d0dd4..53aeebd 100644 --- a/schemer.sch +++ b/schemer.sch @@ -1356,7 +1356,7 @@ (append-instruction (asm-label-definition "__make_string_two_args")) ; (s) (assembly-set-arg 0) (append-instructions - ins-push (asm-label-reference "vector_fill") + ins-push (asm-label-reference "__vector_fill") ins-jmp)) (begin From 477b7f181e83a4692a7f7d52b3e3402f3259b76c Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Sat, 7 May 2022 14:27:16 -0400 Subject: [PATCH 27/31] the compiler was mysteriously blowing up on stringtest.... --- stringtest.sch | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/stringtest.sch b/stringtest.sch index 8e31961..5e13f0e 100644 --- a/stringtest.sch +++ b/stringtest.sch @@ -41,10 +41,12 @@ (display x) ) -(print-char #\newline) -(print-char #\") -(print-char #\newline) -(print-char #\') -(print-char #\newline) -(print-char #\\) -(print-char #\newline) +(begin + (print-char #\newline) + (print-char #\") + (print-char #\newline) + (print-char #\') + (print-char #\newline) + (print-char #\\) + (print-char #\newline) + ) From a2e7fd1da616abbb46f9c57dc2a1f05e65ee41b3 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Thu, 11 May 2023 09:01:04 -0700 Subject: [PATCH 28/31] Fix build warnings. --- assembler.l | 2 ++ interpreter.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/assembler.l b/assembler.l index e3254eb..84a59e4 100644 --- a/assembler.l +++ b/assembler.l @@ -20,6 +20,8 @@ #include #include #include +#include + #include "interpreter.h" #define PACK_NUM(x) htonl(MAKE_NUM(x)) diff --git a/interpreter.c b/interpreter.c index adf700a..9cd6671 100644 --- a/interpreter.c +++ b/interpreter.c @@ -40,6 +40,7 @@ #include #include #include +#include /* The file interpreter.h defines all the opcodes, and the basic types @@ -566,7 +567,10 @@ static inline void inspector(void){ case 'c': while(getchar() != '\n'); single_step = 0; return; case 'p': { int addr; - scanf("%d", &addr); + if(scanf("%d", &addr) < 1){ + fprintf(stderr, "Invalid command. Expected 'p '\n"); + break; + } while(getchar()!='\n'); fprintf(stderr, "%d:\t", addr); print_cell(stderr, memory[addr]); @@ -574,7 +578,10 @@ static inline void inspector(void){ } break; case 's': { int offset; - scanf("%d", &offset); + if(scanf("%d", &offset) < 1){ + fprintf(stderr, "Invalid command. Expected 's '\n"); + break; + } while(getchar()!='\n'); fprintf(stderr, "STACK(%d):\t", offset); print_cell(stderr, STACK(offset)); From bf68db5d928da5c166295639d1beee6899fad88b Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Thu, 11 May 2023 09:01:25 -0700 Subject: [PATCH 29/31] Fix peephole by adding info for all instructions. --- peephole.sch | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/peephole.sch b/peephole.sch index 3fbb0a6..47bb6da 100644 --- a/peephole.sch +++ b/peephole.sch @@ -33,6 +33,10 @@ (define ins-islconst "ISLCONST") (define ins-ischr "ISCHR") (define ins-isins "ISINS") +(define ins-pbin "PBIN") +(define ins-pblconsti "PBLCONSTI") +(define ins-pbvconsti "PBVCONSTI") +(define ins-pbptri "PBPTRI") (define instruction-infos (list @@ -55,8 +59,8 @@ (list ins-band #f 2 -1 #f #t) (list ins-getc #f 0 1 #f #t) (list ins-dump #f 0 0 #f #t) - (list ins-pint #f 0 1 #f #t) - (list ins-pchr #f 0 1 #f #t) + (list ins-pint #f 1 -1 #f #t) + (list ins-pchr #f 1 -1 #f #t) (list ins-stor #f 3 -2 #f #f) (list ins-load #f 2 -1 #f #f) @@ -68,6 +72,10 @@ (list ins-islconst #f 1 0 #f #t) (list ins-ischr #f 1 0 #f #t) (list ins-isins #f 1 0 #f #t) + (list ins-pbin #f 1 -1 #f #t) + (list ins-pblconsti #f 1 -1 #f #t) + (list ins-pbvconsti #f 1 -1 #f #t) + (list ins-pbptri #f 1 -1 #f #t) (list ins-call #t) (list ins-ret #t) @@ -87,11 +95,16 @@ (define is-terminal-instr? (lambda (tok) (and (is-instr? tok) - (cadr (lookup-instr-info tok))))) + (let ((info (lookup-instr-info tok))) + (and (not (null? info)) + (cadr info)))))) (define instr-consumption-depth (lambda (tok) - (if (is-instr? tok) (caddr (lookup-instr-info tok)) 0))) + (if (is-instr? tok) + (let ((info (lookup-instr-info tok))) + (if (not (null? info)) + (caddr info) 0)) 0))) (define instr-stack-delta (lambda (tok) @@ -458,11 +471,16 @@ (letrec ((helper (lambda (acc) (let ((tok (asm-next-token))) (if (not tok) - (reverse acc) - (if (or (is-terminal-instr? tok) + (begin + (reverse acc)) + (begin + ;; (display "got token: ") + ;; (display tok) + ;; (display "\n") + (if (or (is-terminal-instr? tok) (is-definition? tok)) (reverse (cons tok acc)) - (helper (cons tok acc)))))))) + (helper (cons tok acc))))))))) (helper '())))) @@ -578,8 +596,11 @@ (let ((s (split bb len))) (let ((xs (car s)) (ys (cdr s))) + ;; (display "Checking sequence: ") + ;; (display xs) + ;; (display "\n") (if (check xs) - (cons #t (repl xs ys)) + (cons #t (repl xs ys)) (cons changed bb)))) (cons changed bb)))) (cons changed bb) @@ -597,10 +618,11 @@ (define go (lambda () - (let ((bb (peephole (asm-read-basic-block)))) + (let ((bb (peephole (asm-read-basic-block)))) (if (not (null? bb)) (begin - (display ";; basic block\n") + ;; (display ";; basic block\n") + (display "") (print-basic-block bb) (go)) '())))) From b37960b81a3687e72c2c7a4122238237dffed304 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Thu, 11 May 2023 22:28:02 -0700 Subject: [PATCH 30/31] Cleanup and add some more comments to compiler. --- schemer.sch | 829 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 511 insertions(+), 318 deletions(-) diff --git a/schemer.sch b/schemer.sch index 53aeebd..80246b0 100644 --- a/schemer.sch +++ b/schemer.sch @@ -15,35 +15,46 @@ ;; You should have received a copy of the GNU General Public License ;; along with toy-bytecode. If not, see . -; utility functions -(define last (lambda (l) (if (and (not (null? l)) - (pair? l) - (pair? (cdr l)) - (not (null? (cdr l)))) - (last (cdr l)) - l))) - -(define string-is-numeric? (lambda (str) - (let ((first-char (string-ref str 0)) - (strlen (string-length str)) - (test (lambda (c v) (if v (char-is-digit? c) v)))) - (and (< 0 strlen) - (if (< 1 strlen) - (and - (or (char=? first-char #\-) - (char=? first-char #\+) - (char-is-digit? first-char)) - (string-fold test #t str 1 (string-length str))) - (char-is-digit? first-char)))))) - - -; The underlying VM uses a tagged memory model that differentiates between -; numbers, pointers, vm constants, and language constants. -; Numeric values in the assembly must be tagged with an appropriate identifier -; to convince the assembler to tag the cell with the appropriate type. -; - -; string constants for each instruction +;; Section 0: General Utility Functions +;; +;; These seem more or less standard lib-like but don't exist in the +;; scheme standard lib. + +;; Equivalent to CL last function, given a pair if the cdr is a pair +;; or a pair with null cdr, return the it else return last of the cdr. +(define last + (lambda (l) + (if (and (not (null? l)) + (pair? l) + (pair? (cdr l)) + (not (null? (cdr l)))) + (last (cdr l)) + l))) + +;; Return true if a string represents a decimal number with an +;; optional +/- prefix +(define string-is-numeric? + (lambda (str) + (let ((first-char (string-ref str 0)) + (strlen (string-length str)) + (test (lambda (c v) (if v (char-is-digit? c) v)))) + (and (< 0 strlen) + (if (< 1 strlen) + (and + (or (char=? first-char #\-) + (char=? first-char #\+) + (char-is-digit? first-char)) + (string-fold test #t str 1 (string-length str))) + (char-is-digit? first-char)))))) + + +;; Section 1: Assembly Code Generation Helpers +;; +;; First we define symbols for all the commonly used instructions, +;; then some helper functions to generate ASM syntax for the assembler +;; executable. + +;; String constants for common instructions (define ins-push "PUSH") (define ins-pop "POP") (define ins-swap "SWAP") @@ -71,65 +82,136 @@ (define ins-bor "BOR") (define ins-band "BAND") -;; The instructions are all used at most once in the compiler so its -;; cheaper to include them as string literals then to clutter up the -;; environment with them. - - -;; (define ins-div "DIV") -;; (define ins-mod "MOD") -;; (define ins-getc "GETC") -;; (define ins-dump "DUMP") -;; (define ins-pint "PINT") -;; (define ins-pchr "PCHR") -;; (define ins-islconst "ISLCONST") -;; (define ins-ischr "ISCHR") -;; (define ins-isins "ISINS") - -; Language constants +;; The instructions below are all used at most once in the compiler so +;; its cheaper to include them as string literals then to clutter up +;; the environment with them. + +;; (define ins-div "DIV") +;; (define ins-mod "MOD") +;; (define ins-getc "GETC") +;; (define ins-dump "DUMP") +;; (define ins-pint "PINT") +;; (define ins-pchr "PCHR") +;; (define ins-islconst "ISLCONST") +;; (define ins-ischr "ISCHR") +;; (define ins-isins "ISINS") +;; (define ins-pbin "PBIN") +;; (define ins-pblconsti "PBLCONSTI") +;; (define ins-pblvconsti "PBLVCONSTI") +;; (define ins-pbptri "PBPTRI") +;; (define ins-brk "BRK") + +;; The underlying VM uses a tagged memory model that differentiates between +;; numbers, pointers, vm constants, and language constants. +;; Numeric values in the assembly must be tagged with an appropriate identifier +;; to convince the assembler to tag the cell with the appropriate type. + +;; Language constants (define false-value "FALSE") (define true-value "TRUE") -; return a string containing the asm representation of a number +;; Return a string containing the asm representation of a number (define asm-number (lambda (x) (string-append "n" (if (number? x) (number->string x) x)))) ; return a string containing the asm representation of a pointer (define asm-pointer (lambda (x y) (string-append "p" (if (number? x) (number->string x) x) "," (if (number? x) (number->string y) y) ))) -; return a string containing the asm representation of a language constant +;; Return a string containing the asm representation of a language constant (define asm-lang-const (lambda (x) (string-append "l" (if (number? x) (number->string x) x)))) +;; Return a string containing the asm representation of a label reference (define asm-label-reference (lambda (name) (string-append "@" name))) + +;; Return a string containing the asm representation of a label with undefined size (define asm-label-definition (lambda (name) (string-append ":" name))) + +;; Return a string containing the asm representation of a label with defined size (define asm-label-definition-sz (lambda (name sz) (string-append (asm-label-definition name) (string-append "," (number->string sz))))) - -(define ptr-type-offset (asm-number 0)) - +;; +;; Section 2: Assembly and Compiler Constants for Scheme-Level Datastructure +;; +;; We support three (and pretend we support four) datatypes at the +;; scheme level and we want to make sure we're able to distinguish +;; them at the bytecode level: +;; * pairs (consboxes) +;; * vectors +;; * strings +;; * symbols + +;; When we create Scheme level objects (strings, vectors, symbols), +;; we store a type tag at offset zero from the object base. +(define asm-ptr-type-offset (asm-number 0)) + +;; Consboxes don't get the type field, we just store the car at offset +;; 0 and cdr at offset 1 (define consbox-size 2) (define asm-consbox-size (asm-number consbox-size)) -(define consbox-car-offset (asm-number 0)) -(define consbox-cdr-offset (asm-number 1)) - -(define vector-type-flag (asm-lang-const 2)) -(define vector-length-offset (asm-number 1)) -(define raw-vector-elems-offset 2) -(define vector-elems-offset (asm-number raw-vector-elems-offset)) -(define string-type-flag (asm-lang-const 3)) -(define string-length-offset vector-length-offset) -(define string-chars-offset vector-elems-offset) -(define symbol-type-flag (asm-lang-const 3)) - - -; A few builtin forms are handled specially -; by the compiler. Some of these will later -; be subsumed by macro capabilities but for right -; now they are just special voodoo. -; -; This list associates the symbols with special -; compiler functions. +(define asm-consbox-car-offset (asm-number 0)) +(define asm-consbox-cdr-offset (asm-number 1)) + +;; Vectors, Strings, and Symbols are all the same under the hood here, +;; except we don't actually deal with symbols. +(define vector-type-flag (asm-lang-const 2)) +(define asm-vector-length-offset (asm-number 1)) +(define raw-vector-elems-offset 2) +(define asm-vector-elems-offset (asm-number raw-vector-elems-offset)) +(define string-type-flag (asm-lang-const 3)) +(define asm-string-length-offset asm-vector-length-offset) +(define asm-string-chars-offset asm-vector-elems-offset) +;; We should actually support symbols +(define symbol-type-flag (asm-lang-const 3)) + +;; +;; Section 3: Label Generation +;; +;; We should probably be a bit more careful to ensure the user can't +;; possibly introduce symbols that collide with our compiler generated +;; symbols, but instead we just claim ownership of the __ prefix. + +;; Check if the given string is "safe" as an assembly label. Meaning +;; all chars are alphanumeric+'_' +(define asm-safe + (lambda (s) + (list->string + (reverse + (string-fold (lambda (c acc) + (if (char-is-asm-safe? c) (cons c acc) + (cons #\_ acc))) '() s 0 (string-length s)))))) + +;; Keep track of how many labels we've generated, we'll append and +;; increment this every time we create a new label. +(define label-counter 0) + +;; Generate a fresh label to be used in the assembly. Primarily used +;; for lambdas and constants appearing in the source code (e.g., +;; strings or vectors). The argument `lbl` may be nil, or may be some +;; related string that the calling code would like included in the +;; generated label. This is ued to include when compiling +;; `(define foo (lambda ...))` expressions to include `foo` in the +;; label of generated for the lambda to make it slightly easier to read +;; the assembly. +(define fresh-label (lambda (lbl) + (set! label-counter (+ label-counter 1)) + (string-append (if lbl + (string-append "__anonymous_" + (asm-safe lbl)) + "__anonymous") + (number->string label-counter)) + )) + +;; +;; Section 4: Special Forms Preamble +;; +;; A few builtin forms are handled specially by the compiler. Some of +;; these will later be subsumed by macro capabilities but for right +;; now they are just special voodoo. +;; +;; This list associates the symbols with special compiler +;; functions. The handlers are actually defined below with other +;; compilation functions. (define special-forms (lambda () (list @@ -146,46 +228,51 @@ ) )) -; search the list of builtin forms for a -; particular form +;; Search the list of builtin forms for a +;; particular form (define find-special (lambda (f) (let ((x (assoc f (special-forms)))) (and x (cdr x))))) -; The top-level-env is a list containing the list of symbols -; defined by the compiler at the top level. -; -; The runtime environment is represented as a list of vectors -; references to symbols are replaced with a traversal of this -; structure based on the level of enclosing scope where the symbol was -; defined, and the index of the variable in that scope. This is taken -; directly from the SECD machine's representation of the environment. -; -; For example suppose we have something like: -; (((lambda (x) -; (lambda (z) (+ x z))) -; 5) -; 7) -; -; The inner lambda (that gets returned and applied to the arg 7) refers to three -; different symbols: '+' 'x' and 'z' that are each declared at a different depth -; in the enclosing environment. -; When this lambda is evaluated (with the argument 7) the environment will look like: -; ([7] -; [5] -; ["=" "null?" "cons" "car" "cdr" "+" ...]) -; -; So the reference to symbol 'z' will be compiled to (vector-ref (car env) 0) -; the reference to symbol 'x' will be compiled to (vector-ref (car (cdr env)) 0) and -; the reference to symbol '+' will be compiled to (vector-ref (car (cdr (cdr env))) 5) -; -; Note: this isn't strictly accurate since the symbols 'vector-ref', -; 'car' and 'cdr' are themselves defined in the environment and -; would thus require lookups making this expansion impossible. -; What really happens is that a non-closure form of the car and -; cdr procedures are invoked directly. See the functions -; u-call-* below. +;; +;; Section 5: The Environment +;; +;; The top-level-env is a list containing the list of symbols defined +;; by the compiler at the top level. +;; +;; The runtime environment is represented as a list of vectors +;; references to symbols are replaced with a traversal of this +;; structure based on the level of enclosing scope where the symbol +;; was defined, and the index of the variable in that scope. This is +;; taken directly from the SECD machine's representation of the +;; environment. +;; +;; For example suppose we have something like: +;; (((lambda (x) +;; (lambda (z) (+ x z))) +;; 5) +;; 7) +;; +;; The inner lambda (that gets returned and applied to the arg 7) +;; refers to three different symbols: '+' 'x' and 'z' that are each +;; declared at a different depth in the enclosing environment. When +;; this lambda is evaluated (with the argument 7) the environment will +;; look like: +;; ([7] +;; [5] +;; ["=" "null?" "cons" "car" "cdr" "+" ...]) +;; +;; So the reference to symbol 'z' will be compiled to (vector-ref (car env) 0) +;; the reference to symbol 'x' will be compiled to (vector-ref (car (cdr env)) 0) and +;; the reference to symbol '+' will be compiled to (vector-ref (car (cdr (cdr env))) 5) +;; +;; Note: this isn't strictly accurate since the symbols 'vector-ref', +;; 'car' and 'cdr' are themselves defined in the environment and +;; would thus require lookups making this expansion impossible. +;; What really happens is that a non-closure form of the car and +;; cdr procedures are invoked directly. See the functions +;; u-call-* below. (define top-level-env (quote ((("equal?" "equal") @@ -237,6 +324,14 @@ (define initial-env-label "__initial_environment") +;; +;; Section 6: Assembly Output Helpers +;; +;; Functions for outputting assembly code. These get used by the +;; compilation routines to actually generate assembly. + +;; Output a named literal consbox definition to the assembly stream +;; Returns the name of the consbox (define append-named-consbox (lambda (name car-value cdr-value) (append-instructions @@ -245,13 +340,20 @@ cdr-value) name)) +;; Output a literal "anonymous" consbox defintion by generating a +;; fresh label. Returns the label used so that subsequent code can +;; refer to it. (define append-consbox (lambda (car-value cdr-value) - (append-named-consbox (fresh-label) car-value cdr-value))) + (append-named-consbox (fresh-label #f) car-value cdr-value))) +;; Given a list of assembly values (numbers, constants, or symbols), +;; output a literal vector definition with the same elements to the +;; assembly code. +;; Returns a fresh label genearted to refer to the vector (define append-list-as-vector (lambda (vec-list) - (let ((lbl (fresh-label))) + (let ((lbl (fresh-label #f))) (append-instructions (asm-label-definition-sz lbl (+ (length vec-list) raw-vector-elems-offset)) vector-type-flag @@ -259,6 +361,10 @@ (apply append-instructions vec-list) lbl))) +;; Append the initial environment as a literal list with one element +;; that is a vector (as described above) to the assembly stream. +;; +;; This should only be called once. (define append-initial-env (lambda () (append-named-consbox "__nil" @@ -270,22 +376,14 @@ (append-list-as-vector (map (lambda (l) (asm-label-reference (cadr l))) (car top-level-env)))) (asm-label-reference "__nil")))) -; (fresh-label) is used to generate labels for each lambda -; expression and for string constants. -(define label-counter 0) -(define fresh-label (lambda () - (set! label-counter (+ label-counter 1)) - (string-append "__anonymous" (number->string label-counter)) - )) - -; append an instruction to the ouptut stream +;; Append an instruction to the ouptut stream (define append-instruction (lambda (ins) (begin (display ins) (display "\n")))) -; append a list of instructions to the output stream +;; Append a list of instructions to the output stream (define append-instructions (lambda inss (letrec ((helper (lambda (inss) @@ -295,69 +393,88 @@ (helper (cdr inss))))))) (helper inss)))) +;; Section 7: Intrinsics and Compilation +;; +;; We're now actually into the guts of the compiler. +;; The conventions are as follows: +;; Functions of the form +;; - 'assembly-foo' take no arguments, append asm instructions +;; to the output stream for completing the task +;; foo, and return no useful result. +;; +;; - 'u-call-foo' serve as a wrapper around the assembly-foo +;; functions. For larger blocks of assembly code +;; the u-call-foo insert a CALL to the definitions, +;; shorter ones are inlined. Again, no useful result +;; is returned. +;; +;; - 'compile-foo' these are the main compiler functions. All of +;; these take atleast two arguments, the s-expression +;; to compile and the symbolic environment list used +;; to resolve references. Some of these take a boolean +;; 'rest' argument which is a bad hack to support tail-call +;; optimization. If 'rest' is false it means their is +;; definitely no continuation to this expression and so +;; a closure invocation can be optimized by not storing the +;; return environment and using a JMP rather than a CALL +;; (see assembly-funcall vs. assembly-tailcall below). +;; All compile-* functions must return either 0-arity function +;; or false. The 0-arity function represents work that is +;; being delayed until after the compilation of the main +;; program body, e.g., the body of lambda expressions. +;; It is vital that these return values be propagated out +;; to the main compiler loop 'do-compiler-task' +;; +;; When generating non-trivial sequences of assembly functions, we'll +;; track the stack as a list in comments with the bottom of the stack +;; as the first element and the top in the last. + +;; +;; Subsection 7.1: Consbox Primitives ASM +;; +;; Assembly for the primitive list functions car, cdr, and cons + +(define assembly-car (lambda () + (append-instructions ; (pair) + ins-push asm-consbox-car-offset ; (pair car-offset) + ins-load))) ; (car) + +(define assembly-cdr (lambda () + (append-instructions ; (pair) + ins-push asm-consbox-cdr-offset ; (pair cdr-offset) + ins-load))) ; (cdr) -; -; We're now actually into the guts of the compiler. -; The conventions are as follows: -; Functions of the form -; - 'assembly-foo' take no arguments, append asm instructions -; to the output stream for completing the task -; foo, and return no useful result. -; -; - 'u-call-foo' serve as a wrapper around the assembly-foo -; functions. For larger blocks of assembly code -; the u-call-foo insert a CALL to the definitions, -; shorter ones are inlined. Again, no useful result -; is returned. -; -; - 'compile-foo' these are the main compiler functions. All of -; these take atleast two arguments, the s-expression -; to compile and the symbolic environment list used -; to resolve references. Some of these take a boolean -; 'rest' argument which is a bad hack to support tail-call -; optimization. If 'rest' is false it means their is -; definitely no continuation to this expression and so -; a closure invocation can be optimized by not storing the -; return environment and using a JMP rather than a CALL -; (see assembly-funcall vs. assembly-tailcall below). -; All compile-* functions must return either 0-arity function -; or false. The 0-arity function represents work that is -; being delayed until after the compilation of the main -; program body, e.g., the body of lambda expressions. -; It is vital that these return values be propagated out -; to the main compiler loop 'do-compiler-task' - - -; assembly for the primitive list functions car, cdr, and cons -; (ptr) -> ((car ptr)) -(define assembly-car (lambda () (append-instructions ins-push consbox-car-offset ins-load))) - -; (ptr) -> ((cdr ptr)) -(define assembly-cdr (lambda () (append-instructions ins-push consbox-cdr-offset ins-load))) - - -; (cdr car) -> ((cons car cdr)) (define assembly-cons (lambda () - (append-instructions - ins-push asm-consbox-size ; (car cdr 2) - ins-aloc ; (car cdr hp) - ins-push consbox-cdr-offset ; (car cdr hp 1) - ins-stor ; (car hp) cdr stored - ins-push consbox-car-offset ; (car hp 0) - ins-stor) ; (hp) cdr stored + (append-instructions ; (car cdr) + ins-push asm-consbox-size ; (car cdr 2) + ins-aloc ; (car cdr hp) + ins-push asm-consbox-cdr-offset ; (car cdr hp 1) + ins-stor ; (car hp) cdr stored + ins-push asm-consbox-car-offset ; (car hp 0) + ins-stor) ; (hp) cdr stored )) +(define assembly-set-car + (lambda () + (append-instructions ; (value pair) + ins-push asm-consbox-car-offset ; (value pair car-offset) + ins-stor))) ; (pair) -; top is the cons box to set, then the new value -(define assembly-set-car - (lambda () (append-instructions ins-push consbox-car-offset ins-stor))) (define assembly-set-cdr - (lambda () (append-instructions ins-push consbox-cdr-offset ins-stor))) + (lambda () + (append-instructions ; (value pair) + ins-push asm-consbox-cdr-offset ; (value pair car-offset) + ins-stor))) ; (pair) + +;; +;; Subsection 7.2: Consbox Primitive Invocation +;; +;; These define how to call the primitives car, cdr, set-car, set-cdr, +;; cons, and make-vector as part of larger compiler generated +;; sequences (e.g., function application) for car, cdr, set-car, and +;; set-cdr we just inline the assembly. For cons and make-vector we +;; do a machine level call into a function. -; these define how to call the three primitives car, cdr, and cons as -; part of larger compiler generated sequences (e.g., function application) -; for car, we just inline the assembly. For cdr and cons we do a machine level -; call into a function. (define u-call-car (lambda () (assembly-car))) ; car is 3 instructions, a function call is the same length ; so there is no reason not to inline it. @@ -372,18 +489,22 @@ (define u-call-set-car (lambda () (assembly-set-car))) (define u-call-set-cdr (lambda () (assembly-set-cdr))) -(define u-call-make-vector (lambda () (append-instructions - ins-push (asm-label-reference "__u_make_vector_nofill") - ins-call))) - -; function application convention -; top of stack is the closure to apply, then the arguments -; this is tricky. We need to cons the argument list onto -; the closure's environment, store the existing -; environment pointer to the stack, set the environment -; pointer to the new list, invoke the closure's code, -; then restore the environment pointer on return. -; +(define u-call-make-vector + (lambda () + (append-instructions + ins-push (asm-label-reference "__u_make_vector_nofill") + ins-call))) + +;; +;; Section 7.3: Function Invocation and Argument Handling +;; +;; The convention is that the top of stack is the closure to apply, +;; then the arguments this is tricky. We need to cons the argument +;; list onto the closure's environment, store the existing environment +;; pointer to the stack, set the environment pointer to the new list, +;; invoke the closure's code, then restore the environment pointer on +;; return. +;; (define assembly-make-args-helper (lambda (nr-args) (if (= nr-args 0) #f (begin @@ -398,7 +519,7 @@ (u-call-make-vector) (assembly-make-args-helper nr-args))) -;; special case for referencing arguments to this function (i.e., depth = 0). +;; Special case for referencing arguments to this function (i.e., depth = 0). (define assembly-get-arg (lambda (idx) (append-instruction ins-rdrr) @@ -421,10 +542,12 @@ (append-instruction ins-rdrr) (u-call-car) (append-instructions - ins-push vector-length-offset + ins-push asm-vector-length-offset ins-load))) -; (args clos) -> ((clos args)) +;; Actual assembly code for performing a scheme level function +;; invocation. This is relatively long, so actual callsites will do a +;; macihne level CALL to this stub to perform the functioncall. (define assembly-funcall (lambda () (append-instructions ; (args clos rp) (asm-label-definition "__funcall_tramp") @@ -443,47 +566,49 @@ (u-call-cdr) ; (renv rp clos-code) (append-instruction ins-jmp))) -(define u-call-funcall (lambda () - (append-instructions ins-push (asm-label-reference "__funcall_tramp") - ins-call - ins-swap - ins-wtrr))) +(define u-call-funcall + (lambda () + (append-instructions ; (args clos) + ins-push (asm-label-reference "__funcall_tramp") ; (args clos __funcall_tramp) + ins-call ; (envptr retval) + ins-swap ; (retval envptr) + ins-wtrr))) ; (retval) + +;; Tail calls are sneakier because we avoid saving the current env pointer. +(define assembly-tailcall + (lambda () + (append-instructions + (asm-label-definition "__tailcall_tramp") + ins-dup) ; (renv rp args clos clos) + (u-call-car) ; (renv rp args clos env) + (append-instructions + ins-swap ; (renv rp args env clos) + ins-rot) ; (renv rp clos args env) + (u-call-cons) ; (renv rp clos (args . env)* ) + (append-instruction ins-wtrr) ; (renv rp clos) rr = (args . env) + ; note that we didn't store the current env + ; this is a tail call so we'll return straight + ; to the current renv/rp! + (u-call-cdr) ; (renv rp code) + (append-instruction ins-jmp) ; we jump into the call with + ; (renv rp) + ; on return we'll have pc = rp, and + ; (renv rval) on the stack + ; just as on return from non-tail call above. + )) -; tail calls are sneakier we avoid saving the current -; env pointer. -; (args clos) -> ((clos args)) - (define assembly-tailcall (lambda () - (append-instructions - (asm-label-definition "__tailcall_tramp") - ins-dup) ; (renv rp args clos clos) - (u-call-car) ; (renv rp args clos env) - (append-instructions - ins-swap ; (renv rp args env clos) - ins-rot) ; (renv rp clos args env) - (u-call-cons) ; (renv rp clos (args . env)* ) - (append-instruction ins-wtrr) ; (renv rp clos) rr = (args . env) - ; note that we didn't store the current env - ; this is a tail call so we'll return straight - ; to the current renv/rp! - (u-call-cdr) ; (renv rp code) - (append-instruction ins-jmp) ; we jump into the call with - ; (renv rp) - ; on return we'll have pc = rp, and - ; (renv rval) on the stack - ; just as on return from non-tail call above. - )) - -(define u-call-tailcall (lambda () - (append-instructions "PUSH" (asm-label-reference "__tailcall_tramp") - "JMP"))) +(define u-call-tailcall + (lambda () + (append-instructions ; (renv rp args clos) + ins-push (asm-label-reference "__tailcall_tramp") ; (renv rp args clos __tailcal_tramp) + ins-jmp))) ; never comes back ; returning is simple since cleanup is handled by the caller (define assembly-funret (lambda () (append-instruction ins-ret))) - -; Assembly for loading a cell from the environment. -; assembly-env-cell places the cons box whose car is -; at the desired offsets on the stack. -; assembly-env-val actually loads the value. + +;; Assembly for loading a cell from the environment. +;; assembly-env-cell places the cons box whose car is at the desired +;; offsets on the stack. assembly-env-val actually loads the value. (define assembly-env-vec (lambda (depth) (append-instructions @@ -528,10 +653,32 @@ (define assembly-nil (lambda () - (append-instructions ins-push (asm-label-reference "__nil") ))) - -; Lookup functions, find a particular symbol in the symbolic environment -; list. These are complimentary to the assembly-env-* functions above. + (append-instructions ins-push (asm-label-reference "__nil") ))) + +;; +;; Subsection 7.3: Symbolic Environment +;; +;; As noted above, the runtime environment is indexed by integers +;; corresponding to when things are declared. The compiler tracks the +;; "symbolic environment" mapping between symbol names and their index +;; in the runtime environment. These routines deal with that mapping. +;; +;; Unlike the runtime environment, the symbolic environment is a list +;; of lists (rather than a list of vectors. Symbols from the most +;; local scope are stored in the car of the list working out to the +;; top-level env. The 'depth' of a symbol is the index of its scsope +;; in the environment, while the 'offset' is the index within this +;; scope. + +;; Lookup the index of a symbol in an list of symbols return #f if not +;; found. +;; * r is the symbol name we're looking up +;; * e is the environment to search in +;; * cont is the continuation to pass the offset to +;; +;; For some reason this handles the case where the element in the list +;; is a list whose car is the symbol we're looking for. Not sure if +;; this is important. (define lookup-reference-offset (lambda (r e cont) (if (null? e) (cont #f) @@ -541,6 +688,12 @@ (lambda (z) (cont (if z (+ z 1) z)))))))) +;; Lookup the depth and offset of a symbol name in the +;; environment. Arguments are the same as for lookup-reference-offset +;; except that `e` is the full (or a tail of) the symbolic +;; environment. +;; +;; Return value is either a conspair of (depth . offset) or #f (define lookup-reference-depth (lambda (r e cont) (if (null? e) (cont #f) @@ -559,14 +712,33 @@ (lambda (r e) (lookup-reference-depth r e (lambda (x) x)))) -; do-compile-task is the main compiler loop. -; it takes a 0-arity function to invoke (or false), -; and recurs on the result of invoking the function. +;; +;; Subsection 7.4: Actually Compiling Stuff +;; +;; The structure/flow of the compilation process is a little +;; convoluted to ensure all (define ...) forms are introduced before +;; their bodies are compiled to allow them to refer to eachother. +;; +;; * compiler-run that reads sexps (via the reader) until EOF is +;; hit and calls compile-sexp on each + +;; * compile-sexp takes a sexp, compiles it, and returns a 0-arity +;; function for any deferred compilation tasks (e.g., lambda +;; bodies). Actual compilation is performed by various +;; `compile-foo` functions specializing on different forms +;; (lists, numbers, special forms defined above, etc...) +;; +;; * do-compile-task is used to recursively evaluate a compilation +;; task expressed as a 0-arity function that returns either a new +;; compilation task or #f on completion. +;; + +;; do-compile-task is the main compiler loop. it takes a 0-arity +;; function to invoke (or false), and recurs on the result of invoking +;; the function. (define do-compile-task (lambda (t) (if t (do-compile-task (t)) #f))) -; Compilation functions - (define compile-number (lambda (c env) (append-instructions ins-push (asm-number c)) #f)) @@ -584,7 +756,7 @@ (define compile-string (lambda (s env) - (let ((strlabel (fresh-label)) + (let ((strlabel (fresh-label #f)) (strlen (calculate-string-length s))) (append-instructions ins-push (asm-label-reference strlabel)) (lambda () @@ -600,7 +772,7 @@ (define compile-symbol (lambda (s env) - (let ((symlabel (fresh-label)) + (let ((symlabel (fresh-label #f)) (symlen (calculate-symbol-length s))) (append-instructions ins-push (asm-label-reference symlabel)) (lambda () @@ -668,6 +840,8 @@ (compile-symbol x env) (compile-reference x env)))))))))) +;; Return the prefix of a list that is a "proper" list. e.g., given +;; `(1 2 3 . 4)` returns `(1 2 3)` (define list-part (lambda (l) (letrec ((helper (lambda (l acc) @@ -697,8 +871,8 @@ ; code is statically defined below initial heap pointer but in order ; to support eval we'll have to do something more clever later. (define compile-lambda - (lambda (l env rest) - (let ((label (fresh-label))) + (lambda (l env rest lbl) + (let ((label (fresh-label lbl))) (append-instructions ins-rdrr ins-push (asm-label-reference label) ) (u-call-cons) @@ -714,14 +888,14 @@ (define compile-let-bindings (lambda (bs env) (if (null? bs) #f - (let ((r2 (compile-sexp (car (cdr (car bs))) env #t))) + (let ((r2 (compile-sexp (car (cdr (car bs))) env #t #f))) (let ((r1 (compile-let-bindings (cdr bs) env))) (lambda () (do-compile-task r1) (do-compile-task r2))))))) (define compile-let - (lambda (l env rest) + (lambda (l env rest lbl) (let ((r1 (compile-let-bindings (car (cdr l)) env)) (e (map (lambda (x) (car x)) (car (cdr l))))) (assembly-make-args (length (cadr l))) @@ -741,14 +915,14 @@ )))) (define compile-set! - (lambda (l env rest) + (lambda (l env rest lbl) (let ((cell-id (lookup-reference (cadr l) env))) - (let ((r (compile-sexp (caddr l) env #t))) + (let ((r (compile-sexp (caddr l) env #t #f))) (assembly-set-env-val (length env) (car cell-id) (cdr cell-id)) r)))) (define compile-letrec - (lambda (l env rest) + (lambda (l env rest lbl) (letrec ((empty-binders (map (lambda (b) (list (car b) (list "quote" '()))) (cadr l))) (helper (lambda (binders body) @@ -761,15 +935,15 @@ (cons empty-binders (helper (reverse (cadr l)) (cddr l)))) - env rest)))) + env rest #f)))) (define compile-begin - (lambda (l env rest) (compile-sequence (cdr l) env rest))) + (lambda (l env rest lbl) (compile-sequence (cdr l) env rest))) (define compile-sequence (lambda (l env rest) (if (null? l) #f - (let ((r1 (compile-sexp (car l) env (if (null? (cdr l)) rest #t) ))) + (let ((r1 (compile-sexp (car l) env (if (null? (cdr l)) rest #t) #f))) (if (not (null? (cdr l))) (append-instruction ins-pop) #f @@ -791,10 +965,10 @@ ; where v is the value of the defined symbol and pre-env and ; post-env are the environments before and after the call. (define compile-define - (lambda (l env rest) + (lambda (l env rest lbl) (append-instruction (string-append ";; Definition of " (car (cdr l)))) (let ((v (lookup-reference (car (cdr l)) env)) - (r (compile-sexp (car (cdr (cdr l))) env #t))) + (r (compile-sexp (car (cdr (cdr l))) env #t (car (cdr l))))) (if v (begin (append-instruction (string-append ";; Updating binding " (car (cdr l)))) @@ -809,10 +983,10 @@ r))) (define compile-and - (lambda (l env rest) - (let ((out-label (fresh-label))) + (lambda (l env rest lbl) + (let ((out-label (fresh-label #f))) (letrec ((helper (lambda (es rs) - (let ((r (compile-sexp (car es) env #t)) + (let ((r (compile-sexp (car es) env #t #f)) (es (cdr es))) (if (null? es) (begin @@ -831,16 +1005,16 @@ (helper (cdr l) (lambda () #f))))))) (define compile-or - (lambda (l env rest) - (let ((out-label (fresh-label))) + (lambda (l env rest lbl) + (let ((out-label (fresh-label #f))) (letrec ((helper (lambda (es rs) - (let ((r (compile-sexp (car es) env #t)) + (let ((r (compile-sexp (car es) env #t #f)) (es (cdr es))) (if (null? es) (begin (append-instruction (asm-label-definition out-label)) (lambda () (do-compile-task r) (rs))) - (let ((next-term (fresh-label))) + (let ((next-term (fresh-label #f))) (append-instructions ins-dup ins-push false-value ins-eq @@ -858,28 +1032,28 @@ ; when we can detect application of a builtin ; we can avoid function call overhead and just inline the assembly (define compile-if - (lambda (l env rest) + (lambda (l env rest lbl) (if (not (= (length l) 4)) (begin (display "Error in compile-if wrong number of arguments\n\t") (display l) (newline) (quit)) - (let ((false-label (fresh-label)) - (join-label (fresh-label)) + (let ((false-label (fresh-label #f)) + (join-label (fresh-label #f)) (conditional (car (cdr l))) (true-case (car (cdr (cdr l)))) (false-case (car (cdr (cdr (cdr l)))))) - (let ((r1 (compile-sexp conditional env #t))) + (let ((r1 (compile-sexp conditional env #t #f))) (append-instructions ins-push false-value ins-eq ins-push (asm-label-reference false-label) ins-jtrue) - (let ((r2 (compile-sexp true-case env rest))) + (let ((r2 (compile-sexp true-case env rest #f))) (append-instructions ins-push (asm-label-reference join-label) ins-jmp (asm-label-definition false-label)) - (let ((r3 (compile-sexp false-case env rest))) + (let ((r3 (compile-sexp false-case env rest #f))) (append-instruction (asm-label-definition join-label)) (lambda () (do-compile-task r1) @@ -906,14 +1080,14 @@ (compile-atom s env #t))))) (define compile-quote - (lambda (s env rest) + (lambda (s env rest lbl) (compile-quoted-sexp (car (cdr s)) env rest))) (define compile-arguments (lambda (n l env) (if (null? l) (assembly-make-args n) - (let ((r2 (compile-sexp (car l) env #t))) + (let ((r2 (compile-sexp (car l) env #t #f))) (let ((r1 (compile-arguments n (cdr l) env))) (lambda () (do-compile-task r1) @@ -921,12 +1095,12 @@ )))))) (define compile-list - (lambda (l env rest) + (lambda (l env rest lbl) (let ((s (find-special (car l)))) (if s - (s l env rest) + (s l env rest lbl) (let ((r1 (compile-arguments (length (cdr l)) (cdr l) env))) - (let ((r2 (compile-sexp (car l) env #t))) + (let ((r2 (compile-sexp (car l) env #t #f))) (if rest (u-call-funcall) (u-call-tailcall) @@ -936,9 +1110,9 @@ (do-compile-task r2)))))))) (define compile-sexp - (lambda (s env rest) + (lambda (s env rest lbl) (if (list? s) - (compile-list s env rest) + (compile-list s env rest lbl) (compile-atom s env #f)))) (define assembly-builtin-header @@ -949,11 +1123,14 @@ (asm-label-reference uu-name)) (append-instruction (asm-label-definition uu-name))))) +;; +;; Section 8: Compiler Intrinsics ASM +;; (define define-builtin-functions (lambda (initial-env) (begin - (let ((loop (fresh-label)) - (out (fresh-label))) + (let ((loop (fresh-label #f)) + (out (fresh-label #f))) ;; (display "variadic plist") (append-instructions (asm-label-definition "__u_make_varargs_list") @@ -985,7 +1162,7 @@ (u-call-car) ;; (rp nr-fixed-params (- i 1) i (- i 1) (car env)) (append-instructions ins-swap ;; (rp nr-fixed-params (- i 1) l (car env) (- i 1)) - ins-push vector-elems-offset ;; (rp nr-fixed-params (- i 1) l (car env) (- i 1) vector-elems-offset) + ins-push asm-vector-elems-offset ;; (rp nr-fixed-params (- i 1) l (car env) (- i 1) vector-elems-offset) ins-add ;; (rp nr-fixed-params (- i 1) l (car env) (+ (- i 1) vector-elems-offset)) ins-load ;; (rp nr-fixed-params (- i 1) l (aref (car env) (- i 1))) ins-swap) ;; (rp nr-fixed-params (- i 1) (aref (car env) (- i 1)) l) @@ -1000,7 +1177,7 @@ (u-call-car) ;; (rp l nr-fixed-params (car env))) (append-instructions ins-swap ;; (rp l (car env) nr-fixed-params) - ins-push vector-elems-offset ;; (rp l (car env) nr-fixed-params vector-elems-offset) + ins-push asm-vector-elems-offset ;; (rp l (car env) nr-fixed-params vector-elems-offset) ins-add ;; (rp l (car env) (+ nr-fixed-params vector-elems-offset)) ins-stor ;; (rp (car env)) ins-ret))) ;; ((car env)) @@ -1093,8 +1270,8 @@ (append-instruction "MOD") (assembly-funret)) - (let ((shl-label (fresh-label)) - (out-label (fresh-label))) + (let ((shl-label (fresh-label "shl")) + (out-label (fresh-label #f))) (assembly-builtin-header "arithmetic_shift") (assembly-get-arg 0) (assembly-get-arg 1) @@ -1194,7 +1371,7 @@ (assembly-funret) (append-instructions (asm-label-definition "__string_q_is_ptr") - ins-push ptr-type-offset + ins-push asm-ptr-type-offset ins-load ins-push string-type-flag ins-eq) @@ -1245,7 +1422,7 @@ ins-eq ; (rp x (= x nil)) ins-push (asm-label-reference "__pair_q_isnil") ; (rp x (= x nil) @pair_q_is_nil) ins-jtrue ; (rp x) - ins-push ptr-type-offset ; (rp x 0) + ins-push asm-ptr-type-offset ; (rp x 0) ins-load "ISLCONST" ins-push (asm-label-reference "__pair_q_islconst") @@ -1257,7 +1434,7 @@ (begin (assembly-builtin-header "vector_length") (assembly-get-arg 0) - (append-instructions ins-push string-length-offset ins-load) + (append-instructions ins-push asm-string-length-offset ins-load) (assembly-funret)) (begin @@ -1266,7 +1443,7 @@ (assembly-get-arg 0) ; (c vec) (assembly-get-arg 1) ; (c vec n) (append-instructions - ins-push vector-elems-offset ; (c vec n 2) + ins-push asm-vector-elems-offset ; (c vec n 2) ins-add ; (c vec (+ n 2)) ins-stor) ; (vec) (assembly-funret)) @@ -1281,7 +1458,7 @@ ins-swap ; (n s) ins-dup ; (n s s) ins-rot ; (s n s) - ins-push vector-length-offset ; (s n s vector-length-offset) + ins-push asm-vector-length-offset ; (s n s vector-length-offset) ins-load ; (s n total-length) ins-swap ; (s total-length n) ins-dup ; (s total-length n n) @@ -1291,7 +1468,7 @@ ins-jtrue ; (s n) ins-dup ; (s n n) ins-rot ; (n s n) - ins-push vector-elems-offset ; (n s n offset) + ins-push asm-vector-elems-offset ; (n s n offset) ins-add) ; (n s (+ n offset)) (assembly-get-arg 1) ; (n s (+ n offset) v) (append-instructions @@ -1311,14 +1488,14 @@ (asm-label-definition "__u_make_vector_nofill") ; (n rp) ins-swap ; (rp n) ins-dup ; (n n) - ins-push vector-elems-offset ; (n n vector-elems-offset) + ins-push asm-vector-elems-offset ; (n n vector-elems-offset) ins-add ; (n (+ n vector-elems-offset)) ins-aloc ; (n v) ins-push vector-type-flag ; (n v vector-type-flag) ins-swap ; (n vector-type-flag v) - ins-push ptr-type-offset ; (n vector-type-flag v ptr-type-offset) + ins-push asm-ptr-type-offset ; (n vector-type-flag v ptr-type-offset) ins-stor ; (n v) - ins-push vector-length-offset ; (n v vector-size-offset) + ins-push asm-vector-length-offset ; (n v vector-size-offset) ins-stor ; (v) ins-ret) (assembly-builtin-header "make_vector") @@ -1336,14 +1513,14 @@ (assembly-get-arg 0) ; (n) (append-instructions ins-dup ; (n n) - ins-push string-chars-offset ; (n n 2) + ins-push asm-string-chars-offset ; (n n 2) ins-add ; (n (+ n 2)) ins-aloc ; (n s) - ins-push string-length-offset ; (n s 1) + ins-push asm-string-length-offset ; (n s 1) ins-stor ; (s) ins-push string-type-flag ; (s string-type-flag) ins-swap ; (string-type-flag s) - ins-push ptr-type-offset ; (string-type-flag s 0) + ins-push asm-ptr-type-offset ; (string-type-flag s 0) ins-stor ; (s) ) (assembly-nrargs) ; (s nr-args) @@ -1397,9 +1574,9 @@ (assembly-get-arg 0) (assembly-get-arg 1) (append-instructions - ins-push vector-elems-offset - ins-add - ins-load) + ins-push asm-vector-elems-offset + ins-add + ins-load) (assembly-funret)) (begin @@ -1418,35 +1595,18 @@ (assembly-funret)) )) - - -; compiler-run is the entry point into the compilation system. -; it reads a sexp, checks for EOF compiles the sexp, and then -; calls do-compile-task with a function that will call compiler-run -; again and then return the delayed work. If EOF is found, and END -; instruction is written and the delayed work finally gets evaluated. -(define compiler-run - (lambda () - (let ((sp (read-sexp))) - (if sp - (let ((r (compile-sexp sp top-level-env #t))) - (do-compile-task - (lambda () - (compiler-run) - r))) - (begin - (append-instruction ins-end)))))) -; Into the reader. -; -; The reader is pretty simple. -; read-sexp calls either read-atom or read-list -; read-atom reads an atom and returns a pair with the atom read, -; and the next character of the input stream. -; -; read-list reads sexps until the returned 'next-char' is a close peren, -; then returns the list read, and the next non-whitespace character. -; reader utility functions +;; +;; Section 9: The Reader +;; +;; The reader is pretty simple. +;; read-sexp calls either read-atom or read-list +;; read-atom reads an atom and returns a pair with the atom read, +;; and the next character of the input stream. +;; +;; read-list reads sexps until the returned 'next-char' is a close peren, +;; then returns the list read, and the next non-whitespace character. +;; reader utility functions (define is-space? (lambda (c) (if (eof-object? c) #t (if (char=? c #\space) #t @@ -1460,11 +1620,20 @@ (if (char=? c #\; ) #t (if (char=? c #\.) #t #f)))))))) +(define char-is-alpha? (lambda (c) + (or (and (char>=? c #\a) (char<=? c #\z)) + (and (char>=? c #\A) (char>=? c #\Z))))) + (define char-is-digit?(lambda (c) (if (char>=? c #\0) (char<=? c #\9) #f))) (define is-char-name? (lambda (s) (if (string=? s "newline") #t (if (string=? s "space") #t (if (string=? s "tab") #t #f))))) +(define char-is-asm-safe? (lambda (c) + (or (char-is-digit? c) + (char-is-alpha? c) + (char=? c #\_)))) + (define drop-chars-until (lambda (f) (let ((x (read-char))) (if (f x) x (drop-chars-until f))))) (define next-non-ws (lambda () (drop-chars-until (lambda (z) (not (is-space? z)))))) @@ -1620,7 +1789,31 @@ (define read-sexp (lambda () (parse-token (next-token)))) -; then run the compiler. +;; +;; Section 10: Main +;; + +;; Compiler-run is the entry point into the compilation system. +;; it reads a sexp, checks for EOF compiles the sexp, and then +;; calls do-compile-task with a function that will call compiler-run +;; again and then return the delayed work. If EOF is found, an END +;; instruction is written and the delayed work finally gets evaluated. +(define compiler-run + (lambda () + (let ((sp (read-sexp))) + (if sp + (let ((r (compile-sexp sp top-level-env #t #f))) + (do-compile-task + (lambda () + (compiler-run) + r))) + (begin + (append-instruction ins-end)))))) + +;; +;; Output preamble stuff, run the compiler, output the initial +;; environment +;; (append-instructions ins-push (asm-label-reference initial-env-label) ins-wtrr) (compiler-run) From 27b092cb0d90daba41e363a4fe2292cc5c3505b8 Mon Sep 17 00:00:00 2001 From: "J. Aaron Pendergrass" Date: Wed, 22 Nov 2023 16:01:37 -0500 Subject: [PATCH 31/31] Tweak makefile to use a SCHEME variable instead of hardcoding guile when building the bootstrap compiler. --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d04c0fb..6014b90 100644 --- a/Makefile +++ b/Makefile @@ -17,6 +17,7 @@ # CFLAGS=-g CFLAGS ?= -O3 +SCHEME=guile all : schemer.bytecode peephole.bytecode interpreter trace-interpreter safe-interpreter assembler @@ -51,7 +52,7 @@ schemer-bootstrap.bytecode: assembler schemer-bootstrap.asm ./assembler $@ schemer-bootstrap.asm : schemer.sch lib.sch - cat lib.sch schemer.sch | guile --use-srfi=13 schemer.sch > $@ + cat lib.sch schemer.sch | $(SCHEME) --use-srfi=13 schemer.sch > $@ assembler.yy.o : assembler.yy.c gcc ${CFLAGS} -c assembler.yy.c