diff --git a/README.md b/README.md index 4a1bf55..39b4e0b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,5 @@ -# lzm -Lisp Z-Machine +# Lisp Z-Machine + +This is a simple Z-Machine implementation, built as a learning exercise. +It currently supports only v3 story files. + diff --git a/cpu.lisp b/cpu.lisp new file mode 100644 index 0000000..da618c2 --- /dev/null +++ b/cpu.lisp @@ -0,0 +1,45 @@ +(in-package #:lzm) + +(defvar *stack*) +(defvar *frame*) +(defvar *pc*) +(defvar *break*) + +(defun fetch-u8 () + (prog1 (read-u8 *pc*) + (incf *pc*))) + +(defun fetch-u16 () + (prog1 (read-u16 *pc*) + (incf *pc* 2))) + +(defun stack-push (value) + (vector-push-extend value *stack*)) + +(defun stack-pop () + (vector-pop *stack*)) + +(defun stack-top () + (aref *stack* (1- (length *stack*)))) + +(defun ret (val) + (setf (fill-pointer *stack*) *frame* + *frame* (stack-pop) + *pc* (stack-pop)) + (write-var (stack-pop) val)) + +(defun branch (condition) + (setf condition (not (not condition))) + (let* ((b (fetch-u8)) + (offset (ldb (byte 6 0) b))) + (unless (logbitp 6 b) + (let ((b2 (fetch-u8))) + (setf offset (logior (ash offset 8) b2)))) + (when (logbitp 13 offset) + ;; Handle negative branch + (decf offset #x4000)) + (when (eql condition (logbitp 7 b)) + (case offset + (0 (ret 0)) + (1 (ret 1)) + (otherwise (incf *pc* (- offset 2))))))) diff --git a/decode.lisp b/decode.lisp new file mode 100644 index 0000000..895cd8a --- /dev/null +++ b/decode.lisp @@ -0,0 +1,57 @@ +(in-package #:lzm) + +(defconstant +operand-type-word+ 0) +(defconstant +operand-type-byte+ 1) +(defconstant +operand-type-var+ 2) +(defconstant +operand-type-none+ 3) + +(defun read-operand (operand-type) + (ecase operand-type + (#.+operand-type-word+ (fetch-u16)) + (#.+operand-type-byte+ (fetch-u8)) + (#.+operand-type-var+ (read-var (fetch-u8))))) + +(defun decode-operand-types (b) + (loop for pos from 6 downto 0 by 2 + for type = (ldb (byte 2 pos) b) + until (= type +operand-type-none+) + collect type)) + +(defun opcode-type (b) + (cond + ((and (= b #xbe) (>= *version* 5)) :opcode-type-ext) + ((not (logbitp 7 b)) :opcode-type-long) + ((logbitp 6 b) :opcode-type-var) + (t :opcode-type-short))) + +(defun decode-instruction () + (let ((b (fetch-u8))) + (ecase (opcode-type b) + (:opcode-type-ext nil) + (:opcode-type-long + (list :2op + (logand b #b11111) + (if (logbitp 6 b) +operand-type-var+ +operand-type-byte+) + (if (logbitp 5 b) +operand-type-var+ +operand-type-byte+))) + (:opcode-type-var + (list* (if (logbitp 5 b) :var :2op) + (logand b #b11111) + (decode-operand-types (fetch-u8)))) + (:opcode-type-short + (let ((opcode (ldb (byte 4 0) b)) + (operand-type (ldb (byte 2 4) b))) + (if (= operand-type +operand-type-none+) + (list :0op opcode) + (list :1op opcode operand-type))))))) + +(defun next-instruction () + (let ((pc *pc*)) + (destructuring-bind (arity opcode &rest optypes) (decode-instruction) + (let ((operands (mapcar #'read-operand optypes))) + (if-let (handler (find-handler arity opcode)) + (progn + ;; (format t "PC $~x: ~a ~a ~%" pc handler operands) + (apply handler operands)) + (progn + (format t "No handler for ~a $~x at PC $~x" arity opcode pc) + (setf *break* t))))))) diff --git a/header.lisp b/header.lisp new file mode 100644 index 0000000..0cb6b89 --- /dev/null +++ b/header.lisp @@ -0,0 +1,30 @@ +(in-package #:lzm) + +(defvar *version*) +(defvar *high-memory-offset*) +(defvar *initial-pc*) +(defvar *dictionary-offset*) +(defvar *object-table-offset*) +(defvar *global-table-offset*) +(defvar *static-memory-offset*) +(defvar *abbrev-table-offset*) + +(defun read-header () + (setf *version* (read-u8 #x00) + *high-memory-offset* (read-u16 #x04) + *initial-pc* (read-u16 #x06) + *dictionary-offset* (read-u16 #x08) + *object-table-offset* (read-u16 #x0a) + *global-table-offset* (read-u16 #x0c) + *static-memory-offset* (read-u16 #x0e) + *abbrev-table-offset* (read-u16 #x18))) + +(defun print-header () + (format t "Version: ~d~%" *version*) + (format t "High memory offset: ~4,'0x~%" *high-memory-offset*) + (format t "Initial PC: ~4,'0x~%" *initial-pc*) + (format t "Dictionary offset: ~4,'0x~%" *dictionary-offset*) + (format t "Object table offset: ~4,'0x~%" *object-table-offset*) + (format t "Global table offset: ~4,'0x~%" *global-table-offset*) + (format t "Static memory offset: ~4,'0x~%" *static-memory-offset*) + (format t "Abbrev table offset: ~4,'0x~%" *abbrev-table-offset*)) diff --git a/lzm.asd b/lzm.asd new file mode 100644 index 0000000..cfba118 --- /dev/null +++ b/lzm.asd @@ -0,0 +1,16 @@ +(defsystem :lzm + :author "Nick Maher" + :description "Lisp Z-Machine" + :depends-on (:alexandria) + :components ((:file "packages") + (:file "mem") + (:file "header") + (:file "text") + (:file "var") + (:file "obj") + (:file "cpu") + (:file "rand") + (:file "tok") + (:file "op") + (:file "decode") + (:file "main"))) diff --git a/main.lisp b/main.lisp new file mode 100644 index 0000000..2c4f160 --- /dev/null +++ b/main.lisp @@ -0,0 +1,39 @@ +(in-package #:lzm) + +(defun read-data (filename) + (with-open-file (f filename :element-type '(unsigned-byte 8)) + (let ((data (make-array (file-length f) + :element-type '(unsigned-byte 8)))) + (read-sequence data f) + data))) + +(defun init (filename) + (setf *memory* (read-data filename)) + (read-header) + ;; TODO: Validate header + (init-dictionary) + (setf *pc* *initial-pc*) + (setf *frame* 0) + (setf *break* nil) + (setf *rand-seed* nil) + (setf *stack* (make-array 0 :fill-pointer 0 + :adjustable t + :element-type '(unsigned-byte 32)))) + +(defun run (filename) + (init filename) + (loop until *break* do + ;; (format t "PC: $~x~%" *pc*) + (next-instruction))) + +(defun run-hitch () (run "/Users/nickm/infocom/hitchhiker-r60-s861002.z3")) +(defun run-zork () (run "/Users/nickm/infocom/zork1-r88-s840726.z3")) +(defun run-czech () (run "/Users/nickm/infocom/czech_0_8/czech.z3")) + +(defun log-czech () + (with-open-file (*standard-output* "czech.log" + :direction :output + :if-does-not-exist :create + :if-exists :overwrite) + (run-czech))) + diff --git a/mem.lisp b/mem.lisp new file mode 100644 index 0000000..4936bc3 --- /dev/null +++ b/mem.lisp @@ -0,0 +1,29 @@ +(in-package #:lzm) + +(defvar *memory*) + +(defun read-u8 (addr) + (aref *memory* addr)) + +(defun read-u16 (addr) + (logior (ash (read-u8 addr) 8) (read-u8 (1+ addr)))) + +(defun read-word-addr (addr) + (* 2 (read-u16 addr))) + +(defun write-u8 (addr value) + (assert (< addr *static-memory-offset*)) + (setf (aref *memory* addr) value)) + +(defun write-u16 (addr value) + (assert (< (1+ addr) *static-memory-offset*)) + (setf (aref *memory* addr) (ash value -8) + (aref *memory* (1+ addr)) (logand value #xff))) + +(defun u16->s16 (n) + (if (logbitp 15 n) + (- n #x10000) + n)) + +(defun s16->u16 (n) + (logand #xffff n)) diff --git a/obj.lisp b/obj.lisp new file mode 100644 index 0000000..8f955c9 --- /dev/null +++ b/obj.lisp @@ -0,0 +1,128 @@ +(in-package #:lzm) + +(defun obj-property-default (property) + (read-u16 (+ *object-table-offset* (* (1- property) 2)))) + +(defun obj-address (obj) + (assert (plusp obj)) + (+ *object-table-offset* 62 (* (1- obj) 9))) + +(defun obj-attr-address (obj attr) + (multiple-value-bind (byte bit) (floor attr 8) + (values (+ (obj-address obj) byte) + (- 7 bit)))) + +(defun obj-attr (obj attr) + (multiple-value-bind (addr bit) (obj-attr-address obj attr) + (logbitp bit (read-u8 addr)))) + +(defun (setf obj-attr) (val obj attr) + (multiple-value-bind (addr bit) (obj-attr-address obj attr) + (write-u8 addr (dpb (if val 1 0) + (byte 1 bit) + (read-u8 addr))))) + +(defun obj-parent (obj) + (read-u8 (+ (obj-address obj) 4))) + +(defun (setf obj-parent) (val obj) + (write-u8 (+ (obj-address obj) 4) val)) + +(defun obj-sibling (obj) + (read-u8 (+ (obj-address obj) 5))) + +(defun (setf obj-sibling) (val obj) + (write-u8 (+ (obj-address obj) 5) val)) + +(defun obj-child (obj) + (read-u8 (+ (obj-address obj) 6))) + +(defun (setf obj-child) (val obj) + (write-u8 (+ (obj-address obj) 6) val)) + +(defun obj-prev-sibling (obj) + (let ((parent (obj-parent obj))) + (if (zerop parent) + 0 + (loop for s = (obj-child parent) then (obj-sibling s) + and prev = 0 then s + finally (return 0) + when (= s obj) do (return prev))))) + +(defun obj-name-addr (obj) + (read-u16 (+ (obj-address obj) 7))) + +(defun obj-name-byte-len (obj) + (* 2 (read-u8 (obj-name-addr obj)))) + +(defun obj-name (obj) + (decode-text (+ 1 (obj-name-addr obj)))) + +(defun obj-properties-addr (obj) + (+ (obj-name-addr obj) (obj-name-byte-len obj) 1)) + +;; Return a list of (prop, addr, size) tuples +(defun obj-property-list (obj) + (loop with addr = (obj-properties-addr obj) + for size-byte = (read-u8 addr) + for prop = (logand size-byte #b11111) + for size = (1+ (ash size-byte -5)) + until (zerop prop) + collect (list prop (1+ addr) size) + do (incf addr (1+ size)))) + +(defun obj-first-prop (obj) + (caar (obj-property-list obj))) + +(defun obj-next-prop (obj prop) + (let ((plist (obj-property-list obj))) + (when-let (i (position prop plist :key #'first)) + (first (nth (1+ i) plist))))) + +(defun obj-prop-addr (obj prop) + (when-let (p (find prop (obj-property-list obj) :key #'first)) + (second p))) + +(defun obj-prop-len (prop-addr) + (if (zerop prop-addr) + 0 + (let ((b (read-u8 (1- prop-addr)))) + (1+ (ash b -5))))) + +(defun obj-prop (obj prop) + (if-let (p (find prop (obj-property-list obj) :key #'first)) + (destructuring-bind (prop addr size) p + (declare (ignore prop)) + (ecase size + (1 (read-u8 addr)) + (2 (read-u16 addr)) + (t (error "invalid property size")))) + (obj-property-default prop))) + +(defun (setf obj-prop) (val obj prop) + (let ((p (find prop (obj-property-list obj) :key #'first))) + (unless p (error "put to invalid property")) + (destructuring-bind (prop addr size) p + (declare (ignore prop)) + (ecase size + (1 (write-u8 addr val)) + (2 (write-u16 addr val)) + (t (error "invalid property size")))))) + +(defun obj-remove (obj) + (let ((parent (obj-parent obj)) + (sibling (obj-sibling obj)) + (prev-sibling (obj-prev-sibling obj))) + (when (plusp parent) + (if (zerop prev-sibling) + (setf (obj-child parent) sibling) + (setf (obj-sibling prev-sibling) sibling)) + (setf (obj-sibling obj) 0) + (setf (obj-parent obj) 0)))) + +(defun obj-insert (obj dst) + (obj-remove obj) + ;; (format t "Moving ~a to ~a~%" (obj-name obj) (obj-name dst)) + (setf (obj-sibling obj) (obj-child dst)) + (setf (obj-child dst) obj) + (setf (obj-parent obj) dst)) diff --git a/op.lisp b/op.lisp new file mode 100644 index 0000000..152eb7a --- /dev/null +++ b/op.lisp @@ -0,0 +1,262 @@ +(in-package #:lzm) + +(defparameter *op-table* (make-array 128 :initial-element nil)) + +(eval-when (:compile-toplevel :load-toplevel) + (defun op-table-index (arity opcode) + (+ opcode (ecase arity (:0op 0) (:1op 32) (:2op 64) (:var 96))))) + +(defun find-handler (arity opcode) + (aref *op-table* (op-table-index arity opcode))) + +(defmacro def-op ((arity opcode name &key store) args &body body) + (let ((i (op-table-index arity opcode)) + (sym (intern (concatenate 'string "OP-" (string-upcase name))))) + (when store + (setf body `((write-var (fetch-u8) (progn ,@body))))) + `(progn + (defun ,sym ,args ,@body) + (setf (aref *op-table* ,i) (function ,sym))))) + +;;;;------------------------------------------------------------------------ +;;;; 2OP opcodes +;;;;------------------------------------------------------------------------ + +(def-op (:2op #x01 "je") (a &rest args) + (branch (member a args))) + +(def-op (:2op #x02 "jl") (a b) + (branch (< (u16->s16 a) (u16->s16 b)))) + +(def-op (:2op #x03 "jg") (a b) + (branch (> (u16->s16 a) (u16->s16 b)))) + +(def-op (:2op #x04 "dec-chk") (var limit) + (let ((val (1- (read-signed-var var)))) + (write-var var val) + (branch (< val (u16->s16 limit))))) + +(def-op (:2op #x05 "inc-chk") (var limit) + (let ((val (1+ (read-signed-var var)))) + (write-var var val) + (branch (> val (u16->s16 limit))))) + +(def-op (:2op #x06 "jin") (a b) + (branch (= (obj-parent a) b))) + +(def-op (:2op #x07 "test") (bitmap flags) + (branch (= (logand bitmap flags) flags))) + +(def-op (:2op #x08 "or" :store t) (a b) + (logior a b)) + +(def-op (:2op #x09 "and" :store t) (a b) + (logand a b)) + +(def-op (:2op #x0a "test-attr") (obj attr) + (branch (obj-attr obj attr))) + +(def-op (:2op #x0b "set-attr") (obj attr) + (setf (obj-attr obj attr) t)) + +(def-op (:2op #x0c "clear-attr") (obj attr) + (setf (obj-attr obj attr) nil)) + +(def-op (:2op #x0d "store") (variable value) + (when (zerop variable) + (stack-pop)) ; 6.3.4 + (write-var variable value)) + +(def-op (:2op #x0e "insert-obj") (obj dst) + (obj-insert obj dst)) + +(def-op (:2op #x0f "loadw" :store t) (array word-index) + (read-u16 (+ array (* 2 word-index)))) + +(def-op (:2op #x10 "loadb" :store t) (array byte-index) + (read-u8 (+ array byte-index))) + +(def-op (:2op #x11 "get-prop" :store t) (obj prop) + (obj-prop obj prop)) + +(def-op (:2op #x12 "get-prop-addr" :store t) (obj prop) + (or (obj-prop-addr obj prop) 0)) + +(def-op (:2op #x13 "get-next-prop" :store t) (obj prop) + (or (if (zerop prop) + (obj-first-prop obj) + (obj-next-prop obj prop)) + 0)) + +(def-op (:2op #x14 "add" :store t) (a b) + (+ a b)) + +(def-op (:2op #x15 "sub" :store t) (a b) + (- a b)) + +(def-op (:2op #x16 "mul" :store t) (a b) + (* a b)) + +(def-op (:2op #x17 "div" :store t) (a b) + (truncate (u16->s16 a) (u16->s16 b))) + +(def-op (:2op #x18 "mod" :store t) (a b) + (rem (u16->s16 a) (u16->s16 b))) + +;;;;------------------------------------------------------------------------ +;;;; 1OP opcodes +;;;;------------------------------------------------------------------------ + +(def-op (:1op #x00 "jz") (a) + (branch (zerop a))) + +(def-op (:1op #x01 "get-sibling" :store t) (obj) + (let ((sibling (obj-sibling obj))) + (branch (plusp sibling)) + sibling)) + +(def-op (:1op #x02 "get-child" :store t) (obj) + (let ((child (obj-child obj))) + (branch (plusp child)) + child)) + +(def-op (:1op #x03 "get-parent" :store t) (obj) + (obj-parent obj)) + +(def-op (:1op #x04 "get-prop-len" :store t) (prop-addr) + (obj-prop-len prop-addr)) + +(def-op (:1op #x05 "inc") (var) + (write-var var (1+ (read-signed-var var)))) + +(def-op (:1op #x06 "dec") (var) + (write-var var (1- (read-signed-var var)))) + +(def-op (:1op #x07 "print-addr") (addr) + (princ (decode-text addr))) + +(def-op (:1op #x09 "remove-obj") (obj) + (obj-remove obj)) + +(def-op (:1op #x0a "print-obj") (obj) + (princ (obj-name obj))) + +(def-op (:1op #x0b "ret") (val) + (ret val)) + +(def-op (:1op #x0c "jump") (offset) + (incf *pc* (- (u16->s16 offset) 2))) + +(def-op (:1op #x0d "print-paddr") (addr) + (princ (decode-text (* addr 2)))) + +(def-op (:1op #x0e "load" :store t) (var) + (when (zerop var) + (stack-push (stack-top))) ; 6.3.4 + (read-var var)) + +(def-op (:1op #x0f "not" :store t) (val) + (logxor val #xffff)) + +;;;;------------------------------------------------------------------------ +;;;; 0OP opcodes +;;;;------------------------------------------------------------------------ + +(def-op (:0op #x00 "true") () + (ret 1)) + +(def-op (:0op #x01 "false") () + (ret 0)) + +(def-op (:0op #x02 "print") () + (multiple-value-bind (s byte-len) (decode-text *pc*) + (princ s) + (incf *pc* byte-len))) + +(def-op (:0op #x03 "print-ret") () + (op-print) + (terpri) + (ret 1)) + +(def-op (:0op #x08 "ret-popped") () + (ret (stack-pop))) + +(def-op (:0op #x09 "pop") () + (stack-pop)) + +(def-op (:0op #x0b "new-line") () + (terpri)) + +(def-op (:0op #x0a "quit") () + (setf *break* t)) + +(def-op (:0op #x0d "verify") () + ;; Is there any point implementing this? + (branch t)) + +;;;;------------------------------------------------------------------------ +;;;; Variable opcodes +;;;;------------------------------------------------------------------------ + +(def-op (:var #x00 "call") (routine &rest args) + (let ((ret-var (fetch-u8))) + (if (zerop routine) + (write-var ret-var 0) + (progn + (stack-push ret-var) ;; Return variable + (stack-push *pc*) ;; Return address + (stack-push *frame*) ;; Frame pointer + (setf *frame* (fill-pointer *stack*) + *pc* (* routine 2)) + (let ((num-locals (fetch-u8))) + (dotimes (i num-locals) + (let* ((default (fetch-u16)) + (local (or (nth i args) default))) + (stack-push local)))))))) + +(def-op (:var #x01 "storew") (array word-index value) + (write-u16 (+ array (* 2 word-index)) value)) + +(def-op (:var #x02 "storeb") (array byte-index value) + (write-u8 (+ array byte-index) value)) + +(def-op (:var #x03 "put-prop") (obj prop val) + (setf (obj-prop obj prop) val)) + +(def-op (:var #x04 "read") (text-buf parse-buf) + (let* ((max-chars (1- (read-u8 text-buf))) + (max-tokens (read-u8 parse-buf)) + (text (truncate-seq (string-downcase (read-line)) max-chars))) + (loop for c across text + for dst = (1+ text-buf) then (1+ dst) + do (write-u8 dst (char-code c))) + (write-u8 (+ text-buf (length text) 1) 0) + (let ((tokens (truncate-seq (tokenise text) max-tokens))) + (write-u8 (1+ parse-buf) (length tokens)) + (loop for (addr len offset) in tokens + for dst = (+ parse-buf 2) then (+ dst 4) do + (write-u16 dst addr) + (write-u8 (+ dst 2) len) + (write-u8 (+ dst 3) (1+ offset)))))) + +(def-op (:var #x05 "print-char") (n) + (princ (code-char n))) + +(def-op (:var #x06 "print-num") (n) + (princ (u16->s16 n))) + +(def-op (:var #x07 "random" :store t) (n) + (setf n (u16->s16 n)) + (if (plusp n) + (rand-next n) + (rand-seed (- n)))) + +(def-op (:var #x08 "push") (n) + (stack-push n)) + +(def-op (:var #x09 "pull") (dst) + (let ((val (stack-pop))) + (when (zerop dst) + (stack-pop)) ; 6.3.4 + (write-var dst val))) + diff --git a/packages.lisp b/packages.lisp new file mode 100644 index 0000000..d70dcdf --- /dev/null +++ b/packages.lisp @@ -0,0 +1,3 @@ + (defpackage #:lzm + (:use #:cl #:alexandria) + (:export #:run)) diff --git a/rand.lisp b/rand.lisp new file mode 100644 index 0000000..c70651f --- /dev/null +++ b/rand.lisp @@ -0,0 +1,23 @@ +(in-package #:lzm) + +(defvar *rand-seed* nil) +(defvar *rand-next* 0) + +(defun rand-seed (n) + (if (zerop n) + (setf *rand-seed* nil *rand-next* 0) + (setf *rand-seed* n *rand-next* 0))) + +(defun rand-next-predictable () + (assert *rand-seed*) + (prog1 (mod *rand-next* *rand-seed*) + (incf *rand-next*))) + +(defun rand-next-random () + (get-internal-real-time)) + +(defun rand-next (n) + (let ((raw (if *rand-seed* + (rand-next-predictable) + (rand-next-random)))) + (1+ (mod raw n)))) diff --git a/text.lisp b/text.lisp new file mode 100644 index 0000000..41c95d9 --- /dev/null +++ b/text.lisp @@ -0,0 +1,54 @@ +(in-package #:lzm) + +(defparameter *A0* "abcdefghijklmnopqrstuvwxyz") +(defparameter *A1* "ABCDEFGHIJKLMNOPQRSTUVWXYZ") +(defparameter *A2* + #(#\Space #\Newline #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 + #\. #\, #\! #\? #\_ #\# #\' #\" #\/ #\\ #\- #\: #\( #\) )) + +(defun abbrev-addr (table-index i) + (read-word-addr (+ *abbrev-table-offset* (* table-index 64) (* i 2)))) + +(defun collect-chars (addr) + (loop for p = addr then (+ 2 p) + for w = (read-u16 p) + collect (ldb (byte 5 10) w) into cs + collect (ldb (byte 5 5) w) into cs + collect (ldb (byte 5 0) w) into cs + until (logbitp 15 w) + finally (return (values cs (+ 2 (- p addr)))))) + +(defun decode-chars (chars) + (with-output-to-string (s) + (loop with charset = *A0* + with abbrev = nil + with esc = nil + with esc-code = 0 + for c in chars do + (cond + ((eql esc 0) + (setf esc-code (ash c 5)) + (incf esc)) + ((eql esc 1) + (setf esc-code (logior esc-code c)) + (princ (code-char esc-code) s) + (setf esc nil)) + (abbrev + (princ (decode-text (abbrev-addr abbrev c)) s) + (setf abbrev nil)) + ((= c 0) (princ " " s)) + ((= c 1) (setf abbrev 0)) + ((= c 2) (setf abbrev 1)) + ((= c 3) (setf abbrev 2)) + ((= c 4) (setf charset *A1*)) + ((= c 5) (setf charset *A2*)) + ((and (= c 6) (eql charset *A2*)) + (setf esc 0 charset *A0*)) + (t + (princ (elt charset (- c 6)) s) + (setf charset *A0*)))))) + +(defun decode-text (addr) + (multiple-value-bind (chars byte-len) (collect-chars addr) + (let ((s (decode-chars chars))) + (values s byte-len)))) diff --git a/tok.lisp b/tok.lisp new file mode 100644 index 0000000..9e16529 --- /dev/null +++ b/tok.lisp @@ -0,0 +1,60 @@ +(in-package #:lzm) + +(defvar *dict-separators*) +(defvar *dict-words*) +(defvar *dict-word-length*) + +(defun read-separators () + (let ((num-separators (fetch-u8))) + (loop repeat num-separators + collect (code-char (fetch-u8))))) + +(defun read-words () + (let ((entry-length (fetch-u8)) + (num-entries (fetch-u16))) + (loop repeat num-entries + collect (cons (decode-text *pc*) *pc*) + do (incf *pc* entry-length)))) + +(defun init-dictionary () + (let ((*pc* *dictionary-offset*)) + (setf *dict-separators* (read-separators)) + (setf *dict-words* (read-words)) + (setf *dict-word-length* 6) ;; This varies by version + nil)) + +(defun truncate-seq (s max-length) + (if (> (length s) max-length) + (subseq s 0 max-length) + s)) + +(defun find-dict-word (word) + (let ((key (truncate-seq word *dict-word-length*))) + (if-let (dict-entry (assoc key *dict-words* :test #'equal)) + (cdr dict-entry) + 0))) + +(defun is-split-point (a b) + "Split points are before/after spaces, and before/after separator chars" + (or (char= a #\Space) + (char= b #\space) + (member a *dict-separators*) + (member b *dict-separators*))) + +(defun split-text (text) + "Split text into a list of words at split points, preserving whitespace" + (loop for c across text and prev = c + for i = 0 then (1+ i) + with prev-split = 0 + with words = nil + do (when (and prev (is-split-point c prev)) + (push (subseq text prev-split i) words) + (setf prev-split i)) + finally (return (nreverse (cons (subseq text prev-split) words))))) + +(defun tokenise (text) + "Tokenise text into a list of (dict-address, length, offset) tuples" + (loop for word in (split-text text) + and offset = 0 then (+ offset (length word)) + unless (string= word " ") + collect (list (find-dict-word word) (length word) offset))) diff --git a/var.lisp b/var.lisp new file mode 100644 index 0000000..94a02e2 --- /dev/null +++ b/var.lisp @@ -0,0 +1,29 @@ +(in-package #:lzm) + +(defun local (i) + (aref *stack* (+ *frame* i))) + +(defun (setf local) (val i) + (setf (aref *stack* (+ *frame* i)) val)) + +(defun global (i) + (read-u16 (+ *global-table-offset* (* i 2)))) + +(defun (setf global) (val i) + (write-u16 (+ *global-table-offset* (* i 2)) val)) + +(defun read-var (n) + (cond + ((zerop n) (stack-pop)) + ((< n #x10) (local (1- n))) + ((< n #x100) (global (- n #x10))))) + +(defun read-signed-var (n) + (u16->s16 (read-var n))) + +(defun write-var (n val) + (let ((uval (logand #xffff val))) + (cond + ((zerop n) (stack-push uval)) + ((< n #x10) (setf (local (1- n)) uval)) + ((< n #x100) (setf (global (- n #x10)) uval)))))