Skip to content

Commit

Permalink
Upgrade bencode interface to op on bytes instead of utf-8 strs
Browse files Browse the repository at this point in the history
  • Loading branch information
ikappaki committed Dec 21, 2023
1 parent a2e5653 commit faf785f
Show file tree
Hide file tree
Showing 5 changed files with 88 additions and 74 deletions.
91 changes: 53 additions & 38 deletions src/basilisp/contrib/bencode.lpy
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,64 @@
https://github.com/babashka/nbb/blob/bca8b5017a06768eb35d02a2d6233ca9c6c2f692/src/nbb/impl/bencode.cljs")

(defn- index-of [s c]
(let [i (.index s c)]
(defn- index-of [ba c]
(let [i (.find ba c)]
(if (< i 0) (throw (python/EOFError "out of input")) i)))

(defn- slice
([buffer start]
(if (< (len buffer) start)
"Returns the slice of the ``bytes`` from the ``start`` index to
the end of the array or to the ``end`` index if provided. Returns
`nil` if the slice is empty.
Throw a `python/EOFError` exception if any of the indices are out
of bounds."
([bytes start]
(if (< (len bytes) start)
(throw (python/EOFError "out of input"))
(get buffer (python/slice start nil))))
([buffer start end]
(if (> end (len buffer))
(slice bytes start nil)))
([bytes start end]
(if (and end (> end (len bytes)))
(throw (python/EOFError "out of input"))
(get buffer (python/slice start end)))))
(let [bs (get bytes (python/slice start end))]
(when (> (count bs) 0)
bs)))))

;; Single-byte delimiters the decoder compares against: "e" terminates
;; integers, lists and dicts; ":" separates a string's length from its payload.
(def ^:private Be (python/bytes "e" "UTF-8"))
(def ^:private Bcol (python/bytes ":" "UTF-8"))

(defn- decode-recur [data opts]
(case (str (slice data 0 1))
(case (.decode (slice data 0 1) "UTF-8")
"i"
(let [data (slice data 1)
i (index-of data "e")]
i (index-of data Be)]
[(int (slice data 0 i))
(slice data (inc i))])
"l"
(let [data (slice data 1)]
(loop [data data v (transient [])]
(if (= (str (slice data 0 1)) "e")
(if (= (slice data 0 1) Be)
[(persistent! v) (slice data 1)]
(let [[value data] (decode-recur data opts)]
(recur data (conj! v value))))))
"d"
(let [data (slice data 1)
{:keys [keywordize-keys]} opts]
(loop [data data m (transient {})]
(if (= (str (slice data 0 1)) "e")
(if (= (slice data 0 1) Be)
[(persistent! m) (slice data 1)]
(let [[k data] (decode-recur data opts)
[v data] (decode-recur data opts)
k (if keywordize-keys (keyword k) k)]
(recur data (assoc! m k v))))))
;; else
(let [i (index-of data ":")
(let [i (index-of data Bcol)
n (int (slice data 0 i))
data (slice data (inc i))]
[(str (slice data 0 n)) (slice data n)])))
[(if (= n 0) "" (.decode (slice data 0 n) "UTF-8")) (when data (slice data n))])))

(defn decode
"Decode the first value in the bencoded ``data`` string according to
"Decode the first value in the bencoded ``data`` bytes according to
``opts`` and return a [decoded* rest*] vector.
The decoded* item in the vector is the decoded value of the first
Expand All @@ -66,12 +78,12 @@
(catch python/Exception _e [nil data])))

(defn decode-all
"Decode all values in the bencoded ``data`` string and return them as
"Decode all values in the bencoded ``data`` bytes and return them as
a [values* incomplete*] vector.
The values* item is a collection of the ``data`` decoded values,
while incomplete* is the rest of the ``data`` string that could not
be decoded.
while incomplete* is the rest of the ``data`` bytes that could not
be decoded or nil.
``opts`` is a map supporting the same keys as lpy:fn:`decode`."
([data]
Expand All @@ -86,24 +98,7 @@
(defn- utf8-bytes
  "Return the number of bytes the string ``s`` occupies once encoded as
  UTF-8 (a count, not the bytes themselves)."
  [s]
  (-> s
      (python/bytes "UTF-8")
      len))

(defn encode [data]
"Encode ``data`` to bencode encoding and return it as a string.
Can encode the following types
- strings.
- numbers.
- lists and vectors.
- maps, it will also encode any keyword keys to [ns/]name strings,
and will also order the entries in the output encoding
alphanumerically based on the keys.
- nil is encoded to the empty string.
Throws a `python/TypeError` if the ``data`` type is not supported."
(defn- encode-recur [data]
(cond
(string? data)
(str (utf8-bytes data) ":" data)
Expand All @@ -112,7 +107,7 @@
(nil? data)
"0:"
(or (vector? data) (list? data))
(str "l" (apply str (map encode data)) "e")
(str "l" (apply str (map encode-recur data)) "e")
(map? data)
(str "d" (->> data
(reduce-kv (fn [acc k v]
Expand All @@ -124,9 +119,29 @@
v)) {})
(sort-by first)
(map (fn [[k v]]
(str (encode k) (encode v))))
(str (encode-recur k) (encode-recur v))))
(apply str))
"e")

:else
(throw (python/TypeError (str [::unsupported :type (type data) :input (pr-str data)])))))

(defn encode
  "Encode ``data`` to bencode and return the result as UTF-8 encoded bytes.

  Can encode the following types

  - strings.
  - numbers.
  - lists and vectors.
  - maps, it will also encode any keyword keys to [ns/]name strings,
    and will also order the entries in the output encoding
    alphanumerically based on the keys.
  - nil is encoded to the empty bencode string (``0:``).

  Throws a `python/TypeError` if the ``data`` type is not supported."
  ;; The docstring must precede the parameter vector; placed after it, the
  ;; string is just a discarded body expression and never becomes :doc metadata.
  [data]
  (python/bytes (encode-recur data) "UTF-8"))
8 changes: 4 additions & 4 deletions src/basilisp/contrib/nrepl_server.lpy
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@
(fn [_request response]
(debug :sending (:id _request) :response-keys (keys response))
(try
(.sendall socket (python/bytes (bc/encode response) "UTF-8"))
(.sendall socket (bc/encode response))
(catch python/TypeError e
(error :bencode-cannot-decode (pr-str e))))))

Expand Down Expand Up @@ -386,11 +386,11 @@
(if (= data zero-bytes)
(do (info :socket-closing client-info)
(.close socket))
(let [data (.decode data "UTF-8")
(let [;;data (.decode data "UTF-8")
data (if-let [p @pending]
(let [s (str p data)]
(let [b (+ p data)]
(reset! pending nil)
s)
b)
data)
[requests unprocessed] (bc/decode-all data {:keywordize-keys true})]
(debug :requests requests)
Expand Down
1 change: 1 addition & 0 deletions src/basilisp/lang/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,6 +1179,7 @@ def get(m, k, default=None): # pylint: disable=unused-argument
@get.register(dict)
@get.register(list)
@get.register(str)
@get.register(bytes)
def _get_others(m, k, default=None):
try:
return m[k]
Expand Down
37 changes: 20 additions & 17 deletions tests/basilisp/contrib/bencode_test.lpy
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
[basilisp.contrib.bencode :as bc]
[basilisp.test :refer [deftest are is testing]]))

(defn b8
  "Test helper: return the string ``s`` encoded as UTF-8 bytes."
  [s]
  (python/bytes s "UTF-8"))

(deftest bencode-encode
(testing "bencode supported types"
(are [v s] (= s (bc/encode v))
(are [v s] (= (b8 s) (bc/encode v))
42 "i42e"
-42 "i-42e"
0.3 "i0.3e"
Expand All @@ -22,25 +25,25 @@

(deftest bencode-decode
(testing "basic"
(are [s v] (= v (bc/decode s {}))
"i42e" [42 ""]
"i-42e" [-42 ""]
"4:spam" ["spam" ""]
"0:" ["" ""]
"l4:spami42ee" [["spam" 42] ""]
"d3:bar4:spam3:fooi42ee" [{"bar" "spam" "foo" 42} ""]))
(are [s v] (= v (bc/decode (b8 s) {}))
"i42e" [42 nil]
"i-42e" [-42 nil]
"4:spam" ["spam" nil]
"0:" ["" nil]
"l4:spami42ee" [["spam" 42] nil]
"d3:bar4:spam3:fooi42ee" [{"bar" "spam" "foo" 42} nil]))

(testing "keywordized"
(are [s v] (= v (bc/decode s {:keywordize-keys true}))
"d3:bar4:spam3:fooi42ee" [{:bar "spam" :foo 42} ""]
"d3:bard1:xi-42eee" [{:bar {:x -42}} ""]))
(are [s v] (= v (bc/decode (b8 s) {:keywordize-keys true}))
"d3:bar4:spam3:fooi42ee" [{:bar "spam" :foo 42} nil]
"d3:bard1:xi-42eee" [{:bar {:x -42}} nil]))

(testing "multiple"
(is (= [42 "4:spam"] (bc/decode "i42e4:spam" {})))
(is (= [[{"bar" "spam"} 42] ""] (bc/decode-all "d3:bar4:spamei42e")))
(is (= [[{:bar "spam"} 42] ""] (bc/decode-all "d3:bar4:spamei42e" {:keywordize-keys true}))))
(is (= [42 (b8 "4:spam")] (bc/decode (b8 "i42e4:spam") {})))
(is (= [[{"bar" "spam"} 42] nil] (bc/decode-all (b8 "d3:bar4:spamei42e"))))
(is (= [[{:bar "spam"} 42] nil] (bc/decode-all (b8 "d3:bar4:spamei42e") {:keywordize-keys true}))))

(testing "incomplete"
(is (= [nil "i42"] (bc/decode "i42" {})))
(is (= [nil "d3:bar4:spam3:fooi42"] (bc/decode "d3:bar4:spam3:fooi42" {})))
(is (= [[{"bar" "spam"} 42] "i0"] (bc/decode-all "d3:bar4:spamei42ei0")))))
(is (= [nil (b8 "i42")] (bc/decode (b8 "i42") {})))
(is (= [nil (b8 "d3:bar4:spam3:fooi42")] (bc/decode (b8 "d3:bar4:spam3:fooi42") {})))
(is (= [[{"bar" "spam"} 42] (b8 "i0")] (bc/decode-all (b8 "d3:bar4:spamei42ei0"))))))
25 changes: 10 additions & 15 deletions tests/basilisp/contrib/nrepl_server_test.lpy
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
:sock The socket connection to the server."
`(with [sock# (socket/socket socket/AF_INET socket/SOCK_STREAM)]
(let [~client {:sock sock# :backlog* (atom {:items [] :fraction ""})}]
(let [~client {:sock sock# :backlog* (atom {:items [] :fraction nil})}]
(.connect sock# (python/tuple ["127.0.0.1" *nrepl-port*]))
;; the high time out value is for accommodating the slow
;; execution on pypy.
Expand All @@ -59,9 +59,8 @@
"Send ``value`` to the server the ``client`` is connected to."
[client value]
(let [{:keys [sock]} client
v (bc/encode value)
vb (python/bytes v "UTF-8")]
(.sendall sock vb)))
v (bc/encode value)]
(.sendall sock v)))

(defn client-recv!
"Receive and return nREPL response from the server the ``client`` is
Expand All @@ -72,8 +71,8 @@
(if-let [item (first items)]
(do (swap! backlog* update :items #(drop 1 %))
item)
(let [data (python/str (.recv sock 8192) "UTF-8")
data (str data fraction)
(let [data (.recv sock 8192)
data (if fraction (+ data fraction) data)
[items remaining :as response] (bc/decode-all data {:keywordize-keys true})
item (first items)]
(reset! backlog* {:items (drop 1 items) :fraction remaining})
Expand All @@ -85,20 +84,16 @@
(with [sock (socket/socket socket/AF_INET socket/SOCK_STREAM)]
(do
(.connect sock #py ("127.0.0.1" *nrepl-port*))
(let [bb (bc/encode {:id 1 :op "clone"})
bytes (python/bytes bb "UTF-8")]
(.sendall sock bytes)
(let [data (-> (.recv sock 1024)
(python/str "UTF-8"))
(let [encoded (bc/encode {:id 1 :op "clone"})]
(.sendall sock encoded)
(let [data (.recv sock 1024)
[{:keys [id new-session status] :as msg} _] (bc/decode data {:keywordize-keys true})]
(is (= id 1))
(is (uuid-like? new-session))
(is (= status ["done"]))
(.sendall sock (-> (bc/encode {:id 2 :op "close"})
(python/bytes "UTF-8")))
(is (= [{:id 2 :status ["done"]} ""]
(.sendall sock (bc/encode {:id 2 :op "close"}))
(is (= [{:id 2 :status ["done"]} nil]
(-> (.recv sock 1024)
(python/str "UTF-8")
(bc/decode {:keywordize-keys true}))))))))))

(testing "describe"
Expand Down

0 comments on commit faf785f

Please sign in to comment.