Skip to content

Commit

Permalink
Support bencode for nrepl server (#747)
Browse files Browse the repository at this point in the history
Part of #412
  • Loading branch information
chrisrink10 authored Dec 28, 2023
1 parent 29d4ba4 commit 8545148
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Added support for `bytes` literals using a `#b` prefix (#732)
* Added support for Python 3.12 (#734)
* Added a default reader conditional for the current platform (`windows`, `darwin`, `linux`, etc.) (#692)
* Added support for `bencode` binary encoding (part of #412)

### Changed
* Basilisp now supports PyTest 7.0+ (#660)
Expand Down
10 changes: 10 additions & 0 deletions docs/api/contrib/bencode.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
basilisp.contrib.bencode
========================

.. toctree::
:maxdepth: 2
:caption: Contents:

.. autonamespace:: basilisp.contrib.bencode
:members:
:undoc-members:
225 changes: 225 additions & 0 deletions src/basilisp/contrib/bencode.lpy
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
(ns basilisp.contrib.bencode
(:require
[basilisp.string :as str]))

;; Protocol implemented by every type that this namespace knows how to bencode.
;; Implementations are attached below via `extend-protocol` and `extend`.
(defprotocol BEncodeable
(to-bencode-encodeable* [this]
"Return the bencoded ``bytes`` representation of ``this``."))

(defmacro ^:private ->bytes
"Expand to code which converts ``v`` to a string using ``python/str`` and then
encodes that string as UTF-8 ``bytes``."
[v]
`(.encode (python/str ~v) "utf-8"))

;; Scalar encodings:
;;   nil   -> the empty byte string, "0:"
;;   bytes -> "<length>:<bytes>"
;;   int   -> "i<decimal>e"
;;   str   -> UTF-8 encode, then "<byte length>:<bytes>"
(extend-protocol BEncodeable
  nil
  (to-bencode-encodeable* [this]
    #b "0:")

  python/bytes
  (to-bencode-encodeable* [this]
    (let [size-prefix (->bytes (python/len this))]
      (.join (python/bytes) [size-prefix #b ":" this])))

  python/int
  (to-bencode-encodeable* [this]
    (let [digits (->bytes this)]
      (.join (python/bytes) [#b "i" digits #b "e"])))

  python/str
  (to-bencode-encodeable* [this]
    ;; The length prefix counts encoded bytes, not characters.
    (let [utf8        (->bytes this)
          size-prefix (->bytes (python/len utf8))]
      (.join (python/bytes) [size-prefix #b ":" utf8]))))

(defn ^:private encode-kw-or-sym
  "Encode the keyword or symbol ``o`` as a bencoded string.

  The string is ``namespace/name`` if ``o`` has a namespace and just the name
  otherwise."
  [o]
  (let [ns-part   (namespace o)
        base-name (name o)
        text      (if ns-part
                    (str ns-part "/" base-name)
                    base-name)]
    (to-bencode-encodeable* text)))

(defn ^:private encode-sequential
  "Encode the sequential collection ``n`` as a bencoded list: the concatenated
  encodings of its elements, in order, wrapped in ``l`` ... ``e``."
  [n]
  (let [body (->> n
                  (map to-bencode-encodeable*)
                  (.join (python/bytes)))]
    (.join (python/bytes) [#b "l" body #b "e"])))

(defn ^:private encode-dict
  "Encode the mapping ``d`` as a bencoded dictionary.

  Keys must be strings, keywords, or symbols; keywords and symbols are rendered
  as ``namespace/name`` (or just the name if they have no namespace). Entries
  are emitted sorted by their UTF-8 encoded key bytes and wrapped in
  ``d`` ... ``e``.

  Throws an ``ex-info`` carrying the offending ``:key`` and its ``:type`` if a
  key is of any other type."
  [d]
  (as-> d $
    ;; Normalize every entry to a [raw-key-bytes encoded-value] pair.
    (into []
          (map (fn [[k v]]
                 [(cond
                    (string? k) (->bytes k)
                    (ident? k)  (->bytes
                                 (if-let [ns-str (namespace k)]
                                   (str ns-str "/" (name k))
                                   (name k)))
                    :else       (throw
                                 (ex-info
                                  "bencode dictionary keys must be one of: string, keyword, or symbol"
                                  {:type (type k)
                                   :key  k})))
                  (to-bencode-encodeable* v)]))
          $)
    ;; Sort on the raw key bytes, before the length prefix is added.
    (python/sorted $ ** :key first)
    ;; Keys are length-prefixed here; values were already encoded above.
    (into [#b "d"]
          (mapcat (fn [[k v]]
                    [(to-bencode-encodeable* k) v]))
          $)
    (conj $ #b "e")
    (.join (python/bytes) $)))

;; Keywords and symbols are encoded as strings (see `encode-kw-or-sym`).
(extend basilisp.lang.keyword/Keyword BEncodeable {:to-bencode-encodeable* encode-kw-or-sym})
(extend basilisp.lang.symbol/Symbol BEncodeable {:to-bencode-encodeable* encode-kw-or-sym})

;; Python dicts and Basilisp persistent maps are encoded as bencode dictionaries.
(extend python/dict BEncodeable {:to-bencode-encodeable* encode-dict})
(extend basilisp.lang.interfaces/IPersistentMap BEncodeable {:to-bencode-encodeable* encode-dict})

;; Python lists/tuples and Basilisp lists/vectors are encoded as bencode lists.
(extend python/list BEncodeable {:to-bencode-encodeable* encode-sequential})
(extend python/tuple BEncodeable {:to-bencode-encodeable* encode-sequential})
(extend basilisp.lang.interfaces/IPersistentList BEncodeable {:to-bencode-encodeable* encode-sequential})
(extend basilisp.lang.interfaces/IPersistentVector BEncodeable {:to-bencode-encodeable* encode-sequential})

(defn encode
"Encode the object ``d`` into a byte string using ``bencode`` encoding.
``encode`` supports encoding the following types:
- ``nil``, which is encoded as the empty byte string
- ``bytes``
- ``int``
- ``str``, which is first encoded to UTF-8 ``bytes``
- keywords and symbols, which are first converted to strings (including namespace,
separated by '/') and then converted using the rules for ``str``
- Python ``list``
- ``tuple``
- Basilisp lists and vectors
- ``dict``
- maps
Mapping type keys must be one of: keywords, symbols, or strings.
This function does not support ``float`` because the ``bencode`` specification does
not support non-integer numerics.
Set types (including ``frozenset``, ``set``, or Basilisp's set types) are not
supported due to the requirement that lists retain their original element ordering."
[d]
(to-bencode-encodeable* d))


(defmacro ^:private index-of
"Return the index of the first occurrence of character ``c`` (a byte string) in
byte string ``b``.
Expands to a call of the ``.index`` method on ``b`` with ``c`` as its argument."
[b c]
`(.index ~b ~c))

(defn- slice
  "Return the slice of ``bytes`` from the ``start`` index to the end of the input,
  or up to (but excluding) the ``end`` index if provided.

  Returns ``nil`` (rather than an empty byte string) if the slice is empty.

  Throws a ``python/ValueError`` if ``start`` (two-argument form) or ``end``
  (three-argument form) is greater than the length of ``bytes``."
  ([bytes start]
   (if (< (python/len bytes) start)
     (throw (python/ValueError "out of input"))
     (slice bytes start nil)))
  ([bytes start end]
   ;; A nil ``end`` means the slice runs to the end of the input, so only a
   ;; supplied ``end`` is bounds checked.
   (if (and end (> end (python/len bytes)))
     (throw (python/ValueError "out of input"))
     (let [bs (get bytes (python/slice start end))]
       ;; Callers rely on nil, not an empty byte string, to signal no data left.
       (when (> (count bs) 0)
         bs)))))

;; Forward declaration: decode-list and decode-dict call decode*, which is only
;; defined after them.
(declare decode*)

(defn ^:private decode-int
  "Decode the bencoded integer at the head of ``data`` (which starts with ``i``).

  Returns a [value rest] vector where rest is the input following the closing
  ``e``, or ``nil`` if nothing follows."
  [data]
  (let [body      (slice data 1)
        end-idx   (index-of body #b "e")
        value     (int (slice body 0 end-idx))
        remaining (slice body (inc end-idx))]
    [value remaining]))

(defn ^:private decode-byte-string
  "Decode the length-prefixed byte string at the head of ``data``.

  The decoded bytes are passed through ``:string-fn`` (default ``identity``).
  Returns a [value rest] vector where rest is the input following the string,
  or ``nil`` if nothing follows."
  [data {:keys [string-fn] :or {string-fn identity}}]
  (let [sep-idx (index-of data #b ":")
        size    (int (slice data 0 sep-idx))
        payload (slice data (inc sep-idx))
        ;; A zero-length string has no payload to slice; slice would yield nil.
        raw     (if (= size 0)
                  #b ""
                  (slice payload 0 size))
        value   (string-fn raw)
        tail    (when payload
                  (slice payload size))]
    [value tail]))

(defn ^:private decode-list
  "Decode the bencoded list at the head of ``data`` (which starts with ``l``).

  Elements are decoded with ``decode*`` until the terminating ``e`` is reached.
  Returns a [vector rest] vector where rest is the input following the ``e``."
  [data opts]
  (loop [remaining (slice data 1)
         items     (transient [])]
    (if (= (slice remaining 0 1) #b "e")
      [(persistent! items) (slice remaining 1)]
      (let [[item more] (decode* remaining opts)]
        (recur more (conj! items item))))))

(defn ^:private decode-dict
  "Decode the bencoded dictionary at the head of ``data`` (which starts with ``d``).

  Each key is decoded as a byte string and passed through ``:key-fn`` (default
  ``identity``); each value is decoded with ``decode*`` using the full ``opts``.
  Returns a [map rest] vector where rest is the input following the closing ``e``."
  [data {:keys [key-fn] :or {key-fn identity} :as opts}]
  (loop [remaining (slice data 1)
         entries   (transient {})]
    (if (= (slice remaining 0 1) #b "e")
      [(persistent! entries) (slice remaining 1)]
      ;; Keys go through key-fn, not the caller's :string-fn.
      (let [[k after-key]   (decode-byte-string remaining {:string-fn key-fn})
            [v after-value] (decode* after-key opts)]
        (recur after-value (assoc! entries k v))))))

(defn ^:private decode*
  "Decode the single bencoded value at the head of ``data``, dispatching on its
  first byte: ``i`` for an integer, ``l`` for a list, ``d`` for a dictionary, and
  anything else for a length-prefixed byte string.

  Returns the [value rest] vector produced by the selected decoder."
  [data opts]
  (let [tag (slice data 0 1)]
    (cond
      (= tag #b "i") (decode-int data)
      (= tag #b "l") (decode-list data opts)
      (= tag #b "d") (decode-dict data opts)
      ;; byte string
      :else          (decode-byte-string data opts))))

(defn decode
  "Decode the first value in the bencoded ``data`` bytes according to ``opts`` and
  return a [decoded* rest*] vector.

  The decoded* item in the vector is the decoded value of the first item in ``data``
  while rest* is the remaining undecoded bytes, or ``nil`` if nothing remains.

  If ``data`` cannot be decoded (e.g. is incomplete or an error occurred), it returns
  a [nil ``data``] vector.

  ``opts`` is an optional map (an empty map is used if it is omitted) with the
  following optional supported keys.

  :keyword ``:keywordize-keys``: if the decoded value is a map, keywordize its keys.
  :keyword ``:key-fn``: a function which will be called for each key in a map; cannot
      be specified if ``:keywordize-keys`` is also specified
  :keyword ``:string-fn``: a function which will be called for each byte string which
      is not a map key; default is :lpy:fn:`basilisp.core/identity`

  Throws an ``ex-info`` if both ``:keywordize-keys`` and ``:key-fn`` are given."
  ([data]
   (decode data {}))
  ([data {:keys [keywordize-keys key-fn] :as opts}]
   (when (and keywordize-keys key-fn)
     (throw (ex-info "Can only specify either :keywordize-keys or :key-fn; not both"
                     {:keywordize-keys keywordize-keys
                      :key-fn          key-fn})))
   ;; :keywordize-keys is shorthand for a :key-fn that UTF-8 decodes each key and
   ;; turns it into a keyword.
   (let [opts (cond-> opts
                keywordize-keys (-> (dissoc :keywordize-keys)
                                    (assoc :key-fn #(keyword (.decode % "utf-8")))))]
     (try
       (decode* data opts)
       ;; Deliberate best effort: any failure is reported to the caller as
       ;; [nil data] rather than raised.
       (catch python/Exception _
         [nil data])))))

(defn decode-all
  "Decode every value in the bencoded ``data`` bytes, returning a
  [values* incomplete*] vector.

  values* is a vector of the values decoded from ``data``, in order.
  incomplete* is whatever trailing portion of ``data`` could not be
  decoded, or nil.

  ``opts`` is a map supporting the same keys as :lpy:fn:`decode`."
  ([data]
   (decode-all data {}))
  ([data opts]
   (loop [decoded   []
          remaining data]
     (let [[item tail] (decode remaining opts)]
       ;; decode reports failure, including running out of input, as a nil item.
       (if (nil? item)
         [decoded tail]
         (recur (conj decoded item) tail))))))

1 change: 1 addition & 0 deletions src/basilisp/lang/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@ def get(m, k, default=None): # pylint: disable=unused-argument
return default


@get.register(bytes)
@get.register(dict)
@get.register(list)
@get.register(str)
Expand Down
Empty file.
80 changes: 80 additions & 0 deletions tests/basilisp/contrib/bencode_test.lpy
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
(ns tests.basilisp.contrib.bencode-test
(:require
[basilisp.contrib.bencode :as bc]
[basilisp.test :refer [deftest are is testing]]))

;; Tests for bc/encode: the expected output for each supported type, and the
;; exception raised for unsupported input.
(deftest bencode-encode
(testing "bencode supported types"
;; Each row is an input value followed by its expected encoding, written as a
;; string and UTF-8 encoded before comparison.
(are [v s] (= (.encode s "utf-8") (bc/encode v))
nil "0:"
"" "0:"
-0 "i0e"
0 "i0e"
42 "i42e"
-42 "i-42e"
"spam" "4:spam"
:keyword "7:keyword"
:ns/kw "5:ns/kw"
:other.ns/kw "11:other.ns/kw"
'sym "3:sym"
'ns/sym "6:ns/sym"
'other.ns/sym "12:other.ns/sym"
[] "le"
["spam" 42] "l4:spami42ee"
'("spam" 42) "l4:spami42ee"
{} "de"
{"bar" "spam" :foo 42 :efg/xyz 128} "d3:bar4:spam7:efg/xyzi128e3:fooi42ee"
["spam" 42 {"bar" "spam" :foo 42}] "l4:spami42ed3:bar4:spam3:fooi42eee"))

(testing "bencode unsupported types"
;; Floats, sets, seqs, and maps whose keys are not strings, keywords, or
;; symbols are all expected to raise ExceptionInfo.
(are [v] (thrown? basilisp.lang.exception/ExceptionInfo (bc/encode v))
-3.14
0.3
#{25}
(seq [25 26])
{45 "some map"}
{["vec key"] "something bad"})))

;; Tests for bc/decode and bc/decode-all.
(deftest bencode-decode
(testing "basic"
;; With empty opts, byte strings and map keys are returned as raw bytes and
;; the rest* element is nil once the input is fully consumed.
(are [s v] (= [v nil] (bc/decode (python/bytes s "utf-8") {}))
"0:" #b ""
"le" []
"de" {}
"i0e" 0
"i-1e" -1
"i1e" 1
"i42e" 42
"i-42e" -42
"4:spam" #b "spam"
"l4:spami42ee" [#b "spam" 42]
"d3:bar4:spam3:fooi42ee" {#b "bar" #b "spam" #b "foo" 42}))

(testing "encoded string"
;; :string-fn is applied to byte string values but not to map keys, which
;; remain bytes.
(are [s v] (= [v nil] (bc/decode (python/bytes s "utf-8") {:string-fn #(.decode % "utf-8")}))
"0:" ""
"le" []
"de" {}
"i0e" 0
"i-1e" -1
"i1e" 1
"i42e" 42
"i-42e" -42
"4:spam" "spam"
"l4:spami42ee" ["spam" 42]
"d3:bar4:spam3:fooi42ee" {#b "bar" "spam" #b "foo" 42}))

(testing "keywordized"
;; :keywordize-keys also applies to the keys of nested maps.
(are [s v] (= v (bc/decode (python/bytes s "utf-8") {:keywordize-keys true}))
"d3:bar4:spam3:fooi42ee" [{:bar #b "spam" :foo 42} nil]
"d3:bard1:xi-42eee" [{:bar {:x -42}} nil]))

(testing "multiple"
;; decode consumes only the first value and returns the remaining bytes;
;; decode-all consumes every value.
(is (= [42 #b "4:spam"] (bc/decode #b "i42e4:spam" {})))
(is (= [[{#b "bar" #b "spam"} 42] nil] (bc/decode-all #b "d3:bar4:spamei42e")))
(is (= [[{:bar #b "spam"} 42] nil] (bc/decode-all #b "d3:bar4:spamei42e" {:keywordize-keys true}))))

(testing "incomplete"
;; Truncated input yields a nil value with the undecoded bytes returned
;; unchanged.
(is (= [nil #b "i42"] (bc/decode #b "i42" {})))
(is (= [nil #b "d3:bar4:spam3:fooi42"] (bc/decode #b "d3:bar4:spam3:fooi42" {})))
(is (= [[{#b "bar" #b "spam"} 42] #b "i0"] (bc/decode-all #b "d3:bar4:spamei42ei0")))))

0 comments on commit 8545148

Please sign in to comment.