-
-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
29d4ba4
commit a2c0cb4
Showing
5 changed files
with
299 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
basilisp.contrib.bencode | ||
======================== | ||
|
||
.. toctree:: | ||
:maxdepth: 2 | ||
:caption: Contents: | ||
|
||
.. autonamespace:: basilisp.contrib.bencode | ||
:members: | ||
:undoc-members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
(ns basilisp.contrib.bencode | ||
(:require | ||
[basilisp.string :as str])) | ||
|
||
;; Protocol behind `encode`: `encode` delegates directly to
;; `to-bencode-encodeable*`, so a type is encodable exactly when it (or one of
;; its supertypes) has an implementation of this protocol registered.
(defprotocol BEncodeable
  (to-bencode-encodeable* [this]
    "Return an object which can be bencoded."))
|
||
(defmacro ^:private ->bytes
  "Expand to code which stringifies ``v`` with Python's ``str`` and yields the
  UTF-8 encoded ``bytes`` of that string."
  [v]
  `(python/bytes (python/str ~v) "utf-8"))
|
||
;; Base implementations for nil and the Python scalar types. Each implementation
;; returns the complete bencoded ``bytes`` for its value.
(extend-protocol BEncodeable
  ;; nil is encoded as the empty byte string.
  nil
  (to-bencode-encodeable* [this]
    #b "0:")
  ;; Byte strings: <length>:<contents>
  python/bytes
  (to-bencode-encodeable* [this]
    (.join (python/bytes) [(->bytes (python/len this)) #b ":" this]))
  ;; Integers: i<base-10 digits>e
  python/int
  (to-bencode-encodeable* [this]
    ;; ``bool`` is a subclass of ``int`` in Python, so booleans dispatch here too.
    ;; Stringifying them would emit "iTruee"/"iFalsee", which is not valid
    ;; bencode, so reject them explicitly instead.
    (if (python/isinstance this python/bool)
      (throw
        (ex-info "bencode does not support boolean values"
                 {:type  (type this)
                  :value this}))
      (.join (python/bytes) [#b "i" (->bytes this) #b "e"])))
  ;; Strings are UTF-8 encoded first; the length prefix counts bytes, not characters.
  python/str
  (to-bencode-encodeable* [this]
    (let [encoded (->bytes this)]
      (.join (python/bytes) [(->bytes (python/len encoded)) #b ":" encoded]))))
|
||
(defn ^:private encode-kw-or-sym
  "Bencode the keyword or symbol ``o`` as a string: ``ns/name`` when it has a
  namespace, otherwise just its name."
  [o]
  (let [ident-ns   (namespace o)
        ident-name (name o)
        text       (if ident-ns
                     (str ident-ns "/" ident-name)
                     ident-name)]
    (to-bencode-encodeable* text)))
|
||
(defn ^:private encode-sequential
  "Bencode the ordered collection ``n`` as a list: the encodings of its elements,
  in order, wrapped between ``l`` and ``e``."
  [n]
  (let [joiner (python/bytes)
        body   (->> n
                    (map to-bencode-encodeable*)
                    (.join joiner))]
    (.join joiner [#b "l" body #b "e"])))
|
||
(defn ^:private encode-dict
  "Bencode the mapping ``d`` as a dictionary.

  Keys must be strings, keywords, or symbols. Each key is converted to UTF-8
  bytes (keywords and symbols with a namespace are rendered as ``ns/name``) and
  the entries are emitted sorted by those key bytes, wrapped between ``d`` and
  ``e``.

  Throws an ``ex-info`` carrying the offending ``:key`` and its ``:type`` if a key
  is of any other type."
  [d]
  (as-> d $
    ;; Normalize every entry into a [key-bytes encoded-value] pair.
    (into []
          (map (fn [[k v]]
                 [(cond
                    (string? k) (->bytes k)
                    (ident? k)  (->bytes
                                  (if-let [ns-str (namespace k)]
                                    (str ns-str "/" (name k))
                                    (name k)))
                    :else       (throw
                                  (ex-info
                                    "bencode dictionary keys must be one of: string, keyword, or symbol"
                                    {:type (type k)
                                     :key  k})))
                  (to-bencode-encodeable* v)]))
          $)
    ;; Order the entries by their raw key bytes.
    (python/sorted $ ** :key first)
    ;; Keys are still raw bytes at this point, so length-prefix them now; the
    ;; values were already fully encoded above.
    (into [#b "d"]
          (mapcat (fn [[k v]]
                    [(to-bencode-encodeable* k) v]))
          $)
    (conj $ #b "e")
    (.join (python/bytes) $)))
|
||
;; Keywords and symbols share one encoder, which writes them out as strings.
(extend basilisp.lang.keyword/Keyword BEncodeable {:to-bencode-encodeable* encode-kw-or-sym})
(extend basilisp.lang.symbol/Symbol BEncodeable {:to-bencode-encodeable* encode-kw-or-sym})

;; Python dicts and Basilisp persistent maps are both encoded by `encode-dict`.
(extend python/dict BEncodeable {:to-bencode-encodeable* encode-dict})
(extend basilisp.lang.interfaces/IPersistentMap BEncodeable {:to-bencode-encodeable* encode-dict})

;; Ordered collections (Python lists/tuples, Basilisp lists/vectors) are all
;; encoded by `encode-sequential`. No set type is registered here.
(extend python/list BEncodeable {:to-bencode-encodeable* encode-sequential})
(extend python/tuple BEncodeable {:to-bencode-encodeable* encode-sequential})
(extend basilisp.lang.interfaces/IPersistentList BEncodeable {:to-bencode-encodeable* encode-sequential})
(extend basilisp.lang.interfaces/IPersistentVector BEncodeable {:to-bencode-encodeable* encode-sequential})
|
||
(defn encode
  "Encode the object ``d`` into a byte string using ``bencode`` encoding.

  ``encode`` supports encoding the following types:

  - ``nil``, which is encoded as the empty byte string
  - ``bytes``
  - ``int``
  - ``str``, which is first encoded to UTF-8 ``bytes``
  - keywords and symbols, which are first converted to strings (including namespace,
    separated by '/') and then converted using the rules for ``str``
  - Python ``list``
  - ``tuple``
  - Basilisp lists and vectors
  - ``dict``
  - maps

  Mapping type keys must be one of: keywords, symbols, or strings.

  This function does not support ``float`` because the ``bencode`` specification does
  not support non-integer numerics.

  Set types (including ``frozenset``, ``set``, or Basilisp's set types) are not
  supported due to the requirement that lists retain their original element ordering."
  [d]
  (to-bencode-encodeable* d))
|
||
|
||
;; Written as a macro, so each use expands in place to a direct ``.index``
;; method call on ``b``.
;; NOTE(review): Python's ``bytes.index`` raises ``ValueError`` when ``c`` is not
;; present; the decoder presumably relies on that to flag truncated input --
;; confirm before replacing this with a call that returns -1 instead.
(defmacro ^:private index-of
  "Return the index of the first occurrence of character ``c`` (a byte string) in
  byte string ``b``."
  [b c]
  `(.index ~b ~c))
|
||
(defn- slice
  "Return the portion of ``bytes`` beginning at index ``start`` and running to
  index ``end`` (exclusive), or to the end of ``bytes`` when ``end`` is omitted
  or ``nil``. Returns ``nil`` rather than an empty byte string when the
  resulting slice is empty.

  Throws a ``python/ValueError`` when ``start`` (two-argument form) or ``end``
  (three-argument form) lies beyond the length of ``bytes``."
  ([bytes start]
   (when (< (len bytes) start)
     (throw (python/ValueError "out of input")))
   (slice bytes start nil))
  ([bytes start end]
   (when (and end (> end (len bytes)))
     (throw (python/ValueError "out of input")))
   (let [chunk (get bytes (python/slice start end))]
     (when (pos? (count chunk))
       chunk))))
|
||
(defn ^:private decode-recur
  "Decode the single bencoded value at the front of the byte string ``data``.

  Returns a ``[value rest]`` vector, where ``rest`` is the part of ``data`` which
  follows the decoded value, or ``nil`` if nothing follows it.

  ``opts`` may contain:

  - ``:key-fn``: called on every dictionary key (default ``identity``)
  - ``:string-fn``: called on every decoded byte string which is not a dictionary
    key (default ``identity``)

  Truncated or malformed input is reported by letting the underlying Python
  exception propagate to the caller."
  [data {:keys [key-fn string-fn] :or {key-fn identity string-fn identity} :as opts}]
  (case (slice data 0 1)
    ;; integer: i<digits>e
    #b "i" (let [data (slice data 1)
                 i    (index-of data #b "e")]
             [(int (slice data 0 i))
              (slice data (inc i))])
    ;; list: l<value>...e
    #b "l" (let [data (slice data 1)]
             (loop [data data v (transient [])]
               (if (= (slice data 0 1) #b "e")
                 [(persistent! v) (slice data 1)]
                 (let [[value data] (decode-recur data opts)]
                   (recur data (conj! v value))))))
    ;; dictionary: d<key><value>...e
    #b "d" (let [data     (slice data 1)
                 ;; Keys must reach ``key-fn`` as raw bytes: ``:string-fn`` is
                 ;; only meant for non-key strings, so neutralize it while
                 ;; decoding a key.
                 key-opts (assoc opts :string-fn identity)]
             (loop [data data m (transient {})]
               (if (= (slice data 0 1) #b "e")
                 [(persistent! m) (slice data 1)]
                 (let [[k data] (decode-recur data key-opts)
                       [v data] (decode-recur data opts)
                       k        (key-fn k)]
                   (recur data (assoc! m k v))))))
    ;; byte string: <length>:<contents>
    (let [i    (index-of data #b ":")
          n    (int (slice data 0 i))
          ;; A negative length would otherwise be treated as a from-the-end
          ;; index by Python slicing and silently yield garbage.
          _    (when (neg? n)
                 (throw (python/ValueError "negative byte string length")))
          data (slice data (inc i))]
      [(if (= n 0)
         (string-fn #b "")
         (string-fn (slice data 0 n)))
       ;; ``data`` is nil when nothing followed the ':' separator.
       (when data
         (slice data n))])))
|
||
(defn decode
  "Decode the first value in the bencoded ``data`` bytes according to ``opts`` and
  return a [decoded* rest*] vector.

  The decoded* item in the vector is the decoded value of the first item in ``data``
  while rest* is the remaining undecoded bytes.

  If ``data`` cannot be decoded (e.g. is incomplete or an error occurred), it returns
  a [nil ``data``] vector.

  ``opts`` is an optional map with the following optional supported keys.

  :keyword ``:keywordize-keys``: if the decoded value is a map, keywordize its keys.
  :keyword ``:key-fn``: a function which will be called for each key in a map; cannot
      be specified if ``:keywordize-keys`` is also specified
  :keyword ``:string-fn``: a function which will be called for each byte string which
      is not a map key; default is :lpy:fn:`basilisp.core/identity`

  Throws an ``ex-info`` if both ``:keywordize-keys`` and ``:key-fn`` are given."
  ([data]
   (decode data {}))
  ([data {:keys [keywordize-keys key-fn] :as opts}]
   (when (and keywordize-keys key-fn)
     (throw (ex-info "Can only specify either :keywordize-keys or :key-fn; not both"
                     {:keywordize-keys keywordize-keys
                      :key-fn          key-fn})))
   ;; :keywordize-keys is shorthand for a :key-fn which decodes the key bytes as
   ;; UTF-8 and turns the result into a keyword.
   (let [opts (cond-> opts
                keywordize-keys (-> (dissoc :keywordize-keys)
                                    (assoc :key-fn #(keyword (.decode % "utf-8")))))]
     (try
       (decode-recur data opts)
       ;; Best effort by design: any failure means "could not decode", and the
       ;; untouched input is handed back to the caller.
       (catch python/Exception _
         [nil data])))))
|
||
(defn decode-all
  "Decode every value found in the bencoded ``data`` bytes, returning a
  [values* incomplete*] vector.

  values* is a vector of the values decoded from ``data``, in order. incomplete*
  holds whatever trailing bytes of ``data`` could not be decoded, or nil if there
  are none.

  ``opts`` is a map supporting the same keys as :lpy:fn:`decode`."
  ([data]
   (decode-all data {}))
  ([data opts]
   (loop [decoded   []
          remaining data]
     (let [[value tail] (decode remaining opts)]
       (if (some? value)
         (recur (conj decoded value) tail)
         [decoded tail])))))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
(ns tests.basilisp.contrib.bencode-test | ||
(:require | ||
[basilisp.contrib.bencode :as bc] | ||
[basilisp.test :refer [deftest are is testing]])) | ||
|
||
;; Checks `bc/encode` against each supported input type and verifies that
;; unsupported inputs are rejected with an ExceptionInfo.
(deftest bencode-encode
  (testing "bencode supported types"
    ;; Each row is an input value followed by its expected encoding; the expected
    ;; string is converted to UTF-8 bytes before the comparison.
    (are [v s] (= (.encode s "utf-8") (bc/encode v))
      nil "0:"
      "" "0:"
      -0 "i0e"
      0 "i0e"
      42 "i42e"
      -42 "i-42e"
      "spam" "4:spam"
      :keyword "7:keyword"
      :ns/kw "5:ns/kw"
      :other.ns/kw "11:other.ns/kw"
      'sym "3:sym"
      'ns/sym "6:ns/sym"
      'other.ns/sym "12:other.ns/sym"
      [] "le"
      ["spam" 42] "l4:spami42ee"
      '("spam" 42) "l4:spami42ee"
      {} "de"
      ;; Dictionary entries are expected in sorted key order, whatever the
      ;; order of the source map.
      {"bar" "spam" :foo 42 :efg/xyz 128} "d3:bar4:spam7:efg/xyzi128e3:fooi42ee"
      ["spam" 42 {"bar" "spam" :foo 42}] "l4:spami42ed3:bar4:spam3:fooi42eee"))

  (testing "bencode unsupported types"
    ;; Floats, sets, seqs, and maps whose keys are not strings, keywords or
    ;; symbols must all be rejected.
    (are [v] (thrown? basilisp.lang.exception/ExceptionInfo (bc/encode v))
      -3.14
      0.3
      #{25}
      (seq [25 26])
      {45 "some map"}
      {["vec key"] "something bad"})))
|
||
;; Checks `bc/decode` and `bc/decode-all`: plain decoding, the :string-fn and
;; :keywordize-keys options, input holding several values, and truncated input.
(deftest bencode-decode
  (testing "basic"
    ;; With no options, strings (including map keys) come back as raw bytes and
    ;; the remainder is nil because every input holds exactly one value.
    (are [s v] (= [v nil] (bc/decode (python/bytes s "utf-8") {}))
      "0:" #b ""
      "le" []
      "de" {}
      "i0e" 0
      "i-1e" -1
      "i1e" 1
      "i42e" 42
      "i-42e" -42
      "4:spam" #b "spam"
      "l4:spami42ee" [#b "spam" 42]
      "d3:bar4:spam3:fooi42ee" {#b "bar" #b "spam" #b "foo" 42}))

  (testing "encoded string"
    ;; :string-fn is applied to string values only; the expected maps keep
    ;; their keys as raw bytes.
    (are [s v] (= [v nil] (bc/decode (python/bytes s "utf-8") {:string-fn #(.decode % "utf-8")}))
      "0:" ""
      "le" []
      "de" {}
      "i0e" 0
      "i-1e" -1
      "i1e" 1
      "i42e" 42
      "i-42e" -42
      "4:spam" "spam"
      "l4:spami42ee" ["spam" 42]
      "d3:bar4:spam3:fooi42ee" {#b "bar" "spam" #b "foo" 42}))

  (testing "keywordized"
    ;; :keywordize-keys is expected to apply to nested maps as well.
    (are [s v] (= v (bc/decode (python/bytes s "utf-8") {:keywordize-keys true}))
      "d3:bar4:spam3:fooi42ee" [{:bar #b "spam" :foo 42} nil]
      "d3:bard1:xi-42eee" [{:bar {:x -42}} nil]))

  (testing "multiple"
    ;; `decode` consumes only the first value and returns the undecoded rest;
    ;; `decode-all` keeps going until the input is exhausted.
    (is (= [42 #b "4:spam"] (bc/decode #b "i42e4:spam" {})))
    (is (= [[{#b "bar" #b "spam"} 42] nil] (bc/decode-all #b "d3:bar4:spamei42e")))
    (is (= [[{:bar #b "spam"} 42] nil] (bc/decode-all #b "d3:bar4:spamei42e" {:keywordize-keys true}))))

  (testing "incomplete"
    ;; Truncated input yields nil plus the untouched input bytes; `decode-all`
    ;; returns what it managed to decode together with the undecodable tail.
    (is (= [nil #b "i42"] (bc/decode #b "i42" {})))
    (is (= [nil #b "d3:bar4:spam3:fooi42"] (bc/decode #b "d3:bar4:spam3:fooi42" {})))
    (is (= [[{#b "bar" #b "spam"} 42] #b "i0"] (bc/decode-all #b "d3:bar4:spamei42ei0")))))