From add05c6f73819399cf51286efe81fe9b0785d7cc Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Fri, 12 Jan 2024 12:11:39 -0500 Subject: [PATCH] Add replicate and replicateByte to Data.Bytes.Chunks. Release 0.2.13.0 --- CHANGELOG.md | 4 ++++ byteslice.cabal | 2 +- src/Data/Bytes.hs | 18 ++------------- src/Data/Bytes/Chunks.hs | 48 +++++++++++++++++++++++++++++++++++++++- src/Data/Bytes/Pure.hs | 17 +++++++++++++- 5 files changed, 70 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 191cdb7..43c5890 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Revision history for byteslice +## 0.2.13.0 -- 2024-01-12 + +* Add `replicate` and `replicateByte` to `Data.Bytes.Chunks`. + ## 0.2.12.0 -- 2023-12-14 * Add `Data.Bytes.Chunks.cons`. diff --git a/byteslice.cabal b/byteslice.cabal index 8cd2643..7d0b9af 100644 --- a/byteslice.cabal +++ b/byteslice.cabal @@ -1,6 +1,6 @@ cabal-version: 2.4 name: byteslice -version: 0.2.12.0 +version: 0.2.13.0 synopsis: Slicing managed and unmanaged memory description: This library provides types that allow the user to talk about a slice of diff --git a/src/Data/Bytes.hs b/src/Data/Bytes.hs index b45862c..2ec822d 100644 --- a/src/Data/Bytes.hs +++ b/src/Data/Bytes.hs @@ -38,12 +38,12 @@ module Data.Bytes , singleton , doubleton , tripleton - , replicate + , Pure.replicate -- ** Unsliced , singletonU , doubletonU , tripletonU - , replicateU + , Pure.replicateU -- * Filtering , takeWhile , dropWhile @@ -314,20 +314,6 @@ tripletonU !a !b !c = runByteArrayST do PM.writeByteArray arr 2 c PM.unsafeFreezeByteArray arr --- | Replicate a byte @n@ times. -replicate :: - Int -- ^ Desired length @n@ - -> Word8 -- ^ Byte to replicate - -> Bytes -replicate !n !w = Bytes (replicateU n w) 0 n - --- | Variant of 'replicate' that returns a unsliced byte array. 
-replicateU :: Int -> Word8 -> ByteArray -replicateU !n !w = runByteArrayST do - arr <- PM.newByteArray n - PM.setByteArray arr 0 n w - PM.unsafeFreezeByteArray arr - -- | /O(n)/ Return the suffix of the second string if its prefix -- matches the entire first string. stripPrefix :: Bytes -> Bytes -> Maybe Bytes diff --git a/src/Data/Bytes/Chunks.hs b/src/Data/Bytes/Chunks.hs index 273d896..07c71e1 100644 --- a/src/Data/Bytes/Chunks.hs +++ b/src/Data/Bytes/Chunks.hs @@ -29,6 +29,8 @@ module Data.Bytes.Chunks , concatByteString , reverse , reverseOnto + , replicate + , replicateByte -- * Folds , foldl' -- * Splitting @@ -48,7 +50,7 @@ module Data.Bytes.Chunks , writeFile ) where -import Prelude hiding (Foldable(..),concat,reverse,readFile,writeFile) +import Prelude hiding (Foldable(..),concat,reverse,readFile,writeFile,replicate) import Control.Exception (IOException,catch) import Control.Monad.ST.Run (runIntByteArrayST) @@ -95,6 +97,50 @@ cons :: Bytes -> Chunks -> Chunks {-# inline cons #-} cons = ChunksCons +-- | Repeat the byte sequence over and over. Returns empty chunks when given +-- a negative repetition count. +replicate :: + Bytes + -> Int -- ^ Number of times to repeat the sequence. + -> Chunks +replicate !b@(Bytes _ _ len) !n + | n <= 0 = ChunksNil + | len == 0 = ChunksNil + | otherwise = go n ChunksNil + where + -- Implementation note: We do not have to reverse the chunks at the end. + go i !acc = case i of + 0 -> acc + _ -> go (i - 1) (ChunksCons b acc) + +-- | Repeat the byte over and over. This builds a single byte array that +-- is at most 64KiB and shares that across every @ChunksCons@ cell. +-- +-- As an example, creating a 2GiB chunks this way would use 64KiB for the +-- byte array, and there would be the additional overhead of the 2^15 +-- @ChunksCons@ data constructors. On a 64-bit platform, @ChunksCons@ +-- takes 40 bytes, so the total memory consumption would be +-- @2^16 + 40 * 2^15@, which is roughly 1.37MB. 
The same reasoning +-- shows that it takes about 83.95MB to represent a 128GiB chunks. +-- +-- The size of the shared payload is an implementation detail. Do not +-- rely on this function producing 64KiB chunks. The implementation might +-- one day change to something smarter that minimizes the memory footprint +-- for very large chunks. +replicateByte :: + Word8 + -> Int -- ^ Number of times to replicate the byte + -> Chunks +replicateByte !w !n + | n <= 0 = ChunksNil + | n < 65536 = ChunksCons (Bytes.replicate n w) ChunksNil + | otherwise = go (Bytes.replicateU 65536 w) n ChunksNil + where + go !shared !remaining !acc + | remaining == 0 = acc + | remaining < 65536 = ChunksCons (Bytes shared 0 remaining) acc + | otherwise = go shared (remaining - 65536) (ChunksCons (Bytes shared 0 65536) acc) + -- | Are there any bytes in the chunked byte sequences? null :: Chunks -> Bool null = go where diff --git a/src/Data/Bytes/Pure.hs b/src/Data/Bytes/Pure.hs index 9fc1cda..be8082c 100644 --- a/src/Data/Bytes/Pure.hs +++ b/src/Data/Bytes/Pure.hs @@ -40,9 +40,11 @@ module Data.Bytes.Pure , mapU , null , toShortByteString + , replicate + , replicateU ) where -import Prelude hiding (Foldable(..),map) +import Prelude hiding (Foldable(..),map,replicate) import Control.Monad.Primitive (PrimState,PrimMonad) import Control.Monad.ST.Run (runByteArrayST) @@ -346,3 +348,16 @@ toShortByteString :: Bytes -> ShortByteString toShortByteString !b = case toByteArray b of PM.ByteArray x -> SBS x +-- | Replicate a byte @n@ times. +replicate :: + Int -- ^ Desired length @n@ + -> Word8 -- ^ Byte to replicate + -> Bytes +replicate !n !w = Bytes (replicateU n w) 0 n + +-- | Variant of 'replicate' that returns an unsliced byte array. +replicateU :: Int -> Word8 -> ByteArray +replicateU !n !w = runByteArrayST do + arr <- PM.newByteArray n + PM.setByteArray arr 0 n w + PM.unsafeFreezeByteArray arr