diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..f6c0b22 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +@byteverse/l3c diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..085bbaf --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,12 @@ +name: build +on: + pull_request: + branches: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build.yaml@main + secrets: inherit + with: + release: false diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..bd0bbd5 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,12 @@ +name: release +on: + push: + tags: + - "*" + +jobs: + call-workflow: + uses: byteverse/.github/.github/workflows/build.yaml@main + secrets: inherit + with: + release: true diff --git a/.gitignore b/.gitignore index 28d589b..cde1485 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.vscode/ dist dist-* cabal-dev diff --git a/CHANGELOG.md b/CHANGELOG.md index 43c5890..81abdca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Revision history for byteslice +## 0.2.13.1 -- 2024-02-01 + +* Update package metadata. + ## 0.2.13.0 -- 2024-01-12 * Add `replicate` and `replicateByte` to `Data.Bytes.Chunks`. @@ -64,7 +68,7 @@ ## 0.2.5.0 -- 2021-01-22 * Add `Data.Bytes.Chunks.concatByteString`. -* Expose `pinnedToByteString` to end users. +* Expose `pinnedToByteString` to end users. ## 0.2.4.0 -- 2020-10-15 diff --git a/Setup.hs b/Setup.hs deleted file mode 100644 index 9a994af..0000000 --- a/Setup.hs +++ /dev/null @@ -1,2 +0,0 @@ -import Distribution.Simple -main = defaultMain diff --git a/bench/Main.hs b/bench/Main.hs index 32a2f93..8dc3a15 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -1,4 +1,4 @@ -import Gauge.Main (defaultMain, bench, whnf, bgroup) +import Gauge.Main (bench, bgroup, defaultMain, whnf) import Data.Bytes.Types import Data.List (permutations) @@ -10,19 +10,21 @@ naiveMconcat :: [Bytes] -> Bytes naiveMconcat = foldr mappend mempty main :: IO () -main = defaultMain - [ bench "mconcat" $ whnf mconcat mconcatBytes - , bench "naiveMconcat" $ whnf naiveMconcat mconcatBytes - , bgroup "replace" - [ bench "the-dog-and-the-shadow" (whnf replaceMeat theDogAndTheShadow) +main = + defaultMain + [ bench "mconcat" $ whnf mconcat mconcatBytes + , bench "naiveMconcat" $ whnf naiveMconcat mconcatBytes + , bgroup + "replace" + [ bench "the-dog-and-the-shadow" (whnf replaceMeat theDogAndTheShadow) + ] ] - ] mconcatBytes :: [Bytes] -mconcatBytes = fmap Ascii.fromString $ permutations ['a'..'g'] +mconcatBytes = fmap Ascii.fromString $ permutations ['a' .. 'g'] replaceMeat :: Bytes -> Bytes -{-# noinline replaceMeat #-} +{-# NOINLINE replaceMeat #-} replaceMeat x = Bytes.replace meat gruel x meat :: Bytes @@ -32,14 +34,16 @@ gruel :: Bytes gruel = Ascii.fromString "gruel" theDogAndTheShadow :: Bytes -theDogAndTheShadow = Ascii.fromString $ concat - [ "It happened that a Dog had got a piece of meat and was " - , "carrying it home in his mouth to eat it in peace. " - , "Now on his way home he had to cross a plank lying across " - , "a running brook. As he crossed, he looked down and saw his " - , "own shadow reflected in the water beneath. Thinking it was " - , "another dog with another piece of meat, he made up his mind " - , "to have that also. So he made a snap at the shadow in the water, " - , "but as he opened his mouth the piece of meat fell out, dropped " - , "into the water and was never seen more." - ] +theDogAndTheShadow = + Ascii.fromString $ + concat + [ "It happened that a Dog had got a piece of meat and was " + , "carrying it home in his mouth to eat it in peace. " + , "Now on his way home he had to cross a plank lying across " + , "a running brook. As he crossed, he looked down and saw his " + , "own shadow reflected in the water beneath. Thinking it was " + , "another dog with another piece of meat, he made up his mind " + , "to have that also. So he made a snap at the shadow in the water, " + , "but as he opened his mouth the piece of meat fell out, dropped " + , "into the water and was never seen more." + ] diff --git a/byteslice.cabal b/byteslice.cabal index 7d0b9af..00fef70 100644 --- a/byteslice.cabal +++ b/byteslice.cabal @@ -1,25 +1,26 @@ -cabal-version: 2.4 -name: byteslice -version: 0.2.13.0 -synopsis: Slicing managed and unmanaged memory +cabal-version: 2.4 +name: byteslice +version: 0.2.13.1 +synopsis: Slicing managed and unmanaged memory description: This library provides types that allow the user to talk about a slice of a ByteArray or a MutableByteArray. It also offers UnmanagedBytes, which is kind of like a slice into unmanaged memory. However, it is just an address and a length. -homepage: https://github.com/andrewthad/byteslice -bug-reports: https://github.com/andrewthad/byteslice/issues -license: BSD-3-Clause -license-file: LICENSE -author: Andrew Martin -maintainer: andrew.thaddeus@gmail.com -copyright: 2020 Andrew Martin -category: Data -extra-source-files: CHANGELOG.md + +homepage: https://github.com/byteverse/byteslice +bug-reports: https://github.com/byteverse/byteslice/issues +license: BSD-3-Clause +license-file: LICENSE +author: Andrew Martin +maintainer: amartin@layer3com.com +copyright: 2020 Andrew Martin +category: Data +extra-doc-files: CHANGELOG.md flag avoid-rawmemchr - default: True - description: Avoid using rawmemchr which is non-portable GNU libc only + default: True + description: Avoid using rawmemchr which is non-portable GNU libc only library exposed-modules: @@ -35,70 +36,84 @@ library Data.Bytes.Text.Utf8 Data.Bytes.Text.Windows1252 Data.Bytes.Types + other-modules: - Data.Bytes.Search + Cstrlen Data.Bytes.Byte - Data.Bytes.Pure - Data.Bytes.IO Data.Bytes.Internal.Show + Data.Bytes.IO + Data.Bytes.Pure + Data.Bytes.Search Reps - Cstrlen + build-depends: - , base >=4.14 && <5 - , bytestring >=0.10.8 && <0.13 - , natural-arithmetic >=0.1.4 - , primitive >=0.7.4 && <0.10 - , primitive-addr >=0.1 && <0.2 - , primitive-unlifted >=0.1.2 && <2.2 - , run-st >=0.1.1 && <0.2 - , text >=1.2.5 - , text-short >=0.1.3 && <0.2 - , tuples >=0.1 && <0.2 - , vector >=0.12 && <0.14 - hs-source-dirs: src - ghc-options: -Wall -O2 - if impl(ghc>=9.2) + , base >=4.14 && <5 + , bytestring >=0.10.8 && <0.13 + , natural-arithmetic >=0.1.4 + , primitive >=0.7.4 && <0.10 + , primitive-addr >=0.1 && <0.2 + , primitive-unlifted >=0.1.2 && <2.2 + , run-st >=0.1.1 && <0.2 + , text >=1.2.5 + , text-short >=0.1.3 && <0.2 + , tuples >=0.1 && <0.2 + , vector >=0.12 && <0.14 + + hs-source-dirs: src + ghc-options: -Wall -O2 + + if impl(ghc >=9.2) hs-source-dirs: src-new-reps + else hs-source-dirs: src-old-reps - if impl(ghc>=9.0) + + if impl(ghc >=9.0) hs-source-dirs: src-ghc-cstrlen + else hs-source-dirs: src-no-ghc-cstrlen + default-language: Haskell2010 - include-dirs: include - includes: bs_custom.h + include-dirs: include + includes: bs_custom.h install-includes: bs_custom.h - c-sources: cbits/bs_custom.c + c-sources: cbits/bs_custom.c + if flag(avoid-rawmemchr) cc-options: -DAVOID_RAWMEMCHR=1 test-suite test default-language: Haskell2010 - type: exitcode-stdio-1.0 - hs-source-dirs: test - main-is: Main.hs - ghc-options: -Wall -O2 + type: exitcode-stdio-1.0 + hs-source-dirs: test + main-is: Main.hs + ghc-options: -Wall -O2 build-depends: - , base >=4.11.1 && <5 + , base >=4.11.1 && <5 , byteslice , bytestring , primitive - , quickcheck-classes >=0.6.4 + , quickcheck-classes >=0.6.4 , tasty , tasty-hunit , tasty-quickcheck + , text >=1.2 , transformers - , text >=1.2 benchmark bench - type: exitcode-stdio-1.0 + type: exitcode-stdio-1.0 build-depends: , base , byteslice , gauge , primitive - ghc-options: -Wall -O2 + + ghc-options: -Wall -O2 default-language: Haskell2010 - hs-source-dirs: bench - main-is: Main.hs + hs-source-dirs: bench + main-is: Main.hs + +source-repository head + type: git + location: git://github.com/byteverse/byteslice.git diff --git a/fourmolu.yaml b/fourmolu.yaml new file mode 100644 index 0000000..40cd005 --- /dev/null +++ b/fourmolu.yaml @@ -0,0 +1,51 @@ +# Number of spaces per indentation step +indentation: 2 + +# Max line length for automatic line breaking +column-limit: 200 + +# Styling of arrows in type signatures (choices: trailing, leading, or leading-args) +function-arrows: trailing + +# How to place commas in multi-line lists, records, etc. (choices: leading or trailing) +comma-style: leading + +# Styling of import/export lists (choices: leading, trailing, or diff-friendly) +import-export-style: leading + +# Whether to full-indent or half-indent 'where' bindings past the preceding body +indent-wheres: false + +# Whether to leave a space before an opening record brace +record-brace-space: true + +# Number of spaces between top-level declarations +newlines-between-decls: 1 + +# How to print Haddock comments (choices: single-line, multi-line, or multi-line-compact) +haddock-style: multi-line + +# How to print module docstring +haddock-style-module: null + +# Styling of let blocks (choices: auto, inline, newline, or mixed) +let-style: auto + +# How to align the 'in' keyword with respect to the 'let' keyword (choices: left-align, right-align, or no-space) +in-style: right-align + +# Whether to put parentheses around a single constraint (choices: auto, always, or never) +single-constraint-parens: always + +# Output Unicode syntax (choices: detect, always, or never) +unicode: never + +# Give the programmer more choice on where to insert blank lines +respectful: true + +# Fixity information for operators +fixities: [] + +# Module reexports Fourmolu should know about +reexports: [] + diff --git a/src-ghc-cstrlen/Cstrlen.hs b/src-ghc-cstrlen/Cstrlen.hs index 9494bf3..bd2cc2b 100644 --- a/src-ghc-cstrlen/Cstrlen.hs +++ b/src-ghc-cstrlen/Cstrlen.hs @@ -1,4 +1,4 @@ -{-# language MagicHash #-} +{-# LANGUAGE MagicHash #-} module Cstrlen ( cstringLength# diff --git a/src-new-reps/Reps.hs b/src-new-reps/Reps.hs index 0ee5142..4a371d8 100644 --- a/src-new-reps/Reps.hs +++ b/src-new-reps/Reps.hs @@ -1,16 +1,16 @@ -{-# language DataKinds #-} -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language MagicHash #-} -{-# language UnboxedTuples #-} -{-# language UnliftedNewtypes #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedNewtypes #-} module Reps - ( Bytes#(..) + ( Bytes# (..) , word8ToWord# ) where -import GHC.Exts (ByteArray#,Int#,RuntimeRep(..),Levity(Unlifted),TYPE,word8ToWord#) +import GHC.Exts (ByteArray#, Int#, Levity (Unlifted), RuntimeRep (..), TYPE, word8ToWord#) -newtype Bytes# :: TYPE ('TupleRep '[ 'BoxedRep 'Unlifted,'IntRep,'IntRep]) where +newtype Bytes# :: TYPE ('TupleRep '[ 'BoxedRep 'Unlifted, 'IntRep, 'IntRep]) where Bytes# :: (# ByteArray#, Int#, Int# #) -> Bytes# diff --git a/src-no-ghc-cstrlen/Cstrlen.hs b/src-no-ghc-cstrlen/Cstrlen.hs index ec23b7a..ef7c4ab 100644 --- a/src-no-ghc-cstrlen/Cstrlen.hs +++ b/src-no-ghc-cstrlen/Cstrlen.hs @@ -1,12 +1,12 @@ -{-# language MagicHash #-} -{-# language TypeFamilies #-} -{-# language UnliftedFFITypes #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE UnliftedFFITypes #-} module Cstrlen ( cstringLength# ) where -import GHC.Exts (Addr#,Int#) +import GHC.Exts (Addr#, Int#) foreign import ccall unsafe "strlen" c_strlen :: Addr# -> Int# diff --git a/src-old-reps/Reps.hs b/src-old-reps/Reps.hs index 9c4e5b3..01356c3 100644 --- a/src-old-reps/Reps.hs +++ b/src-old-reps/Reps.hs @@ -1,22 +1,22 @@ -{-# language GADTSyntax #-} -{-# language KindSignatures #-} -{-# language MagicHash #-} -{-# language TypeInType #-} -{-# language UnboxedTuples #-} -{-# language UnliftedNewtypes #-} +{-# LANGUAGE GADTSyntax #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE TypeInType #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedNewtypes #-} module Reps - ( Bytes#(..) + ( Bytes# (..) , word8ToWord# ) where -import GHC.Exts (ByteArray#,Int#,Word#,RuntimeRep(..),TYPE) +import GHC.Exts (ByteArray#, Int#, RuntimeRep (..), TYPE, Word#) -newtype Bytes# :: TYPE ('TupleRep '[ 'UnliftedRep,'IntRep,'IntRep]) where +newtype Bytes# :: TYPE ('TupleRep '[ 'UnliftedRep, 'IntRep, 'IntRep]) where Bytes# :: (# ByteArray#, Int#, Int# #) -> Bytes# -- In GHC 9.2, the lifted Word8 type started being backed by the -- unlifted Word8# instead of by Word#. This is a compatibility hack. word8ToWord# :: Word# -> Word# -{-# inline word8ToWord# #-} +{-# INLINE word8ToWord# #-} word8ToWord# w = w diff --git a/src/Data/Bytes.hs b/src/Data/Bytes.hs index 2ec822d..2e22757 100644 --- a/src/Data/Bytes.hs +++ b/src/Data/Bytes.hs @@ -1,70 +1,84 @@ -{-# language BangPatterns #-} -{-# language BlockArguments #-} -{-# language DataKinds #-} -{-# language DuplicateRecordFields #-} -{-# language KindSignatures #-} -{-# language MagicHash #-} -{-# language NamedFieldPuns #-} -{-# language RankNTypes #-} -{-# language TupleSections #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} - --- | If you are interested in sub-arrays of 'ByteArray's (e.g. writing a binary --- search), it would be grossly inefficient to make a copy of the sub-array. On --- the other hand, it'd be really annoying to track limit indices by hand. --- --- This module defines the 'Bytes' type which exposes a standard array interface --- for a sub-arrays without copying and without manual index manipulation. -- --- For mutable arrays, see 'Data.Bytes.Mutable'. +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BlockArguments #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE UnboxedTuples #-} + +{- | If you are interested in sub-arrays of 'ByteArray's (e.g. writing a binary +search), it would be grossly inefficient to make a copy of the sub-array. On +the other hand, it'd be really annoying to track limit indices by hand. + +This module defines the 'Bytes' type which exposes a standard array interface +for a sub-arrays without copying and without manual index manipulation. -- +For mutable arrays, see 'Data.Bytes.Mutable'. +-} module Data.Bytes ( -- * Types Bytes + -- * Constants , Pure.empty , Pure.emptyPinned , Pure.emptyPinnedU + -- * Properties , Pure.null , Pure.length + -- * Decompose , uncons , unsnoc + -- * Predicates , any , all + -- * Create + -- ** Sliced , singleton , doubleton , tripleton , Pure.replicate + -- ** Unsliced , singletonU , doubletonU , tripletonU , Pure.replicateU + -- * Filtering , takeWhile , dropWhile , takeWhileEnd , dropWhileEnd + -- * Traversals , Pure.map , Pure.mapU + -- * Folds , Pure.foldl , Pure.foldl' , Pure.foldr , Pure.foldr' + -- * Folds with Indices , Pure.ifoldl' + -- * Monadic Folds , Pure.foldlM , Pure.foldrM + -- * Common Folds , elem + -- * Splitting + -- ** Unlimited , Byte.split , Byte.splitU @@ -72,26 +86,33 @@ module Data.Bytes , Byte.splitInitU , Byte.splitNonEmpty , Byte.splitStream + -- ** Fixed from Beginning , Byte.split1 , splitTetragram1 , Byte.split2 , Byte.split3 , Byte.split4 + -- ** Fixed from End , Byte.splitEnd1 + -- * Combining , intercalate , intercalateByte2 , concatArray , concatArrayU + -- * Searching , replace , findIndices , findTetragramIndex + -- * Counting , Byte.count + -- * Prefix and Suffix + -- ** Byte Sequence , isPrefixOf , isSuffixOf @@ -101,12 +122,16 @@ module Data.Bytes , stripSuffix , stripOptionalSuffix , longestCommonPrefix + -- ** C Strings , stripCStringPrefix + -- ** Single Byte , isBytePrefixOf , isByteSuffixOf + -- * Equality + -- ** Fixed Characters , equalsLatin1 , equalsLatin2 @@ -120,22 +145,28 @@ module Data.Bytes , equalsLatin10 , equalsLatin11 , equalsLatin12 + -- ** C Strings , equalsCString + -- * Hashing , Pure.fnv1a32 , Pure.fnv1a64 + -- * Unsafe Slicing , Pure.unsafeTake , Pure.unsafeDrop , Pure.unsafeIndex , Pure.unsafeHead + -- * Copying , Pure.unsafeCopy + -- * Pointers , Pure.pin , Pure.contents , touch + -- * Conversion , Pure.toByteArray , Pure.toByteArrayClone @@ -156,38 +187,38 @@ module Data.Bytes , toShortByteString , toShortByteStringClone , toLowerAsciiByteArrayClone + -- * I\/O with Handles , BIO.hGet , readFile , BIO.hPut + -- * Unlifted Types , lift , unlift + -- * Length Indexed , withLength , withLengthU ) where -import Prelude hiding (length,takeWhile,dropWhile,null,foldl,foldr,elem,replicate,any,all,readFile,map) +import Prelude hiding (all, any, dropWhile, elem, foldl, foldr, length, map, null, readFile, replicate, takeWhile) -import Control.Monad.Primitive (PrimMonad,primitive_,unsafeIOToPrim) +import Control.Monad.Primitive (PrimMonad, primitive_, unsafeIOToPrim) import Control.Monad.ST.Run (runByteArrayST) import Cstrlen (cstringLength#) -import Data.Bits (unsafeShiftL,(.|.)) -import Data.ByteString.Short.Internal (ShortByteString(SBS)) -import Data.Bytes.Pure (length,fromByteArray,foldr,unsafeDrop) -import Data.Bytes.Pure (unsafeIndex,toShortByteString) -import Data.Bytes.Types (Bytes(Bytes,array,offset),BytesN(BytesN)) -import Data.Bytes.Types (ByteArrayN(ByteArrayN)) -import Data.Primitive (Array,ByteArray(ByteArray)) +import Data.Bits (unsafeShiftL, (.|.)) +import Data.ByteString.Short.Internal (ShortByteString (SBS)) +import Data.Bytes.Pure (foldr, fromByteArray, length, toShortByteString, unsafeDrop, unsafeIndex) +import Data.Bytes.Search (findIndices, isInfixOf, replace) +import Data.Bytes.Types (ByteArrayN (ByteArrayN), Bytes (Bytes, array, offset), BytesN (BytesN)) +import Data.Primitive (Array, ByteArray (ByteArray)) import Data.Text.Short (ShortText) import Foreign.C.String (CString) -import Foreign.Ptr (Ptr,plusPtr,castPtr) -import GHC.Exts (Addr#,Word#,Int#) -import GHC.Exts (Int(I#),Ptr(Ptr)) -import GHC.Word (Word8(W8#),Word32) -import Reps (Bytes#(..),word8ToWord#) -import Data.Bytes.Search (findIndices,replace,isInfixOf) +import Foreign.Ptr (Ptr, castPtr, plusPtr) +import GHC.Exts (Addr#, Int (I#), Int#, Ptr (Ptr), Word#) +import GHC.Word (Word32, Word8 (W8#)) +import Reps (Bytes# (..), word8ToWord#) import qualified Arithmetic.Nat as Nat import qualified Arithmetic.Types as Arithmetic @@ -207,38 +238,43 @@ import qualified Data.Text.Short as TS import qualified GHC.Exts as Exts import qualified GHC.TypeNats as GHC --- | Extract the head and tail of the 'Bytes', returning 'Nothing' if --- it is empty. +{- | Extract the head and tail of the 'Bytes', returning 'Nothing' if +it is empty. +-} uncons :: Bytes -> Maybe (Word8, Bytes) -{-# inline uncons #-} +{-# INLINE uncons #-} uncons b = case length b of 0 -> Nothing _ -> Just (unsafeIndex b 0, unsafeDrop 1 b) --- | Extract the @init@ and @last@ of the 'Bytes', returning 'Nothing' if --- it is empty. +{- | Extract the @init@ and @last@ of the 'Bytes', returning 'Nothing' if +it is empty. +-} unsnoc :: Bytes -> Maybe (Bytes, Word8) -{-# inline unsnoc #-} +{-# INLINE unsnoc #-} unsnoc b@(Bytes arr off len) = case len of 0 -> Nothing - _ -> let !len' = len - 1 in - Just (Bytes arr off len', unsafeIndex b len') + _ -> + let !len' = len - 1 + in Just (Bytes arr off len', unsafeIndex b len') --- | Does the byte sequence begin with the given byte? False if the --- byte sequence is empty. +{- | Does the byte sequence begin with the given byte? False if the +byte sequence is empty. +-} isBytePrefixOf :: Word8 -> Bytes -> Bool -{-# inline isBytePrefixOf #-} +{-# INLINE isBytePrefixOf #-} isBytePrefixOf w b = case length b of 0 -> False _ -> unsafeIndex b 0 == w --- | Does the byte sequence end with the given byte? False if the --- byte sequence is empty. +{- | Does the byte sequence end with the given byte? False if the +byte sequence is empty. +-} isByteSuffixOf :: Word8 -> Bytes -> Bool isByteSuffixOf w b = case len of 0 -> False _ -> unsafeIndex b (len - 1) == w - where + where len = length b -- | Is the first argument a prefix of the second argument? @@ -246,7 +282,7 @@ isPrefixOf :: Bytes -> Bytes -> Bool isPrefixOf (Bytes a aOff aLen) (Bytes b bOff bLen) = -- For prefix and suffix testing, we do not use -- the sameByteArray optimization that we use in - -- the Eq instance. Prefix and suffix testing seldom + -- the Eq instance. Prefix and suffix testing seldom -- compares a byte array with the same in-memory -- byte array. if aLen <= bLen @@ -263,33 +299,33 @@ isSuffixOf (Bytes a aOff aLen) (Bytes b bOff bLen) = -- | Find the longest string which is a prefix of both arguments. longestCommonPrefix :: Bytes -> Bytes -> Bytes longestCommonPrefix a b = loop 0 - where + where loop :: Int -> Bytes loop !into | into < maxLen - && unsafeIndex a into == unsafeIndex b into - = loop (into + 1) + && unsafeIndex a into == unsafeIndex b into = + loop (into + 1) | otherwise = Pure.unsafeTake into a maxLen = min (length a) (length b) -- | Create a byte sequence with one byte. singleton :: Word8 -> Bytes -{-# inline singleton #-} +{-# INLINE singleton #-} singleton !a = Bytes (singletonU a) 0 1 -- | Create a byte sequence with two bytes. doubleton :: Word8 -> Word8 -> Bytes -{-# inline doubleton #-} +{-# INLINE doubleton #-} doubleton !a !b = Bytes (doubletonU a b) 0 2 -- | Create a byte sequence with three bytes. tripleton :: Word8 -> Word8 -> Word8 -> Bytes -{-# inline tripleton #-} +{-# INLINE tripleton #-} tripleton !a !b !c = Bytes (tripletonU a b c) 0 3 -- | Create an unsliced byte sequence with one byte. singletonU :: Word8 -> ByteArray -{-# inline singletonU #-} +{-# INLINE singletonU #-} singletonU !a = runByteArrayST do arr <- PM.newByteArray 1 PM.writeByteArray arr 0 a @@ -297,7 +333,7 @@ singletonU !a = runByteArrayST do -- | Create an unsliced byte sequence with two bytes. doubletonU :: Word8 -> Word8 -> ByteArray -{-# inline doubletonU #-} +{-# INLINE doubletonU #-} doubletonU !a !b = runByteArrayST do arr <- PM.newByteArray 2 PM.writeByteArray arr 0 a @@ -306,7 +342,7 @@ doubletonU !a !b = runByteArrayST do -- | Create an unsliced byte sequence with three bytes. tripletonU :: Word8 -> Word8 -> Word8 -> ByteArray -{-# inline tripletonU #-} +{-# INLINE tripletonU #-} tripletonU !a !b !c = runByteArrayST do arr <- PM.newByteArray 3 PM.writeByteArray arr 0 a @@ -314,35 +350,43 @@ tripletonU !a !b !c = runByteArrayST do PM.writeByteArray arr 2 c PM.unsafeFreezeByteArray arr --- | /O(n)/ Return the suffix of the second string if its prefix --- matches the entire first string. +{- | /O(n)/ Return the suffix of the second string if its prefix +matches the entire first string. +-} stripPrefix :: Bytes -> Bytes -> Maybe Bytes -stripPrefix !pre !str = if pre `isPrefixOf` str - then Just (Bytes (array str) (offset str + length pre) (length str - length pre)) - else Nothing - --- | /O(n)/ Return the suffix of the second string if its prefix --- matches the entire first string. Otherwise, return the second --- string unchanged. +stripPrefix !pre !str = + if pre `isPrefixOf` str + then Just (Bytes (array str) (offset str + length pre) (length str - length pre)) + else Nothing + +{- | /O(n)/ Return the suffix of the second string if its prefix +matches the entire first string. Otherwise, return the second +string unchanged. +-} stripOptionalPrefix :: Bytes -> Bytes -> Bytes -stripOptionalPrefix !pre !str = if pre `isPrefixOf` str - then Bytes (array str) (offset str + length pre) (length str - length pre) - else str - --- | /O(n)/ Return the prefix of the second string if its suffix --- matches the entire first string. +stripOptionalPrefix !pre !str = + if pre `isPrefixOf` str + then Bytes (array str) (offset str + length pre) (length str - length pre) + else str + +{- | /O(n)/ Return the prefix of the second string if its suffix +matches the entire first string. +-} stripSuffix :: Bytes -> Bytes -> Maybe Bytes -stripSuffix !suf !str = if suf `isSuffixOf` str - then Just (Bytes (array str) (offset str) (length str - length suf)) - else Nothing - --- | /O(n)/ Return the prefix of the second string if its suffix --- matches the entire first string. Otherwise, return the second --- string unchanged. +stripSuffix !suf !str = + if suf `isSuffixOf` str + then Just (Bytes (array str) (offset str) (length str - length suf)) + else Nothing + +{- | /O(n)/ Return the prefix of the second string if its suffix +matches the entire first string. Otherwise, return the second +string unchanged. +-} stripOptionalSuffix :: Bytes -> Bytes -> Bytes -stripOptionalSuffix !suf !str = if suf `isSuffixOf` str - then Bytes (array str) (offset str) (length str - length suf) - else str +stripOptionalSuffix !suf !str = + if suf `isSuffixOf` str + then Bytes (array str) (offset str) (length str - length suf) + else str -- | Is the byte a member of the byte sequence? elem :: Word8 -> Bytes -> Bool @@ -351,31 +395,33 @@ elem (W8# w) b = case elemLoop 0# (word8ToWord# w) b of _ -> False elemLoop :: Int# -> Word# -> Bytes -> Int# -elemLoop !r !w (Bytes arr@(ByteArray arr# ) off@(I# off# ) len) = case len of +elemLoop !r !w (Bytes arr@(ByteArray arr#) off@(I# off#) len) = case len of 0 -> r - _ -> elemLoop (Exts.orI# r (Exts.eqWord# w (word8ToWord# (Exts.indexWord8Array# arr# off# )) )) w (Bytes arr (off + 1) (len - 1)) + _ -> elemLoop (Exts.orI# r (Exts.eqWord# w (word8ToWord# (Exts.indexWord8Array# arr# off#)))) w (Bytes arr (off + 1) (len - 1)) -- | Take bytes while the predicate is true. takeWhile :: (Word8 -> Bool) -> Bytes -> Bytes -{-# inline takeWhile #-} +{-# INLINE takeWhile #-} takeWhile k b = Pure.unsafeTake (countWhile k b) b -- | Drop bytes while the predicate is true. dropWhile :: (Word8 -> Bool) -> Bytes -> Bytes -{-# inline dropWhile #-} +{-# INLINE dropWhile #-} dropWhile k b = Pure.unsafeDrop (countWhile k b) b --- | /O(n)/ 'dropWhileEnd' @p@ @b@ returns the prefix remaining after --- dropping characters that satisfy the predicate @p@ from the end of --- @t@. +{- | /O(n)/ 'dropWhileEnd' @p@ @b@ returns the prefix remaining after +dropping characters that satisfy the predicate @p@ from the end of +@t@. +-} dropWhileEnd :: (Word8 -> Bool) -> Bytes -> Bytes -{-# inline dropWhileEnd #-} +{-# INLINE dropWhileEnd #-} dropWhileEnd k !b = Pure.unsafeTake (length b - countWhileEnd k b) b --- | /O(n)/ 'takeWhileEnd' @p@ @b@ returns the longest suffix of --- elements that satisfy predicate @p@. +{- | /O(n)/ 'takeWhileEnd' @p@ @b@ returns the longest suffix of +elements that satisfy predicate @p@. +-} takeWhileEnd :: (Word8 -> Bool) -> Bytes -> Bytes -{-# inline takeWhileEnd #-} +{-# INLINE takeWhileEnd #-} takeWhileEnd k !b = let n = countWhileEnd k b in Bytes (array b) (offset b + length b - n) n @@ -385,36 +431,44 @@ takeWhileEnd k !b = -- match the predicate, this will return the length originally -- provided. countWhile :: (Word8 -> Bool) -> Bytes -> Int -{-# inline countWhile #-} -countWhile k (Bytes arr off0 len0) = go off0 len0 0 where - go !off !len !n = if len > 0 - then if k (PM.indexByteArray arr off) - then go (off + 1) (len - 1) (n + 1) +{-# INLINE countWhile #-} +countWhile k (Bytes arr off0 len0) = go off0 len0 0 + where + go !off !len !n = + if len > 0 + then + if k (PM.indexByteArray arr off) + then go (off + 1) (len - 1) (n + 1) + else n else n - else n -- Internal. Variant of countWhile that starts from the end -- of the string instead of the beginning. countWhileEnd :: (Word8 -> Bool) -> Bytes -> Int -{-# inline countWhileEnd #-} -countWhileEnd k (Bytes arr off0 len0) = go (off0 + len0 - 1) (len0 - 1) 0 where - go !off !len !n = if len >= 0 - then if k (PM.indexByteArray arr off) - then go (off - 1) (len - 1) (n + 1) +{-# INLINE countWhileEnd #-} +countWhileEnd k (Bytes arr off0 len0) = go (off0 + len0 - 1) (len0 - 1) 0 + where + go !off !len !n = + if len >= 0 + then + if k (PM.indexByteArray arr off) + then go (off - 1) (len - 1) (n + 1) + else n else n - else n --- | Convert a 'String' consisting of only characters in the ASCII block --- to a byte sequence. Any character with a codepoint above @U+007F@ is --- replaced by @U+0000@. +{- | Convert a 'String' consisting of only characters in the ASCII block +to a byte sequence. Any character with a codepoint above @U+007F@ is +replaced by @U+0000@. +-} fromAsciiString :: String -> Bytes {-# DEPRECATED fromAsciiString "use Data.Bytes.Text.Ascii.fromString instead" #-} {-# INLINE fromAsciiString #-} fromAsciiString = Ascii.fromString --- | Convert a 'String' consisting of only characters representable --- by ISO-8859-1. These are encoded with ISO-8859-1. Any character --- with a codepoint above @U+00FF@ is replaced by an unspecified byte. +{- | Convert a 'String' consisting of only characters representable +by ISO-8859-1. These are encoded with ISO-8859-1. Any character +with a codepoint above @U+00FF@ is replaced by an unspecified byte. +-} fromLatinString :: String -> Bytes {-# DEPRECATED fromLatinString "use Data.Bytes.Text.Latin1.fromString instead" #-} {-# INLINE fromLatinString #-} @@ -428,14 +482,20 @@ toLatinString = Latin1.toString -- | Copy a primitive string literal into managed memory. fromCString# :: Addr# -> Bytes -fromCString# a = Bytes - ( runByteArrayST $ do - dst@(PM.MutableByteArray dst# ) <- PM.newByteArray len - PM.copyPtrToMutablePrimArray - (PM.MutablePrimArray dst# ) 0 (Ptr a :: Ptr Word8) len - PM.unsafeFreezeByteArray dst - ) 0 len - where +fromCString# a = + Bytes + ( runByteArrayST $ do + dst@(PM.MutableByteArray dst#) <- PM.newByteArray len + PM.copyPtrToMutablePrimArray + (PM.MutablePrimArray dst#) + 0 + (Ptr a :: Ptr Word8) + len + PM.unsafeFreezeByteArray dst + ) + 0 + len + where len = I# (cstringLength# a) compareByteArrays :: ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering @@ -443,107 +503,122 @@ compareByteArrays :: ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering compareByteArrays (ByteArray ba1#) (I# off1#) (ByteArray ba2#) (I# off2#) (I# n#) = compare (I# (Exts.compareByteArrays# ba1# off1# ba2# off2# n#)) 0 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a singleton whose element matches the character? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a singleton whose element matches the character? +-} equalsLatin1 :: Char -> Bytes -> Bool {-# DEPRECATED equalsLatin1 "use Data.Bytes.Text.Latin1.equals1 instead" #-} {-# INLINE equalsLatin1 #-} equalsLatin1 = Latin1.equals1 - --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a doubleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a doubleton whose elements match the characters? +-} equalsLatin2 :: Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin2 "use Data.Bytes.Text.Latin1.equals2 instead" #-} {-# INLINE equalsLatin2 #-} equalsLatin2 = Latin1.equals2 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a tripleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a tripleton whose elements match the characters? +-} equalsLatin3 :: Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin3 "use Data.Bytes.Text.Latin1.equals3 instead" #-} {-# INLINE equalsLatin3 #-} equalsLatin3 = Latin1.equals3 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a quadrupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a quadrupleton whose elements match the characters? +-} equalsLatin4 :: Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin4 "use Data.Bytes.Text.Latin1.equals4 instead" #-} {-# INLINE equalsLatin4 #-} equalsLatin4 = Latin1.equals4 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a quintupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a quintupleton whose elements match the characters? +-} equalsLatin5 :: Char -> Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin5 "use Data.Bytes.Text.Latin1.equals5 instead" #-} {-# INLINE equalsLatin5 #-} equalsLatin5 = Latin1.equals5 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a sextupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a sextupleton whose elements match the characters? +-} equalsLatin6 :: Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin6 "use Data.Bytes.Text.Latin1.equals6 instead" #-} {-# INLINE equalsLatin6 #-} equalsLatin6 = Latin1.equals6 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a septupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a septupleton whose elements match the characters? +-} equalsLatin7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin7 "use Data.Bytes.Text.Latin1.equals7 instead" #-} {-# INLINE equalsLatin7 #-} equalsLatin7 = Latin1.equals7 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- an octupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +an octupleton whose elements match the characters? +-} equalsLatin8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin8 "use Data.Bytes.Text.Latin1.equals8 instead" #-} {-# INLINE equalsLatin8 #-} equalsLatin8 = Latin1.equals8 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a 9-tuple whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a 9-tuple whose elements match the characters? +-} equalsLatin9 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin9 "use Data.Bytes.Text.Latin1.equals9 instead" #-} {-# INLINE equalsLatin9 #-} equalsLatin9 = Latin1.equals9 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a 10-tuple whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a 10-tuple whose elements match the characters? +-} equalsLatin10 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin10 "use Data.Bytes.Text.Latin1.equals10 instead" #-} {-# INLINE equalsLatin10 #-} equalsLatin10 = Latin1.equals10 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a 11-tuple whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a 11-tuple whose elements match the characters? +-} equalsLatin11 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin11 "use Data.Bytes.Text.Latin1.equals11 instead" #-} {-# INLINE equalsLatin11 #-} equalsLatin11 = Latin1.equals11 --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a 12-tuple whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a 12-tuple whose elements match the characters? +-} equalsLatin12 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool {-# DEPRECATED equalsLatin12 "use Data.Bytes.Text.Latin1.equals12 instead" #-} {-# INLINE equalsLatin12 #-} equalsLatin12 = Latin1.equals12 --- | Is the byte sequence equal to the @NUL@-terminated C String? --- The C string must be a constant. +{- | Is the byte sequence equal to the @NUL@-terminated C String? +The C string must be a constant. +-} equalsCString :: CString -> Bytes -> Bool -{-# inline equalsCString #-} -equalsCString !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) off0 len0 where +{-# INLINE equalsCString #-} +equalsCString !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) off0 len0 + where go !ptr !off !len = case len of 0 -> PM.indexOffPtr ptr 0 == (0 :: Word8) _ -> case PM.indexOffPtr ptr 0 of 0 -> False c -> c == PM.indexByteArray arr off && go (plusPtr ptr 1) (off + 1) (len - 1) --- | /O(n)/ Variant of 'stripPrefix' that takes a @NUL@-terminated C String --- as the prefix to test for. +{- | /O(n)/ Variant of 'stripPrefix' that takes a @NUL@-terminated C String +as the prefix to test for. +-} stripCStringPrefix :: CString -> Bytes -> Maybe Bytes -{-# inline stripCStringPrefix #-} -stripCStringPrefix !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) off0 len0 where +{-# INLINE stripCStringPrefix #-} +stripCStringPrefix !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) off0 len0 + where go !ptr !off !len = case PM.indexOffPtr ptr 0 of 0 -> Just (Bytes arr off len) c -> case len of @@ -552,194 +627,211 @@ stripCStringPrefix !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) True -> go (plusPtr ptr 1) (off + 1) (len - 1) False -> Nothing --- | Touch the byte array backing the byte sequence. This sometimes needed --- after calling 'Pure.contents' so that the @ByteArray@ does not get garbage --- collected. -touch :: PrimMonad m => Bytes -> m () -touch (Bytes (ByteArray arr) _ _) = unsafeIOToPrim - (primitive_ (\s -> Exts.touch# arr s)) +{- | Touch the byte array backing the byte sequence. This sometimes needed +after calling 'Pure.contents' so that the @ByteArray@ does not get garbage +collected. +-} +touch :: (PrimMonad m) => Bytes -> m () +touch (Bytes (ByteArray arr) _ _) = + unsafeIOToPrim + (primitive_ (\s -> Exts.touch# arr s)) -- | Read an entire file strictly into a 'Bytes'. readFile :: FilePath -> IO Bytes readFile f = Chunks.concat <$> Chunks.readFile f --- | /O(n)/ The intercalate function takes a separator 'Bytes' and a list of --- 'Bytes' and concatenates the list elements by interspersing the separator --- between each element. +{- | /O(n)/ The intercalate function takes a separator 'Bytes' and a list of +'Bytes' and concatenates the list elements by interspersing the separator +between each element. +-} intercalate :: - Bytes -- ^ Separator (interspersed into the list) - -> [Bytes] -- ^ List - -> Bytes + -- | Separator (interspersed into the list) + Bytes -> + -- | List + [Bytes] -> + Bytes intercalate !_ [] = mempty intercalate !_ [x] = x intercalate (Bytes sarr soff slen) (Bytes arr0 off0 len0 : bs) = Bytes r 0 fullLen - where + where !fullLen = List.foldl' (\acc (Bytes _ _ len) -> acc + len + slen) 0 bs + len0 r = runByteArrayST $ do marr <- PM.newByteArray fullLen PM.copyByteArray marr 0 arr0 off0 len0 - !_ <- F.foldlM - (\ !currLen (Bytes arr off len) -> do - PM.copyByteArray marr currLen sarr soff slen - PM.copyByteArray marr (currLen + slen) arr off len - pure (currLen + len + slen) - ) len0 bs + !_ <- + F.foldlM + ( \ !currLen (Bytes arr off len) -> do + PM.copyByteArray marr currLen sarr soff slen + PM.copyByteArray marr (currLen + slen) arr off len + pure (currLen + len + slen) + ) + len0 + bs PM.unsafeFreezeByteArray marr --- | Specialization of 'intercalate' where the separator is a single byte and --- there are exactly two byte sequences that are being concatenated. +{- | Specialization of 'intercalate' where the separator is a single byte and +there are exactly two byte sequences that are being concatenated. +-} intercalateByte2 :: - Word8 -- ^ Separator - -> Bytes -- ^ First byte sequence - -> Bytes -- ^ Second byte sequence - -> Bytes -intercalateByte2 !sep !a !b = Bytes - { Types.array = runByteArrayST $ do - dst <- PM.newByteArray len - Pure.unsafeCopy dst 0 a - PM.writeByteArray dst (length a) sep - Pure.unsafeCopy dst (length a + 1) b - PM.unsafeFreezeByteArray dst - , Types.length = len - , Types.offset = 0 - } - where len = length a + length b + 1 + -- | Separator + Word8 -> + -- | First byte sequence + Bytes -> + -- | Second byte sequence + Bytes -> + Bytes +intercalateByte2 !sep !a !b = + Bytes + { Types.array = runByteArrayST $ do + dst <- PM.newByteArray len + Pure.unsafeCopy dst 0 a + PM.writeByteArray dst (length a) sep + Pure.unsafeCopy dst (length a + 1) b + PM.unsafeFreezeByteArray dst + , Types.length = len + , Types.offset = 0 + } + where + len = length a + length b + 1 -- | /O(n)/ Returns true if any byte in the sequence satisfies the predicate. any :: (Word8 -> Bool) -> Bytes -> Bool -{-# inline any #-} +{-# INLINE any #-} any f = foldr (\b r -> f b || r) False -- | /O(n)/ Returns true if all bytes in the sequence satisfy the predicate. all :: (Word8 -> Bool) -> Bytes -> Bool -{-# inline all #-} +{-# INLINE all #-} all f = foldr (\b r -> f b && r) True --- | Variant of 'toShortByteString' that unconditionally makes a copy of --- the array backing the sliced 'Bytes' even if the original array --- could be reused. Prefer 'toShortByteString'. +{- | Variant of 'toShortByteString' that unconditionally makes a copy of +the array backing the sliced 'Bytes' even if the original array +could be reused. Prefer 'toShortByteString'. +-} toShortByteStringClone :: Bytes -> ShortByteString -{-# inline toShortByteStringClone #-} +{-# INLINE toShortByteStringClone #-} toShortByteStringClone !b = case Pure.toByteArrayClone b of PM.ByteArray x -> SBS x -- | /O(1)/ Create 'Bytes' from a 'ShortByteString'. fromShortByteString :: ShortByteString -> Bytes -{-# inline fromShortByteString #-} +{-# INLINE fromShortByteString #-} fromShortByteString (SBS x) = fromByteArray (ByteArray x) --- | /O(1)/ Create 'Bytes' from a 'ShortText'. This encodes the text as UTF-8. --- It is a no-op. +{- | /O(1)/ Create 'Bytes' from a 'ShortText'. This encodes the text as UTF-8. +It is a no-op. +-} fromShortText :: ShortText -> Bytes -{-# inline fromShortText #-} +{-# INLINE fromShortText #-} fromShortText t = case TS.toShortByteString t of SBS x -> fromByteArray (ByteArray x) --- | /O(n)/ Interpreting the bytes an ASCII-encoded characters, convert --- the string to lowercase. This adds @0x20@ to bytes in the range --- @[0x41,0x5A]@ and leaves all other bytes alone. Unconditionally --- copies the bytes. +{- | /O(n)/ Interpreting the bytes an ASCII-encoded characters, convert +the string to lowercase. This adds @0x20@ to bytes in the range +@[0x41,0x5A]@ and leaves all other bytes alone. Unconditionally +copies the bytes. +-} toLowerAsciiByteArrayClone :: Bytes -> ByteArray {-# DEPRECATED toLowerAsciiByteArrayClone "use Data.Bytes/Text/AsciiExt.toLowerU" #-} {-# INLINE toLowerAsciiByteArrayClone #-} toLowerAsciiByteArrayClone = AsciiExt.toLowerU lift :: Bytes# -> Bytes -{-# inline lift #-} +{-# INLINE lift #-} lift (Bytes# (# arr, off, len #)) = Bytes (ByteArray arr) (I# off) (I# len) unlift :: Bytes -> Bytes# -{-# inline unlift #-} +{-# INLINE unlift #-} unlift (Bytes (ByteArray arr) (I# off) (I# len)) = Bytes# (# arr, off, len #) concatArrayU :: Array Bytes -> ByteArray -{-# noinline concatArrayU #-} +{-# NOINLINE concatArrayU #-} concatArrayU !xs = runByteArrayST $ do let !arrLen = PM.sizeofArray xs let !totalByteLen = F.foldl' (\acc b -> length b + acc) 0 xs dst <- PM.newByteArray totalByteLen - let go !ix !dstOff = if ix < arrLen - then do - x <- PM.indexArrayM xs ix - Pure.unsafeCopy dst dstOff x - go (ix + 1) (dstOff + length x) - else PM.unsafeFreezeByteArray dst + let go !ix !dstOff = + if ix < arrLen + then do + x <- PM.indexArrayM xs ix + Pure.unsafeCopy dst dstOff x + go (ix + 1) (dstOff + length x) + else PM.unsafeFreezeByteArray dst go 0 0 concatArray :: Array Bytes -> Bytes -{-# inline concatArray #-} +{-# INLINE concatArray #-} concatArray !xs = Pure.fromByteArray (concatArrayU xs) --- | Convert 'Bytes' to 'BytesN', exposing the length in a type-safe --- way in the callback. +{- | Convert 'Bytes' to 'BytesN', exposing the length in a type-safe +way in the callback. +-} withLength :: - Bytes - -> (forall (n :: GHC.Nat). Arithmetic.Nat n -> BytesN n -> a) - -> a -{-# inline withLength #-} -withLength Bytes{array,offset,length=len} f = Nat.with - len - (\n -> f n BytesN{array,offset}) + Bytes -> + (forall (n :: GHC.Nat). Arithmetic.Nat n -> BytesN n -> a) -> + a +{-# INLINE withLength #-} +withLength Bytes {array, offset, length = len} f = + Nat.with + len + (\n -> f n BytesN {array, offset}) withLengthU :: - ByteArray - -> (forall (n :: GHC.Nat). Arithmetic.Nat n -> ByteArrayN n -> a) - -> a -{-# inline withLengthU #-} -withLengthU !arr f = Nat.with - (PM.sizeofByteArray arr) - (\n -> f n (ByteArrayN arr)) + ByteArray -> + (forall (n :: GHC.Nat). Arithmetic.Nat n -> ByteArrayN n -> a) -> + a +{-# INLINE withLengthU #-} +withLengthU !arr f = + Nat.with + (PM.sizeofByteArray arr) + (\n -> f n (ByteArrayN arr)) findTetragramIndex :: - Word8 - -> Word8 - -> Word8 - -> Word8 - -> Bytes - -> Maybe Int -findTetragramIndex !w0 !w1 !w2 !w3 (Bytes arr off len) = if len < 4 - then Nothing - else - let !target = - unsafeShiftL (fromIntegral w0 :: Word32) 24 - .|. - unsafeShiftL (fromIntegral w1 :: Word32) 16 - .|. - unsafeShiftL (fromIntegral w2 :: Word32) 8 - .|. - unsafeShiftL (fromIntegral w3 :: Word32) 0 - !end = off + len - go !ix !acc = if acc == target - then - let n = ix - off - in Just (n - 4) - else if ix < end - then - let !w = PM.indexByteArray arr ix :: Word8 - acc' = - (fromIntegral w :: Word32) - .|. - unsafeShiftL acc 8 - in go (ix + 1) acc' - else Nothing - !acc0 = - unsafeShiftL (fromIntegral (PM.indexByteArray arr 0 :: Word8) :: Word32) 24 - .|. - unsafeShiftL (fromIntegral (PM.indexByteArray arr 1 :: Word8) :: Word32) 16 - .|. - unsafeShiftL (fromIntegral (PM.indexByteArray arr 2 :: Word8) :: Word32) 8 - .|. - unsafeShiftL (fromIntegral (PM.indexByteArray arr 3 :: Word8) :: Word32) 0 - in go 4 acc0 + Word8 -> + Word8 -> + Word8 -> + Word8 -> + Bytes -> + Maybe Int +findTetragramIndex !w0 !w1 !w2 !w3 (Bytes arr off len) = + if len < 4 + then Nothing + else + let !target = + unsafeShiftL (fromIntegral w0 :: Word32) 24 + .|. unsafeShiftL (fromIntegral w1 :: Word32) 16 + .|. unsafeShiftL (fromIntegral w2 :: Word32) 8 + .|. unsafeShiftL (fromIntegral w3 :: Word32) 0 + !end = off + len + go !ix !acc = + if acc == target + then + let n = ix - off + in Just (n - 4) + else + if ix < end + then + let !w = PM.indexByteArray arr ix :: Word8 + acc' = + (fromIntegral w :: Word32) + .|. unsafeShiftL acc 8 + in go (ix + 1) acc' + else Nothing + !acc0 = + unsafeShiftL (fromIntegral (PM.indexByteArray arr 0 :: Word8) :: Word32) 24 + .|. unsafeShiftL (fromIntegral (PM.indexByteArray arr 1 :: Word8) :: Word32) 16 + .|. unsafeShiftL (fromIntegral (PM.indexByteArray arr 2 :: Word8) :: Word32) 8 + .|. unsafeShiftL (fromIntegral (PM.indexByteArray arr 3 :: Word8) :: Word32) 0 + in go 4 acc0 splitTetragram1 :: - Word8 - -> Word8 - -> Word8 - -> Word8 - -> Bytes - -> Maybe (Bytes,Bytes) + Word8 -> + Word8 -> + Word8 -> + Word8 -> + Bytes -> + Maybe (Bytes, Bytes) splitTetragram1 !w0 !w1 !w2 !w3 !b = case findTetragramIndex w0 w1 w2 w3 b of Nothing -> Nothing Just n -> Just (Pure.unsafeTake n b, Pure.unsafeDrop (n + 4) b) diff --git a/src/Data/Bytes/Byte.hs b/src/Data/Bytes/Byte.hs index 45c5d79..93abc96 100644 --- a/src/Data/Bytes/Byte.hs +++ b/src/Data/Bytes/Byte.hs @@ -1,11 +1,9 @@ -{-# language BangPatterns #-} -{-# language BlockArguments #-} -{-# language MagicHash #-} -{-# language NamedFieldPuns #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} -{-# language UnliftedFFITypes #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BlockArguments #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE UnliftedFFITypes #-} -- This internal module has functions for splitting strings -- on a particular byte and for counting occurences of that @@ -24,7 +22,7 @@ module Data.Bytes.Byte , split3 , split4 , splitEnd1 - -- Used by other internal modules + -- Used by other internal modules , elemIndexLoop# ) where @@ -32,14 +30,14 @@ import Prelude hiding (length) import Control.Monad.ST (runST) import Control.Monad.ST.Run (runPrimArrayST) -import Data.Bytes.Types (Bytes(..)) -import Data.List.NonEmpty (NonEmpty((:|))) -import Data.Primitive (PrimArray(..),MutablePrimArray(..),ByteArray(..)) +import Data.Bytes.Types (Bytes (..)) +import Data.List.NonEmpty (NonEmpty ((:|))) +import Data.Primitive (ByteArray (..), MutablePrimArray (..), PrimArray (..)) import Data.Primitive.Unlifted.Array (UnliftedArray) -import Data.Tuple.Types (IntPair(IntPair)) -import Data.Vector.Fusion.Stream.Monadic (Stream(Stream),Step(Yield,Done)) +import Data.Tuple.Types (IntPair (IntPair)) +import Data.Vector.Fusion.Stream.Monadic (Step (Done, Yield), Stream (Stream)) import Data.Word (Word8) -import GHC.Exts (ByteArray#,MutableByteArray#,Int#,Int(I#)) +import GHC.Exts (ByteArray#, Int (I#), Int#, MutableByteArray#) import GHC.IO (unsafeIOToST) import qualified Data.Primitive as PM @@ -48,23 +46,25 @@ import qualified GHC.Exts as Exts -- | Count the number of times the byte appears in the sequence. count :: Word8 -> Bytes -> Int -count !b (Bytes{array=ByteArray arr,offset,length}) = +count !b (Bytes {array = ByteArray arr, offset, length}) = count_ba arr offset length b --- | Variant of 'split' that returns an array of unsliced byte sequences. --- Unlike 'split', this is not a good producer for list fusion. (It does --- not return a list, so it could not be.) Prefer 'split' if the result --- is going to be consumed exactly once by a good consumer. Prefer 'splitU' --- if the result of the split is going to be around for a while and --- inspected multiple times. +{- | Variant of 'split' that returns an array of unsliced byte sequences. +Unlike 'split', this is not a good producer for list fusion. (It does +not return a list, so it could not be.) Prefer 'split' if the result +is going to be consumed exactly once by a good consumer. Prefer 'splitU' +if the result of the split is going to be around for a while and +inspected multiple times. +-} splitU :: Word8 -> Bytes -> UnliftedArray ByteArray splitU !w !bs = let !lens = splitLengthsAlt w bs !lensSz = PM.sizeofPrimArray lens in splitCommonU lens lensSz bs --- | Variant of 'splitU' that drops the trailing element. See 'splitInit' --- for an explanation of why this may be useful. +{- | Variant of 'splitU' that drops the trailing element. See 'splitInit' +for an explanation of why this may be useful. +-} splitInitU :: Word8 -> Bytes -> UnliftedArray ByteArray splitInitU !w !bs = let !lens = splitLengthsAlt w bs @@ -73,103 +73,117 @@ splitInitU !w !bs = -- Internal function splitCommonU :: - PrimArray Int -- array of segment lengths - -> Int -- number of lengths to consider - -> Bytes - -> UnliftedArray ByteArray -splitCommonU !lens !lensSz Bytes{array,offset=arrIx0} = runST do + PrimArray Int -> -- array of segment lengths + Int -> -- number of lengths to consider + Bytes -> + UnliftedArray ByteArray +splitCommonU !lens !lensSz Bytes {array, offset = arrIx0} = runST do dst <- PM.unsafeNewUnliftedArray lensSz - let go !lenIx !arrIx = if lenIx < lensSz - then do - let !len = PM.indexPrimArray lens lenIx - buf <- PM.newByteArray len - PM.copyByteArray buf 0 array arrIx len - buf' <- PM.unsafeFreezeByteArray buf - PM.writeUnliftedArray dst lenIx buf' - go (lenIx + 1) (arrIx + len + 1) - else pure () + let go !lenIx !arrIx = + if lenIx < lensSz + then do + let !len = PM.indexPrimArray lens lenIx + buf <- PM.newByteArray len + PM.copyByteArray buf 0 array arrIx len + buf' <- PM.unsafeFreezeByteArray buf + PM.writeUnliftedArray dst lenIx buf' + go (lenIx + 1) (arrIx + len + 1) + else pure () go 0 arrIx0 PM.unsafeFreezeUnliftedArray dst - --- | Break a byte sequence into pieces separated by the byte argument, --- consuming the delimiter. This function is a good producer for list --- fusion. It is common to immidiately consume the results of @split@ --- with @foldl'@, @traverse_@, @foldlM@, and being a good producer helps --- in this situation. --- --- Note: this function differs from its counterpart in @bytestring@. --- If the byte sequence is empty, this returns a singleton list with --- the empty byte sequence. +{- | Break a byte sequence into pieces separated by the byte argument, +consuming the delimiter. This function is a good producer for list +fusion. It is common to immidiately consume the results of @split@ +with @foldl'@, @traverse_@, @foldlM@, and being a good producer helps +in this situation. + +Note: this function differs from its counterpart in @bytestring@. +If the byte sequence is empty, this returns a singleton list with +the empty byte sequence. +-} split :: Word8 -> Bytes -> [Bytes] -{-# inline split #-} -split !w !bs@Bytes{array,offset=arrIx0} = Exts.build - (\g x0 -> - let go !lenIx !arrIx = if lenIx < lensSz - then let !len = PM.indexPrimArray lens lenIx in - g (Bytes array arrIx len) (go (lenIx + 1) (arrIx + len + 1)) - else x0 - in go 0 arrIx0 - ) - where +{-# INLINE split #-} +split !w !bs@Bytes {array, offset = arrIx0} = + Exts.build + ( \g x0 -> + let go !lenIx !arrIx = + if lenIx < lensSz + then + let !len = PM.indexPrimArray lens lenIx + in g (Bytes array arrIx len) (go (lenIx + 1) (arrIx + len + 1)) + else x0 + in go 0 arrIx0 + ) + where !lens = splitLengthsAlt w bs !lensSz = PM.sizeofPrimArray lens --- | Variant of 'split' that intended for use with stream fusion rather --- than @build@-@foldr@ fusion. -splitStream :: forall m. Applicative m => Word8 -> Bytes -> Stream m Bytes -{-# inline [1] splitStream #-} -splitStream !w !bs@Bytes{array,offset=arrIx0} = Stream step (IntPair 0 arrIx0) - where +{- | Variant of 'split' that intended for use with stream fusion rather +than @build@-@foldr@ fusion. +-} +splitStream :: forall m. (Applicative m) => Word8 -> Bytes -> Stream m Bytes +{-# INLINE [1] splitStream #-} +splitStream !w !bs@Bytes {array, offset = arrIx0} = Stream step (IntPair 0 arrIx0) + where !lens = splitLengthsAlt w bs !lensSz = PM.sizeofPrimArray lens - {-# inline [0] step #-} + {-# INLINE [0] step #-} step :: IntPair -> m (Step IntPair Bytes) - step (IntPair lenIx arrIx) = if lenIx < lensSz - then do - let !len = PM.indexPrimArray lens lenIx - !element = Bytes array arrIx len - !acc = IntPair (lenIx + 1) (arrIx + len + 1) - pure (Yield element acc) - else pure Done + step (IntPair lenIx arrIx) = + if lenIx < lensSz + then do + let !len = PM.indexPrimArray lens lenIx + !element = Bytes array arrIx len + !acc = IntPair (lenIx + 1) (arrIx + len + 1) + pure (Yield element acc) + else pure Done --- | Variant of 'split' that returns the result as a 'NonEmpty' --- instead of @[]@. This is also eligible for stream fusion. +{- | Variant of 'split' that returns the result as a 'NonEmpty' +instead of @[]@. This is also eligible for stream fusion. +-} splitNonEmpty :: Word8 -> Bytes -> NonEmpty Bytes -{-# inline splitNonEmpty #-} -splitNonEmpty !w !bs@Bytes{array,offset=arrIx0} = - Bytes array arrIx0 len0 :| Exts.build - (\g x0 -> - let go !lenIx !arrIx = if lenIx < lensSz - then let !len = PM.indexPrimArray lens lenIx in - g (Bytes array arrIx len) (go (lenIx + 1) (arrIx + len + 1)) - else x0 - in go 1 (1 + (arrIx0 + len0)) - ) - where +{-# INLINE splitNonEmpty #-} +splitNonEmpty !w !bs@Bytes {array, offset = arrIx0} = + Bytes array arrIx0 len0 + :| Exts.build + ( \g x0 -> + let go !lenIx !arrIx = + if lenIx < lensSz + then + let !len = PM.indexPrimArray lens lenIx + in g (Bytes array arrIx len) (go (lenIx + 1) (arrIx + len + 1)) + else x0 + in go 1 (1 + (arrIx0 + len0)) + ) + where !lens = splitLengthsAlt w bs !lensSz = PM.sizeofPrimArray lens !len0 = PM.indexPrimArray lens 0 :: Int --- | Variant of 'split' that drops the trailing element. This behaves --- correctly even if the byte sequence is empty. This is a good producer --- for list fusion. This is useful when splitting a text file --- into lines. --- --- mandates that text files end with a newline, so the list resulting --- from 'split' always has an empty byte sequence as its last element. --- With 'splitInit', that unwanted element is discarded. +{- | Variant of 'split' that drops the trailing element. This behaves +correctly even if the byte sequence is empty. This is a good producer +for list fusion. This is useful when splitting a text file +into lines. + +mandates that text files end with a newline, so the list resulting +from 'split' always has an empty byte sequence as its last element. +With 'splitInit', that unwanted element is discarded. +-} splitInit :: Word8 -> Bytes -> [Bytes] -{-# inline splitInit #-} -splitInit !w !bs@Bytes{array,offset=arrIx0} = Exts.build - (\g x0 -> - let go !lenIx !arrIx = if lenIx < lensSz - then let !len = PM.indexPrimArray lens lenIx in - g (Bytes array arrIx len) (go (lenIx + 1) (arrIx + len + 1)) - else x0 - in go 0 arrIx0 - ) - where +{-# INLINE splitInit #-} +splitInit !w !bs@Bytes {array, offset = arrIx0} = + Exts.build + ( \g x0 -> + let go !lenIx !arrIx = + if lenIx < lensSz + then + let !len = PM.indexPrimArray lens lenIx + in g (Bytes array arrIx len) (go (lenIx + 1) (arrIx + len + 1)) + else x0 + in go 0 arrIx0 + ) + where -- Remember, the resulting array from splitLengthsAlt always has -- a length of at least one. !lens = splitLengthsAlt w bs @@ -179,99 +193,118 @@ splitInit !w !bs@Bytes{array,offset=arrIx0} = Exts.build -- it does not treat the empty byte sequences specially. The result -- for that byte sequence is a singleton array with the element zero. splitLengthsAlt :: Word8 -> Bytes -> PrimArray Int -splitLengthsAlt b Bytes{array=ByteArray arr#,offset=off,length=len} = runPrimArrayST do +splitLengthsAlt b Bytes {array = ByteArray arr#, offset = off, length = len} = runPrimArrayST do let !n = count_ba arr# off len b - dst@(MutablePrimArray dst# ) :: MutablePrimArray s Int <- PM.newPrimArray (n + 1) + dst@(MutablePrimArray dst#) :: MutablePrimArray s Int <- PM.newPrimArray (n + 1) total <- unsafeIOToST (memchr_ba_many arr# off len dst# n b) PM.writePrimArray dst n (len - total) PM.unsafeFreezePrimArray dst -foreign import ccall unsafe "bs_custom.h memchr_ba_many" memchr_ba_many - :: ByteArray# -> Int -> Int -> MutableByteArray# s -> Int -> Word8 -> IO Int +foreign import ccall unsafe "bs_custom.h memchr_ba_many" + memchr_ba_many :: + ByteArray# -> Int -> Int -> MutableByteArray# s -> Int -> Word8 -> IO Int -foreign import ccall unsafe "bs_custom.h count_ba" count_ba - :: ByteArray# -> Int -> Int -> Word8 -> Int +foreign import ccall unsafe "bs_custom.h count_ba" + count_ba :: + ByteArray# -> Int -> Int -> Word8 -> Int --- | Split a byte sequence on the first occurrence of the target --- byte. The target is removed from the result. For example: --- --- >>> split1 0xA [0x1,0x2,0xA,0xB] --- Just ([0x1,0x2],[0xB]) -split1 :: Word8 -> Bytes -> Maybe (Bytes,Bytes) -{-# inline split1 #-} +{- | Split a byte sequence on the first occurrence of the target +byte. The target is removed from the result. For example: + +>>> split1 0xA [0x1,0x2,0xA,0xB] +Just ([0x1,0x2],[0xB]) +-} +split1 :: Word8 -> Bytes -> Maybe (Bytes, Bytes) +{-# INLINE split1 #-} split1 w b@(Bytes arr off len) = case elemIndexLoop# w b of (-1#) -> Nothing - i# -> let i = I# i# in - Just (Bytes arr off (i - off), Bytes arr (i + 1) (len - (1 + i - off))) + i# -> + let i = I# i# + in Just (Bytes arr off (i - off), Bytes arr (i + 1) (len - (1 + i - off))) --- | Split a byte sequence on the first and second occurrences --- of the target byte. The target is removed from the result. --- For example: --- --- >>> split2 0xA [0x1,0x2,0xA,0xB,0xA,0xA,0xA] --- Just ([0x1,0x2],[0xB],[0xA,0xA]) -split2 :: Word8 -> Bytes -> Maybe (Bytes,Bytes,Bytes) -{-# inline split2 #-} +{- | Split a byte sequence on the first and second occurrences +of the target byte. The target is removed from the result. +For example: + +>>> split2 0xA [0x1,0x2,0xA,0xB,0xA,0xA,0xA] +Just ([0x1,0x2],[0xB],[0xA,0xA]) +-} +split2 :: Word8 -> Bytes -> Maybe (Bytes, Bytes, Bytes) +{-# INLINE split2 #-} split2 w b@(Bytes arr off len) = case elemIndexLoop# w b of (-1#) -> Nothing - i# -> let i = I# i# in - case elemIndexLoop# w (Bytes arr (i + 1) (len - (1 + i - off))) of - (-1#) -> Nothing - j# -> let j = I# j# in Just - ( Bytes arr off (i - off) - , Bytes arr (i + 1) (j - (i + 1)) - , Bytes arr (j + 1) (len - (1 + j - off)) - ) + i# -> + let i = I# i# + in case elemIndexLoop# w (Bytes arr (i + 1) (len - (1 + i - off))) of + (-1#) -> Nothing + j# -> + let j = I# j# + in Just + ( Bytes arr off (i - off) + , Bytes arr (i + 1) (j - (i + 1)) + , Bytes arr (j + 1) (len - (1 + j - off)) + ) --- | Split a byte sequence on the first, second, and third occurrences --- of the target byte. The target is removed from the result. --- For example: --- --- >>> split3 0xA [0x1,0x2,0xA,0xB,0xA,0xA,0xA] --- Just ([0x1,0x2],[0xB],[],[0xA]) -split3 :: Word8 -> Bytes -> Maybe (Bytes,Bytes,Bytes,Bytes) -{-# inline split3 #-} +{- | Split a byte sequence on the first, second, and third occurrences +of the target byte. The target is removed from the result. +For example: + +>>> split3 0xA [0x1,0x2,0xA,0xB,0xA,0xA,0xA] +Just ([0x1,0x2],[0xB],[],[0xA]) +-} +split3 :: Word8 -> Bytes -> Maybe (Bytes, Bytes, Bytes, Bytes) +{-# INLINE split3 #-} split3 w b@(Bytes arr off len) = case elemIndexLoop# w b of (-1#) -> Nothing - i# -> let i = I# i# in - case elemIndexLoop# w (Bytes arr (i + 1) (len - (1 + i - off))) of - (-1#) -> Nothing - j# -> let j = I# j# in - case elemIndexLoop# w (Bytes arr (j + 1) (len - (1 + j - off))) of + i# -> + let i = I# i# + in case elemIndexLoop# w (Bytes arr (i + 1) (len - (1 + i - off))) of (-1#) -> Nothing - k# -> let k = I# k# in Just - ( Bytes arr off (i - off) - , Bytes arr (i + 1) (j - (i + 1)) - , Bytes arr (j + 1) (k - (j + 1)) - , Bytes arr (k + 1) (len - (1 + k - off)) - ) + j# -> + let j = I# j# + in case elemIndexLoop# w (Bytes arr (j + 1) (len - (1 + j - off))) of + (-1#) -> Nothing + k# -> + let k = I# k# + in Just + ( Bytes arr off (i - off) + , Bytes arr (i + 1) (j - (i + 1)) + , Bytes arr (j + 1) (k - (j + 1)) + , Bytes arr (k + 1) (len - (1 + k - off)) + ) --- | Split a byte sequence on the first, second, third, and fourth --- occurrences of the target byte. The target is removed from the result. --- For example: --- --- >>> split4 0xA [0x1,0x2,0xA,0xB,0xA,0xA,0xA] --- Just ([0x1,0x2],[0xB],[],[],[]) -split4 :: Word8 -> Bytes -> Maybe (Bytes,Bytes,Bytes,Bytes,Bytes) -{-# inline split4 #-} +{- | Split a byte sequence on the first, second, third, and fourth +occurrences of the target byte. The target is removed from the result. +For example: + +>>> split4 0xA [0x1,0x2,0xA,0xB,0xA,0xA,0xA] +Just ([0x1,0x2],[0xB],[],[],[]) +-} +split4 :: Word8 -> Bytes -> Maybe (Bytes, Bytes, Bytes, Bytes, Bytes) +{-# INLINE split4 #-} split4 w b@(Bytes arr off len) = case elemIndexLoop# w b of (-1#) -> Nothing - i# -> let i = I# i# in - case elemIndexLoop# w (Bytes arr (i + 1) (len - (1 + i - off))) of - (-1#) -> Nothing - j# -> let j = I# j# in - case elemIndexLoop# w (Bytes arr (j + 1) (len - (1 + j - off))) of + i# -> + let i = I# i# + in case elemIndexLoop# w (Bytes arr (i + 1) (len - (1 + i - off))) of (-1#) -> Nothing - k# -> let k = I# k# in - case elemIndexLoop# w (Bytes arr (k + 1) (len - (1 + k - off))) of - (-1#) -> Nothing - m# -> let m = I# m# in Just - ( Bytes arr off (i - off) - , Bytes arr (i + 1) (j - (i + 1)) - , Bytes arr (j + 1) (k - (j + 1)) - , Bytes arr (k + 1) (m - (k + 1)) - , Bytes arr (m + 1) (len - (1 + m - off)) - ) + j# -> + let j = I# j# + in case elemIndexLoop# w (Bytes arr (j + 1) (len - (1 + j - off))) of + (-1#) -> Nothing + k# -> + let k = I# k# + in case elemIndexLoop# w (Bytes arr (k + 1) (len - (1 + k - off))) of + (-1#) -> Nothing + m# -> + let m = I# m# + in Just + ( Bytes arr off (i - off) + , Bytes arr (i + 1) (j - (i + 1)) + , Bytes arr (j + 1) (k - (j + 1)) + , Bytes arr (k + 1) (m - (k + 1)) + , Bytes arr (m + 1) (len - (1 + m - off)) + ) -- This returns the offset into the byte array. This is not an index -- that will mean anything to the end user, so it cannot be returned @@ -280,30 +313,35 @@ split4 w b@(Bytes arr off len) = case elemIndexLoop# w b of -- Exported for use in other internal modules because it is needed in -- Data.Bytes.Search. elemIndexLoop# :: Word8 -> Bytes -> Int# -{-# inline elemIndexLoop# #-} -elemIndexLoop# !w (Bytes arr off@(I# off# ) len) = case len of +{-# INLINE elemIndexLoop# #-} +elemIndexLoop# !w (Bytes arr off@(I# off#) len) = case len of 0 -> (-1#) - _ -> if PM.indexByteArray arr off == w - then off# - else elemIndexLoop# w (Bytes arr (off + 1) (len - 1)) + _ -> + if PM.indexByteArray arr off == w + then off# + else elemIndexLoop# w (Bytes arr (off + 1) (len - 1)) -- Variant of elemIndexLoop# that starts at the end. Similarly, returns -- negative one if the element is not found. elemIndexLoopBackwards# :: Word8 -> ByteArray -> Int -> Int -> Int# -elemIndexLoopBackwards# !w !arr !start !pos@(I# pos#) = if pos < start - then (-1#) - else if PM.indexByteArray arr pos == w - then pos# - else elemIndexLoopBackwards# w arr start (pos - 1) +elemIndexLoopBackwards# !w !arr !start !pos@(I# pos#) = + if pos < start + then (-1#) + else + if PM.indexByteArray arr pos == w + then pos# + else elemIndexLoopBackwards# w arr start (pos - 1) --- | Split a byte sequence on the last occurrence of the target --- byte. The target is removed from the result. For example: --- --- >>> split1 0xA [0x1,0x2,0xA,0xB,0xA,0xC] --- Just ([0x1,0x2,0xA,0xB],[0xC]) -splitEnd1 :: Word8 -> Bytes -> Maybe (Bytes,Bytes) -{-# inline splitEnd1 #-} +{- | Split a byte sequence on the last occurrence of the target +byte. The target is removed from the result. For example: + +>>> split1 0xA [0x1,0x2,0xA,0xB,0xA,0xC] +Just ([0x1,0x2,0xA,0xB],[0xC]) +-} +splitEnd1 :: Word8 -> Bytes -> Maybe (Bytes, Bytes) +{-# INLINE splitEnd1 #-} splitEnd1 !w (Bytes arr off len) = case elemIndexLoopBackwards# w arr off (off + len - 1) of (-1#) -> Nothing - i# -> let i = I# i# in - Just (Bytes arr off (i - off), Bytes arr (i + 1) (len - (1 + i - off))) + i# -> + let i = I# i# + in Just (Bytes arr off (i - off), Bytes arr (i + 1) (len - (1 + i - off))) diff --git a/src/Data/Bytes/Chunks.hs b/src/Data/Bytes/Chunks.hs index 07c71e1..b544bab 100644 --- a/src/Data/Bytes/Chunks.hs +++ b/src/Data/Bytes/Chunks.hs @@ -1,25 +1,28 @@ -{-# language BangPatterns #-} -{-# language DerivingStrategies #-} -{-# language DuplicateRecordFields #-} -{-# language MagicHash #-} -{-# language NamedFieldPuns #-} -{-# language RankNTypes #-} -{-# language TypeApplications #-} -{-# language TypeFamilies #-} -{-# language UnboxedTuples #-} - --- | Chunks of bytes. This is useful as a target for a builder --- or as a way to read a large amount of whose size is unknown --- in advance. Structurally, this type is similar to --- @Data.ByteString.Lazy.ByteString@. However, the type in this --- module is strict in its spine. Additionally, none of the --- @Handle@ functions perform lazy I\/O. +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE UnboxedTuples #-} + +{- | Chunks of bytes. This is useful as a target for a builder +or as a way to read a large amount of whose size is unknown +in advance. Structurally, this type is similar to +@Data.ByteString.Lazy.ByteString@. However, the type in this +module is strict in its spine. Additionally, none of the +@Handle@ functions perform lazy I\/O. +-} module Data.Bytes.Chunks ( -- * Types - Chunks(..) + Chunks (..) + -- * Properties , length , null + -- * Manipulate , cons , concat @@ -31,18 +34,24 @@ module Data.Bytes.Chunks , reverseOnto , replicate , replicateByte + -- * Folds , foldl' + -- * Splitting , split + -- * Hashing , fnv1a32 , fnv1a64 + -- * Create , fromBytes , fromByteArray + -- * Copy to buffer , unsafeCopy + -- * I\/O with Handles , hGetContents , readFile @@ -50,19 +59,18 @@ module Data.Bytes.Chunks , writeFile ) where -import Prelude hiding (Foldable(..),concat,reverse,readFile,writeFile,replicate) +import Prelude hiding (Foldable (..), concat, readFile, replicate, reverse, writeFile) -import Control.Exception (IOException,catch) +import Control.Exception (IOException, catch) import Control.Monad.ST.Run (runIntByteArrayST) import Data.Bits (xor) import Data.ByteString (ByteString) -import Data.Bytes.Types (Bytes(Bytes)) -import Data.Primitive (ByteArray(..),MutableByteArray(..)) -import Data.Word (Word8,Word32,Word64) -import GHC.Exts (ByteArray#,MutableByteArray#) -import GHC.Exts (Int#,State#,Int(I#),(+#)) -import GHC.ST (ST(..)) -import System.IO (Handle,hFileSize,IOMode(ReadMode,WriteMode),withBinaryFile) +import Data.Bytes.Types (Bytes (Bytes)) +import Data.Primitive (ByteArray (..), MutableByteArray (..)) +import Data.Word (Word32, Word64, Word8) +import GHC.Exts (ByteArray#, Int (I#), Int#, MutableByteArray#, State#, (+#)) +import GHC.ST (ST (..)) +import System.IO (Handle, IOMode (ReadMode, WriteMode), hFileSize, withBinaryFile) import qualified Data.Bytes.Byte as Byte import qualified Data.Bytes.IO as IO @@ -94,48 +102,52 @@ instance Eq Chunks where -- | Add a byte sequence to the beginning. cons :: Bytes -> Chunks -> Chunks -{-# inline cons #-} +{-# INLINE cons #-} cons = ChunksCons --- | Repeat the byte sequence over and over. Returns empty chunks when given --- a negative repetition count. +{- | Repeat the byte sequence over and over. Returns empty chunks when given +a negative repetition count. +-} replicate :: - Bytes - -> Int -- ^ Number of times to repeat the sequence. - -> Chunks + Bytes -> + -- | Number of times to repeat the sequence. + Int -> + Chunks replicate !b@(Bytes _ _ len) !n | n <= 0 = ChunksNil | len == 0 = ChunksNil | otherwise = go n ChunksNil - where + where -- Implementation note: We do not have to reverse the chunks at the end. go i !acc = case i of 0 -> acc _ -> go (i - 1) (ChunksCons b acc) --- | Repeat the byte over and over. This builds a single byte array that --- is at most 64KiB and shares that across every @ChunksCons@ cell. --- --- An as example, creating a 2GiB chunks this way would use 64KiB for the --- byte array, and there would be the additional overhead of the 2^15 --- @ChunksCons@ data constructors. On a 64-bit platform, @ChunksCons@ --- takes 40 bytes, so the total memory consumption would be --- @2^16 + 40 * 2^15@, which is roughly 1.37MB. The same reasoning --- shows that it takes about 83.95MB to represent a 128GiB chunks. --- --- The size of the shared payload is an implementation detail. Do not --- rely on this function producing 64KiB chunks. The implementation might --- one day change to something smarter that minimizes the memory footprint --- for very large chunks. +{- | Repeat the byte over and over. This builds a single byte array that +is at most 64KiB and shares that across every @ChunksCons@ cell. + +An as example, creating a 2GiB chunks this way would use 64KiB for the +byte array, and there would be the additional overhead of the 2^15 +@ChunksCons@ data constructors. On a 64-bit platform, @ChunksCons@ +takes 40 bytes, so the total memory consumption would be +@2^16 + 40 * 2^15@, which is roughly 1.37MB. The same reasoning +shows that it takes about 83.95MB to represent a 128GiB chunks. + +The size of the shared payload is an implementation detail. Do not +rely on this function producing 64KiB chunks. The implementation might +one day change to something smarter that minimizes the memory footprint +for very large chunks. +-} replicateByte :: - Word8 - -> Int -- ^ Number of times to replicate the byte - -> Chunks + Word8 -> + -- | Number of times to replicate the byte + Int -> + Chunks replicateByte !w !n | n <= 0 = ChunksNil | n < 65536 = ChunksCons (Bytes.replicate n w) ChunksNil | otherwise = go (Bytes.replicateU 65536 w) n ChunksNil - where + where go !shared !remaining !acc | remaining == 0 = acc | remaining < 65536 = ChunksCons (Bytes shared 0 remaining) acc @@ -143,14 +155,16 @@ replicateByte !w !n -- | Are there any bytes in the chunked byte sequences? null :: Chunks -> Bool -null = go where +null = go + where go ChunksNil = True go (ChunksCons (Bytes _ _ len) xs) = case len of 0 -> go xs _ -> False --- | Variant of 'concat' that ensure that the resulting byte --- sequence is pinned memory. +{- | Variant of 'concat' that ensure that the resulting byte +sequence is pinned memory. +-} concatPinned :: Chunks -> Bytes concatPinned x = case x of ChunksNil -> Bytes.emptyPinned @@ -197,15 +211,17 @@ concatPinnedFollowing2 :: Bytes -> Bytes -> Chunks -> (# Int#, ByteArray# #) concatPinnedFollowing2 = internalConcatFollowing2 PM.newPinnedByteArray internalConcatFollowing2 :: - (forall s. Int -> ST s (MutableByteArray s)) - -> Bytes - -> Bytes - -> Chunks - -> (# Int#, ByteArray# #) -{-# inline internalConcatFollowing2 #-} -internalConcatFollowing2 allocate - (Bytes{array=c,offset=coff,length=szc}) - (Bytes{array=d,offset=doff,length=szd}) ds = + (forall s. Int -> ST s (MutableByteArray s)) -> + Bytes -> + Bytes -> + Chunks -> + (# Int#, ByteArray# #) +{-# INLINE internalConcatFollowing2 #-} +internalConcatFollowing2 + allocate + (Bytes {array = c, offset = coff, length = szc}) + (Bytes {array = d, offset = doff, length = szd}) + ds = let !(I# x, ByteArray y) = runIntByteArrayST $ do let !szboth = szc + szd !len = chunksLengthGo szboth ds @@ -215,7 +231,7 @@ internalConcatFollowing2 allocate -- Note: len2 will always be the same as len. !len2 <- unsafeCopy dst szboth ds result <- PM.unsafeFreezeByteArray dst - pure (len2,result) + pure (len2, result) in (# x, y #) -- | The total number of bytes in all the chunks. @@ -224,26 +240,32 @@ length = chunksLengthGo 0 chunksLengthGo :: Int -> Chunks -> Int chunksLengthGo !n ChunksNil = n -chunksLengthGo !n (ChunksCons (Bytes{B.length=len}) cs) = +chunksLengthGo !n (ChunksCons (Bytes {B.length = len}) cs) = chunksLengthGo (n + len) cs --- | Copy the contents of the chunks into a mutable array. --- Precondition: The destination must have enough space to --- house the contents. This is not checked. +{- | Copy the contents of the chunks into a mutable array. +Precondition: The destination must have enough space to +house the contents. This is not checked. +-} unsafeCopy :: - MutableByteArray s -- ^ Destination - -> Int -- ^ Destination offset - -> Chunks -- ^ Source - -> ST s Int -- ^ Returns the next index into the destination after the payload -{-# inline unsafeCopy #-} -unsafeCopy (MutableByteArray dst) (I# off) cs = ST - (\s0 -> case copy# dst off cs s0 of - (# s1, nextOff #) -> (# s1, I# nextOff #) - ) + -- | Destination + MutableByteArray s -> + -- | Destination offset + Int -> + -- | Source + Chunks -> + -- | Returns the next index into the destination after the payload + ST s Int +{-# INLINE unsafeCopy #-} +unsafeCopy (MutableByteArray dst) (I# off) cs = + ST + ( \s0 -> case copy# dst off cs s0 of + (# s1, nextOff #) -> (# s1, I# nextOff #) + ) copy# :: MutableByteArray# s -> Int# -> Chunks -> State# s -> (# State# s, Int# #) copy# _ off ChunksNil s0 = (# s0, off #) -copy# marr off (ChunksCons (Bytes{B.array,B.offset,B.length=len}) cs) s0 = +copy# marr off (ChunksCons (Bytes {B.array, B.offset, B.length = len}) cs) s0 = case Exts.copyByteArray# (unBa array) (unI offset) marr off (unI len) s0 of s1 -> copy# marr (off +# unI len) cs s1 @@ -251,20 +273,21 @@ copy# marr off (ChunksCons (Bytes{B.array,B.offset,B.length=len}) cs) s0 = reverse :: Chunks -> Chunks reverse = reverseOnto ChunksNil --- | Variant of 'reverse' that allows the caller to provide --- an initial list of chunks that the reversed chunks will --- be pushed onto. +{- | Variant of 'reverse' that allows the caller to provide +an initial list of chunks that the reversed chunks will +be pushed onto. +-} reverseOnto :: Chunks -> Chunks -> Chunks reverseOnto !x ChunksNil = x reverseOnto !x (ChunksCons y ys) = reverseOnto (ChunksCons y x) ys unI :: Int -> Int# -{-# inline unI #-} +{-# INLINE unI #-} unI (I# i) = i unBa :: ByteArray -> ByteArray# -{-# inline unBa #-} +{-# INLINE unBa #-} unBa (ByteArray x) = x -- | Read a handle's entire contents strictly into chunks. @@ -281,10 +304,11 @@ hGetContentsHint !hint !h = do else hGetContentsCommon r h hGetContentsCommon :: - Chunks -- reversed chunks - -> Handle - -> IO Chunks -hGetContentsCommon !acc0 !h = go acc0 where + Chunks -> -- reversed chunks + Handle -> + IO Chunks +hGetContentsCommon !acc0 !h = go acc0 + where go !acc = do c <- IO.hGet h chunkSize let !r = ChunksCons c acc @@ -292,9 +316,10 @@ hGetContentsCommon !acc0 !h = go acc0 where then go r else pure $! reverse r --- | Read an entire file strictly into chunks. If reading from a --- regular file, this makes an effort read the file into a single --- chunk. +{- | Read an entire file strictly into chunks. If reading from a +regular file, this makes an effort read the file into a single +chunk. +-} readFile :: FilePath -> IO Chunks readFile f = withBinaryFile f ReadMode $ \h -> do -- Implementation copied from bytestring. @@ -303,13 +328,14 @@ readFile f = withBinaryFile f ReadMode $ \h -> do filesz <- catch (hFileSize h) useZeroIfNotRegularFile let hint = (fromIntegral filesz `max` 255) + 1 hGetContentsHint hint h + where -- Our initial size is one bigger than the file size so that in the -- typical case we will read the whole file in one go and not have -- to allocate any more chunks. We'll still do the right thing if the -- file size is 0 or is changed before we do the read. - where - useZeroIfNotRegularFile :: IOException -> IO Integer - useZeroIfNotRegularFile _ = return 0 + + useZeroIfNotRegularFile :: IOException -> IO Integer + useZeroIfNotRegularFile _ = return 0 chunkSize :: Int chunkSize = 16384 - 16 @@ -324,63 +350,77 @@ fromByteArray !b = fromBytes (Bytes.fromByteArray b) -- | Left fold over all bytes in the chunks, strict in the accumulator. foldl' :: (a -> Word8 -> a) -> a -> Chunks -> a -{-# inline foldl' #-} -foldl' g = go where +{-# INLINE foldl' #-} +foldl' g = go + where go !a ChunksNil = a go !a (ChunksCons c cs) = go (Bytes.foldl' g a c) cs -- | Hash byte sequence with 32-bit variant of FNV-1a. fnv1a32 :: Chunks -> Word32 -fnv1a32 !b = foldl' - (\acc w -> (fromIntegral @Word8 @Word32 w `xor` acc) * 0x01000193 - ) 0x811c9dc5 b +fnv1a32 !b = + foldl' + ( \acc w -> (fromIntegral @Word8 @Word32 w `xor` acc) * 0x01000193 + ) + 0x811c9dc5 + b -- | Hash byte sequence with 64-bit variant of FNV-1a. fnv1a64 :: Chunks -> Word64 -fnv1a64 !b = foldl' - (\acc w -> (fromIntegral @Word8 @Word64 w `xor` acc) * 0x00000100000001B3 - ) 0xcbf29ce484222325 b - --- | Outputs 'Chunks' to the specified 'Handle'. This is implemented --- with 'IO.hPut'. +fnv1a64 !b = + foldl' + ( \acc w -> (fromIntegral @Word8 @Word64 w `xor` acc) * 0x00000100000001B3 + ) + 0xcbf29ce484222325 + b + +{- | Outputs 'Chunks' to the specified 'Handle'. This is implemented +with 'IO.hPut'. +-} hPut :: Handle -> Chunks -> IO () -hPut h = go where +hPut h = go + where go ChunksNil = pure () go (ChunksCons c cs) = IO.hPut h c *> go cs --- | Write 'Chunks' to a file, replacing the previous contents of --- the file. +{- | Write 'Chunks' to a file, replacing the previous contents of +the file. +-} writeFile :: FilePath -> Chunks -> IO () writeFile path cs = withBinaryFile path WriteMode (\h -> hPut h cs) --- | Break chunks of bytes into contiguous pieces separated by the --- byte argument. This is a good producer for list fusion. For this --- function to perform well, each chunk should contain multiple separators. --- Any piece that spans multiple chunks must be copied. +{- | Break chunks of bytes into contiguous pieces separated by the +byte argument. This is a good producer for list fusion. For this +function to perform well, each chunk should contain multiple separators. +Any piece that spans multiple chunks must be copied. +-} split :: Word8 -> Chunks -> [Bytes] -{-# inline split #-} -split !w !cs0 = Exts.build - (\g x0 -> - -- It is possible to optimize for the common case where a - -- piece does not span multiple chunks. However, such an - -- optimization would actually cause this to tail call in - -- two places rather than one and may actually adversely - -- affect performance. It hasn't been benchmarked. - let go !cs = case splitOnto ChunksNil w cs of - (hd,tl) -> let !x = concat (reverse hd) in - case tl of - ChunksNil -> x0 - _ -> g x (go tl) - in go cs0 - ) - -splitOnto :: Chunks -> Word8 -> Chunks -> (Chunks,Chunks) -{-# inline splitOnto #-} -splitOnto !acc0 !w !cs0 = go acc0 cs0 where - go !acc ChunksNil = (acc,ChunksNil) +{-# INLINE split #-} +split !w !cs0 = + Exts.build + ( \g x0 -> + -- It is possible to optimize for the common case where a + -- piece does not span multiple chunks. However, such an + -- optimization would actually cause this to tail call in + -- two places rather than one and may actually adversely + -- affect performance. It hasn't been benchmarked. + let go !cs = case splitOnto ChunksNil w cs of + (hd, tl) -> + let !x = concat (reverse hd) + in case tl of + ChunksNil -> x0 + _ -> g x (go tl) + in go cs0 + ) + +splitOnto :: Chunks -> Word8 -> Chunks -> (Chunks, Chunks) +{-# INLINE splitOnto #-} +splitOnto !acc0 !w !cs0 = go acc0 cs0 + where + go !acc ChunksNil = (acc, ChunksNil) go !acc (ChunksCons b bs) = case Byte.split1 w b of Nothing -> go (ChunksCons b acc) bs - Just (hd,tl) -> + Just (hd, tl) -> let !r1 = ChunksCons hd acc !r2 = ChunksCons tl bs - in (r1,r2) + in (r1, r2) diff --git a/src/Data/Bytes/Encode/BigEndian.hs b/src/Data/Bytes/Encode/BigEndian.hs index 39a1430..2b4ba8c 100644 --- a/src/Data/Bytes/Encode/BigEndian.hs +++ b/src/Data/Bytes/Encode/BigEndian.hs @@ -1,5 +1,5 @@ -{-# language BangPatterns #-} -{-# language TypeApplications #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TypeApplications #-} module Data.Bytes.Encode.BigEndian ( word16 @@ -13,17 +13,17 @@ module Data.Bytes.Encode.BigEndian import Control.Monad.ST.Run (runByteArrayST) import Data.Bits (unsafeShiftR) import Data.Bytes.Types (Bytes) -import Data.Int (Int16,Int32,Int64) +import Data.Int (Int16, Int32, Int64) import Data.Primitive (ByteArray) -import Data.Word (Word8,Word16,Word32,Word64) +import Data.Word (Word16, Word32, Word64, Word8) import qualified Data.Bytes.Pure as Pure import qualified Data.Primitive as PM -- | Encode a 32-bit signed integer as 4 bytes. int32 :: Int32 -> Bytes -{-# inline int32 #-} -int32 = word32 . fromIntegral @Int32 @Word32 +{-# INLINE int32 #-} +int32 = word32 . fromIntegral @Int32 @Word32 -- | Encode a 32-bit unsigned integer as 4 bytes. word32 :: Word32 -> Bytes @@ -40,8 +40,8 @@ word32U !w = runByteArrayST $ do -- | Encode a 16-bit signed integer as 4 bytes. int16 :: Int16 -> Bytes -{-# inline int16 #-} -int16 = word16 . fromIntegral @Int16 @Word16 +{-# INLINE int16 #-} +int16 = word16 . fromIntegral @Int16 @Word16 -- | Encode a 16-bit unsigned integer as 4 bytes. word16 :: Word16 -> Bytes @@ -56,8 +56,8 @@ word16U !w = runByteArrayST $ do -- | Encode a 16-bit signed integer as 4 bytes. int64 :: Int64 -> Bytes -{-# inline int64 #-} -int64 = word64 . fromIntegral @Int64 @Word64 +{-# INLINE int64 #-} +int64 = word64 . fromIntegral @Int64 @Word64 -- | Encode a 16-bit unsigned integer as 4 bytes. word64 :: Word64 -> Bytes diff --git a/src/Data/Bytes/Encode/LittleEndian.hs b/src/Data/Bytes/Encode/LittleEndian.hs index 5451468..1fa33a6 100644 --- a/src/Data/Bytes/Encode/LittleEndian.hs +++ b/src/Data/Bytes/Encode/LittleEndian.hs @@ -1,5 +1,5 @@ -{-# language BangPatterns #-} -{-# language TypeApplications #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE TypeApplications #-} module Data.Bytes.Encode.LittleEndian ( word16 @@ -13,17 +13,17 @@ module Data.Bytes.Encode.LittleEndian import Control.Monad.ST.Run (runByteArrayST) import Data.Bits (unsafeShiftR) import Data.Bytes.Types (Bytes) -import Data.Int (Int16,Int32,Int64) +import Data.Int (Int16, Int32, Int64) import Data.Primitive (ByteArray) -import Data.Word (Word8,Word16,Word32,Word64) +import Data.Word (Word16, Word32, Word64, Word8) import qualified Data.Bytes.Pure as Pure import qualified Data.Primitive as PM -- | Encode a 32-bit signed integer as 4 bytes. int32 :: Int32 -> Bytes -{-# inline int32 #-} -int32 = word32 . fromIntegral @Int32 @Word32 +{-# INLINE int32 #-} +int32 = word32 . fromIntegral @Int32 @Word32 -- | Encode a 32-bit unsigned integer as 4 bytes. word32 :: Word32 -> Bytes @@ -40,8 +40,8 @@ word32U !w = runByteArrayST $ do -- | Encode a 16-bit signed integer as 4 bytes. int16 :: Int16 -> Bytes -{-# inline int16 #-} -int16 = word16 . fromIntegral @Int16 @Word16 +{-# INLINE int16 #-} +int16 = word16 . fromIntegral @Int16 @Word16 -- | Encode a 16-bit unsigned integer as 4 bytes. word16 :: Word16 -> Bytes @@ -56,8 +56,8 @@ word16U !w = runByteArrayST $ do -- | Encode a 16-bit signed integer as 4 bytes. int64 :: Int64 -> Bytes -{-# inline int64 #-} -int64 = word64 . fromIntegral @Int64 @Word64 +{-# INLINE int64 #-} +int64 = word64 . fromIntegral @Int64 @Word64 -- | Encode a 16-bit unsigned integer as 4 bytes. word64 :: Word64 -> Bytes diff --git a/src/Data/Bytes/IO.hs b/src/Data/Bytes/IO.hs index 37457eb..ba095a0 100644 --- a/src/Data/Bytes/IO.hs +++ b/src/Data/Bytes/IO.hs @@ -1,33 +1,33 @@ -{-# language BangPatterns #-} -{-# language BlockArguments #-} -{-# language MagicHash #-} -{-# language NamedFieldPuns #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BlockArguments #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedTuples #-} module Data.Bytes.IO ( hGet , hPut ) where -import Data.Primitive (MutableByteArray,ByteArray(..)) +import Data.Bytes.Pure (contents, pin) +import Data.Bytes.Types (Bytes (Bytes)) +import Data.Primitive (ByteArray (..), MutableByteArray) +import qualified Data.Primitive as PM import Data.Word (Word8) -import Data.Bytes.Types (Bytes(Bytes)) -import Data.Bytes.Pure (pin,contents) -import System.IO (Handle) import Foreign.Ptr (Ptr) -import GHC.IO (IO(IO)) -import qualified System.IO as IO import qualified GHC.Exts as Exts -import qualified Data.Primitive as PM +import GHC.IO (IO (IO)) +import System.IO (Handle) +import qualified System.IO as IO --- | Read 'Bytes' directly from the specified 'Handle'. The resulting --- 'Bytes' are pinned. This is implemented with 'IO.hGetBuf'. +{- | Read 'Bytes' directly from the specified 'Handle'. The resulting +'Bytes' are pinned. This is implemented with 'IO.hGetBuf'. +-} hGet :: Handle -> Int -> IO Bytes hGet h i = createPinnedAndTrim i (\p -> IO.hGetBuf h p i) --- | Outputs 'Bytes' to the specified 'Handle'. This is implemented --- with 'IO.hPutBuf'. +{- | Outputs 'Bytes' to the specified 'Handle'. This is implemented +with 'IO.hPutBuf'. +-} hPut :: Handle -> Bytes -> IO () hPut h b0 = do let b1@(Bytes arr _ len) = pin b0 @@ -36,7 +36,7 @@ hPut h b0 = do -- Only used internally. createPinnedAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO Bytes -{-# inline createPinnedAndTrim #-} +{-# INLINE createPinnedAndTrim #-} createPinnedAndTrim maxSz f = do arr@(PM.MutableByteArray arr#) <- PM.newPinnedByteArray maxSz sz <- f (PM.mutableByteArrayContents arr) diff --git a/src/Data/Bytes/Internal.hs b/src/Data/Bytes/Internal.hs index 60d342e..df2de9c 100644 --- a/src/Data/Bytes/Internal.hs +++ b/src/Data/Bytes/Internal.hs @@ -1,25 +1,24 @@ -{-# language BangPatterns #-} -{-# language DataKinds #-} -{-# language MagicHash #-} -{-# language TypeFamilies #-} -{-# language DuplicateRecordFields #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE TypeFamilies #-} -- This needs to be in its own module to prevent a cyclic dependency -- between UnliftedBytes and Data.Bytes.Types module Data.Bytes.Internal - ( Bytes(..) + ( Bytes (..) ) where import Control.Monad.ST (runST) import Control.Monad.ST.Run (runByteArrayST) -import Data.Primitive (ByteArray(..)) -import Data.Word (Word8) -import GHC.Exts (Int(I#),unsafeCoerce#,sameMutableByteArray#) -import GHC.Exts (isTrue#,compareByteArrays#,IsList(..)) import Data.Bytes.Internal.Show (showsSlice) +import Data.Primitive (ByteArray (..)) +import Data.Word (Word8) +import GHC.Exts (Int (I#), IsList (..), compareByteArrays#, isTrue#, sameMutableByteArray#, unsafeCoerce#) -import qualified Data.List as L import qualified Data.Foldable as F +import qualified Data.List as L import qualified Data.Primitive as PM -- | A slice of a 'ByteArray'. @@ -36,9 +35,10 @@ instance IsList Bytes where toList (Bytes arr off len) = toListLoop off len arr toListLoop :: Int -> Int -> ByteArray -> [Word8] -toListLoop !off !len !arr = if len > 0 - then PM.indexByteArray arr off : toListLoop (off + 1) (len - 1) arr - else [] +toListLoop !off !len !arr = + if len > 0 + then PM.indexByteArray arr off : toListLoop (off + 1) (len - 1) arr + else [] instance Show Bytes where showsPrec _ (Bytes arr off len) s = showsSlice arr off len s @@ -69,15 +69,18 @@ instance Monoid Bytes where mconcat [] = mempty mconcat [x] = x mconcat bs = Bytes r 0 fullLen - where + where !fullLen = L.foldl' (\acc (Bytes _ _ len) -> acc + len) 0 bs r = runByteArrayST $ do marr <- PM.newByteArray fullLen - !_ <- F.foldlM - (\ !currLen (Bytes arr off len) -> do - PM.copyByteArray marr currLen arr off len - pure (currLen + len) - ) 0 bs + !_ <- + F.foldlM + ( \ !currLen (Bytes arr off len) -> do + PM.copyByteArray marr currLen arr off len + pure (currLen + len) + ) + 0 + bs PM.unsafeFreezeByteArray marr compareByteArrays :: ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering @@ -89,4 +92,3 @@ sameByteArray :: ByteArray -> ByteArray -> Bool {-# INLINE sameByteArray #-} sameByteArray (ByteArray ba1#) (ByteArray ba2#) = isTrue# (sameMutableByteArray# (unsafeCoerce# ba1#) (unsafeCoerce# ba2#)) - diff --git a/src/Data/Bytes/Internal/Show.hs b/src/Data/Bytes/Internal/Show.hs index 6e7c7db..29041ab 100644 --- a/src/Data/Bytes/Internal/Show.hs +++ b/src/Data/Bytes/Internal/Show.hs @@ -1,10 +1,10 @@ -{-# language BangPatterns #-} +{-# LANGUAGE BangPatterns #-} module Data.Bytes.Internal.Show ( showsSlice ) where -import Data.Bits ((.&.),unsafeShiftR) +import Data.Bits (unsafeShiftR, (.&.)) import Data.Char (ord) import Data.Primitive (ByteArray) import Data.Word (Word8) @@ -13,23 +13,27 @@ import GHC.Base (unsafeChr) import qualified Data.Primitive as PM showsSlice :: ByteArray -> Int -> Int -> String -> String -showsSlice arr off len s = if len == 0 - then showString "[]" s - else showString "[0x" - $ showHexDigitsWord8 (PM.indexByteArray arr off) - $ showHexLoop (off + 1) (len - 1) arr - $ showChar ']' - $ s +showsSlice arr off len s = + if len == 0 + then showString "[]" s + else + showString "[0x" $ + showHexDigitsWord8 (PM.indexByteArray arr off) $ + showHexLoop (off + 1) (len - 1) arr $ + showChar ']' $ + s showHexLoop :: Int -> Int -> ByteArray -> String -> String -showHexLoop !ix !len !arr s = if len > 0 - then ',':'0':'x':showHexDigitsWord8 (PM.indexByteArray arr ix) (showHexLoop (ix + 1) (len - 1) arr s) - else s +showHexLoop !ix !len !arr s = + if len > 0 + then ',' : '0' : 'x' : showHexDigitsWord8 (PM.indexByteArray arr ix) (showHexLoop (ix + 1) (len - 1) arr s) + else s showHexDigitsWord8 :: Word8 -> String -> String showHexDigitsWord8 !w s = word4ToChar (unsafeShiftR w 4) : word4ToChar (0x0F .&. w) : s word4ToChar :: Word8 -> Char -word4ToChar w = if w < 10 - then unsafeChr (ord '0' + fromIntegral w) - else unsafeChr (ord 'a' + (fromIntegral w) - 10) +word4ToChar w = + if w < 10 + then unsafeChr (ord '0' + fromIntegral w) + else unsafeChr (ord 'a' + (fromIntegral w) - 10) diff --git a/src/Data/Bytes/Mutable.hs b/src/Data/Bytes/Mutable.hs index abce95c..2222f75 100644 --- a/src/Data/Bytes/Mutable.hs +++ b/src/Data/Bytes/Mutable.hs @@ -1,76 +1,87 @@ -{-# language BangPatterns #-} -{-# language LambdaCase #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE LambdaCase #-} --- | If you are interested in sub-arrays of 'MutableByteArray's (e.g. writing --- quicksort), it would be grossly inefficient to make a copy of the sub-array. --- On the other hand, it'd be really annoying to track limit indices by hand. --- --- This module defines the 'MutableBytes' type which exposes a standard array --- interface for a sub-arrays without copying and without manual index --- manipulation. For immutable arrays, see 'Data.Bytes'. +{- | If you are interested in sub-arrays of 'MutableByteArray's (e.g. writing +quicksort), it would be grossly inefficient to make a copy of the sub-array. +On the other hand, it'd be really annoying to track limit indices by hand. + +This module defines the 'MutableBytes' type which exposes a standard array +interface for a sub-arrays without copying and without manual index +manipulation. For immutable arrays, see 'Data.Bytes'. +-} module Data.Bytes.Mutable ( -- * Types MutableBytes + -- * Filtering , takeWhile , dropWhile + -- * Unsafe Slicing , unsafeTake , unsafeDrop + -- * Conversion , fromMutableByteArray ) where -import Prelude hiding (takeWhile,dropWhile) +import Prelude hiding (dropWhile, takeWhile) -import Data.Bytes.Types (MutableBytes(MutableBytes)) +import Control.Monad.Primitive (PrimMonad, PrimState) +import Data.Bytes.Types (MutableBytes (MutableBytes)) import Data.Primitive (MutableByteArray) import Data.Word (Word8) -import Control.Monad.Primitive (PrimMonad,PrimState) import qualified Data.Primitive as PM --- | Take bytes while the predicate is true, aliasing the --- argument array. -takeWhile :: PrimMonad m - => (Word8 -> m Bool) - -> MutableBytes (PrimState m) - -> m (MutableBytes (PrimState m)) -{-# inline takeWhile #-} +{- | Take bytes while the predicate is true, aliasing the +argument array. +-} +takeWhile :: + (PrimMonad m) => + (Word8 -> m Bool) -> + MutableBytes (PrimState m) -> + m (MutableBytes (PrimState m)) +{-# INLINE takeWhile #-} takeWhile k b = do n <- countWhile k b pure (unsafeTake n b) --- | Drop bytes while the predicate is true, aliasing the --- argument array. -dropWhile :: PrimMonad m - => (Word8 -> m Bool) - -> MutableBytes (PrimState m) - -> m (MutableBytes (PrimState m)) -{-# inline dropWhile #-} +{- | Drop bytes while the predicate is true, aliasing the +argument array. +-} +dropWhile :: + (PrimMonad m) => + (Word8 -> m Bool) -> + MutableBytes (PrimState m) -> + m (MutableBytes (PrimState m)) +{-# INLINE dropWhile #-} dropWhile k b = do n <- countWhile k b pure (unsafeDrop n b) -- | Take the first @n@ bytes from the argument, aliasing it. unsafeTake :: Int -> MutableBytes s -> MutableBytes s -{-# inline unsafeTake #-} +{-# INLINE unsafeTake #-} unsafeTake n (MutableBytes arr off _) = MutableBytes arr off n --- | Drop the first @n@ bytes from the argument, aliasing it. --- The new length will be @len - n@. +{- | Drop the first @n@ bytes from the argument, aliasing it. +The new length will be @len - n@. +-} unsafeDrop :: Int -> MutableBytes s -> MutableBytes s -{-# inline unsafeDrop #-} +{-# INLINE unsafeDrop #-} unsafeDrop n (MutableBytes arr off len) = MutableBytes arr (off + n) (len - n) --- | Create a slice of 'MutableBytes' that spans the entire --- argument array. This aliases the argument. -fromMutableByteArray :: PrimMonad m - => MutableByteArray (PrimState m) - -> m (MutableBytes (PrimState m)) -{-# inline fromMutableByteArray #-} +{- | Create a slice of 'MutableBytes' that spans the entire +argument array. This aliases the argument. +-} +fromMutableByteArray :: + (PrimMonad m) => + MutableByteArray (PrimState m) -> + m (MutableBytes (PrimState m)) +{-# INLINE fromMutableByteArray #-} fromMutableByteArray mba = do sz <- PM.getSizeofMutableByteArray mba pure (MutableBytes mba 0 sz) @@ -79,14 +90,18 @@ fromMutableByteArray mba = do -- predicate until the first non-match occurs. If all bytes -- match the predicate, this will return the length originally -- provided. -countWhile :: PrimMonad m - => (Word8 -> m Bool) - -> MutableBytes (PrimState m) - -> m Int -{-# inline countWhile #-} -countWhile k (MutableBytes arr off0 len0) = go off0 len0 0 where - go !off !len !n = if len > 0 - then (k =<< PM.readByteArray arr off) >>= \case - True -> go (off + 1) (len - 1) (n + 1) - False -> pure n - else pure n +countWhile :: + (PrimMonad m) => + (Word8 -> m Bool) -> + MutableBytes (PrimState m) -> + m Int +{-# INLINE countWhile #-} +countWhile k (MutableBytes arr off0 len0) = go off0 len0 0 + where + go !off !len !n = + if len > 0 + then + (k =<< PM.readByteArray arr off) >>= \case + True -> go (off + 1) (len - 1) (n + 1) + False -> pure n + else pure n diff --git a/src/Data/Bytes/Pure.hs b/src/Data/Bytes/Pure.hs index be8082c..cd5060b 100644 --- a/src/Data/Bytes/Pure.hs +++ b/src/Data/Bytes/Pure.hs @@ -1,9 +1,7 @@ -{-# language BangPatterns #-} -{-# language BlockArguments #-} -{-# language MagicHash #-} -{-# language NamedFieldPuns #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BlockArguments #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE TypeApplications #-} module Data.Bytes.Pure ( empty @@ -44,17 +42,17 @@ module Data.Bytes.Pure , replicateU ) where -import Prelude hiding (Foldable(..),map,replicate) +import Prelude hiding (Foldable (..), map, replicate) -import Control.Monad.Primitive (PrimState,PrimMonad) +import Control.Monad.Primitive (PrimMonad, PrimState) import Control.Monad.ST.Run (runByteArrayST) import Data.Bits (xor) -import Data.Bytes.Types (Bytes(Bytes)) import Data.ByteString (ByteString) -import Data.ByteString.Short.Internal (ShortByteString(SBS)) -import Data.Primitive (ByteArray(ByteArray),MutableByteArray,PrimArray(PrimArray)) -import Data.Word (Word64,Word32,Word8) -import Foreign.Ptr (Ptr,plusPtr) +import Data.ByteString.Short.Internal (ShortByteString (SBS)) +import Data.Bytes.Types (Bytes (Bytes)) +import Data.Primitive (ByteArray (ByteArray), MutableByteArray, PrimArray (PrimArray)) +import Data.Word (Word32, Word64, Word8) +import Foreign.Ptr (Ptr, plusPtr) import GHC.IO (unsafeIOToST) import qualified Data.ByteString as ByteString @@ -72,110 +70,133 @@ empty = Bytes mempty 0 0 -- | The empty pinned byte sequence. emptyPinned :: Bytes -emptyPinned = Bytes - ( runByteArrayST - (PM.newPinnedByteArray 0 >>= PM.unsafeFreezeByteArray) - ) 0 0 +emptyPinned = + Bytes + ( runByteArrayST + (PM.newPinnedByteArray 0 >>= PM.unsafeFreezeByteArray) + ) + 0 + 0 -- | The empty pinned byte sequence. emptyPinnedU :: ByteArray -emptyPinnedU = runByteArrayST - (PM.newPinnedByteArray 0 >>= PM.unsafeFreezeByteArray) +emptyPinnedU = + runByteArrayST + (PM.newPinnedByteArray 0 >>= PM.unsafeFreezeByteArray) --- | Yields a pinned byte sequence whose contents are identical to those --- of the original byte sequence. If the @ByteArray@ backing the argument --- was already pinned, this simply aliases the argument and does not perform --- any copying. +{- | Yields a pinned byte sequence whose contents are identical to those +of the original byte sequence. If the @ByteArray@ backing the argument +was already pinned, this simply aliases the argument and does not perform +any copying. +-} pin :: Bytes -> Bytes pin b@(Bytes arr _ len) = case PM.isByteArrayPinned arr of True -> b - False -> Bytes - ( runByteArrayST do - dst <- PM.newPinnedByteArray len - unsafeCopy dst 0 b - PM.unsafeFreezeByteArray dst - ) 0 len - --- | Convert the sliced 'Bytes' to an unsliced 'ByteArray'. This --- reuses the array backing the sliced 'Bytes' if the slicing metadata --- implies that all of the bytes are used. Otherwise, it makes a copy. + False -> + Bytes + ( runByteArrayST do + dst <- PM.newPinnedByteArray len + unsafeCopy dst 0 b + PM.unsafeFreezeByteArray dst + ) + 0 + len + +{- | Convert the sliced 'Bytes' to an unsliced 'ByteArray'. This +reuses the array backing the sliced 'Bytes' if the slicing metadata +implies that all of the bytes are used. Otherwise, it makes a copy. +-} toByteArray :: Bytes -> ByteArray -{-# inline toByteArray #-} +{-# INLINE toByteArray #-} toByteArray b@(Bytes arr off len) | off == 0, PM.sizeofByteArray arr == len = arr | otherwise = toByteArrayClone b --- | Variant of 'toByteArray' that unconditionally makes a copy of --- the array backing the sliced 'Bytes' even if the original array --- could be reused. Prefer 'toByteArray'. +{- | Variant of 'toByteArray' that unconditionally makes a copy of +the array backing the sliced 'Bytes' even if the original array +could be reused. Prefer 'toByteArray'. +-} toByteArrayClone :: Bytes -> ByteArray -{-# inline toByteArrayClone #-} +{-# INLINE toByteArrayClone #-} toByteArrayClone (Bytes arr off len) = runByteArrayST $ do m <- PM.newByteArray len PM.copyByteArray m 0 arr off len PM.unsafeFreezeByteArray m --- | Copy the byte sequence into a mutable buffer. The buffer must have --- enough space to accomodate the byte sequence, but this this is not --- checked. -unsafeCopy :: PrimMonad m - => MutableByteArray (PrimState m) -- ^ Destination - -> Int -- ^ Destination Offset - -> Bytes -- ^ Source - -> m () -{-# inline unsafeCopy #-} +{- | Copy the byte sequence into a mutable buffer. The buffer must have +enough space to accomodate the byte sequence, but this this is not +checked. +-} +unsafeCopy :: + (PrimMonad m) => + -- | Destination + MutableByteArray (PrimState m) -> + -- | Destination Offset + Int -> + -- | Source + Bytes -> + m () +{-# INLINE unsafeCopy #-} unsafeCopy dst dstIx (Bytes src srcIx len) = PM.copyByteArray dst dstIx src srcIx len -- | Create a slice of 'Bytes' that spans the entire argument array. fromByteArray :: ByteArray -> Bytes -{-# inline fromByteArray #-} +{-# INLINE fromByteArray #-} fromByteArray b = Bytes b 0 (PM.sizeofByteArray b) -- | Create a slice of 'Bytes' that spans the entire 'PrimArray' of 8-bit words. fromPrimArray :: PrimArray Word8 -> Bytes -{-# inline fromPrimArray #-} +{-# INLINE fromPrimArray #-} fromPrimArray p@(PrimArray b) = Bytes (ByteArray b) 0 (PM.sizeofPrimArray p) -- | The length of a slice of bytes. length :: Bytes -> Int -{-# inline length #-} +{-# INLINE length #-} length (Bytes _ _ len) = len -- | Hash byte sequence with 32-bit variant of FNV-1a. fnv1a32 :: Bytes -> Word32 -fnv1a32 !b = foldl' - (\acc w -> (fromIntegral @Word8 @Word32 w `xor` acc) * 0x01000193 - ) 0x811c9dc5 b +fnv1a32 !b = + foldl' + ( \acc w -> (fromIntegral @Word8 @Word32 w `xor` acc) * 0x01000193 + ) + 0x811c9dc5 + b -- | Hash byte sequence with 64-bit variant of FNV-1a. fnv1a64 :: Bytes -> Word64 -fnv1a64 !b = foldl' - (\acc w -> (fromIntegral @Word8 @Word64 w `xor` acc) * 0x00000100000001B3 - ) 0xcbf29ce484222325 b +fnv1a64 !b = + foldl' + ( \acc w -> (fromIntegral @Word8 @Word64 w `xor` acc) * 0x00000100000001B3 + ) + 0xcbf29ce484222325 + b -- | Left fold over bytes, non-strict in the accumulator. foldl :: (a -> Word8 -> a) -> a -> Bytes -> a -{-# inline foldl #-} +{-# INLINE foldl #-} foldl f a0 (Bytes arr off0 len0) = - go (off0 + len0 - 1) (len0 - 1) - where + go (off0 + len0 - 1) (len0 - 1) + where go !off !ix = case ix of (-1) -> a0 _ -> f (go (off - 1) (ix - 1)) (PM.indexByteArray arr off) -- | Left fold over bytes, strict in the accumulator. foldl' :: (a -> Word8 -> a) -> a -> Bytes -> a -{-# inline foldl' #-} -foldl' f a0 (Bytes arr off0 len0) = go a0 off0 len0 where +{-# INLINE foldl' #-} +foldl' f a0 (Bytes arr off0 len0) = go a0 off0 len0 + where go !a !off !len = case len of 0 -> a _ -> go (f a (PM.indexByteArray arr off)) (off + 1) (len - 1) -- | Left monadic fold over bytes, non-strict in the accumulator. -foldlM :: Monad m => (a -> Word8 -> m a) -> a -> Bytes -> m a -{-# inline foldlM #-} -foldlM f a0 (Bytes arr off0 len0) = go a0 off0 len0 where +foldlM :: (Monad m) => (a -> Word8 -> m a) -> a -> Bytes -> m a +{-# INLINE foldlM #-} +foldlM f a0 (Bytes arr off0 len0) = go a0 off0 len0 + where go a !off !len = case len of 0 -> pure a _ -> do @@ -184,94 +205,106 @@ foldlM f a0 (Bytes arr off0 len0) = go a0 off0 len0 where -- | Right fold over bytes, non-strict in the accumulator. foldr :: (Word8 -> a -> a) -> a -> Bytes -> a -{-# inline foldr #-} -foldr f a0 (Bytes arr off0 len0) = go off0 len0 where +{-# INLINE foldr #-} +foldr f a0 (Bytes arr off0 len0) = go off0 len0 + where go !off !len = case len of 0 -> a0 _ -> f (PM.indexByteArray arr off) (go (off + 1) (len - 1)) --- | Left fold over bytes, strict in the accumulator. The reduction function --- is applied to each element along with its index. +{- | Left fold over bytes, strict in the accumulator. The reduction function +is applied to each element along with its index. +-} ifoldl' :: (a -> Int -> Word8 -> a) -> a -> Bytes -> a -{-# inline ifoldl' #-} -ifoldl' f a0 (Bytes arr off0 len0) = go a0 0 off0 len0 where +{-# INLINE ifoldl' #-} +ifoldl' f a0 (Bytes arr off0 len0) = go a0 0 off0 len0 + where go !a !ix !off !len = case len of 0 -> a _ -> go (f a ix (PM.indexByteArray arr off)) (ix + 1) (off + 1) (len - 1) -- | Right fold over bytes, strict in the accumulator. foldr' :: (Word8 -> a -> a) -> a -> Bytes -> a -{-# inline foldr' #-} +{-# INLINE foldr' #-} foldr' f a0 (Bytes arr off0 len0) = - go a0 (off0 + len0 - 1) (len0 - 1) - where + go a0 (off0 + len0 - 1) (len0 - 1) + where go !a !off !ix = case ix of (-1) -> a _ -> go (f (PM.indexByteArray arr off) a) (off - 1) (ix - 1) -- | Right monadic fold over bytes, non-strict in the accumulator. -foldrM :: Monad m => (Word8 -> a -> m a) -> a -> Bytes -> m a -{-# inline foldrM #-} +foldrM :: (Monad m) => (Word8 -> a -> m a) -> a -> Bytes -> m a +{-# INLINE foldrM #-} foldrM f a0 (Bytes arr off0 len0) = - go a0 (off0 + len0 - 1) (len0 - 1) - where + go a0 (off0 + len0 - 1) (len0 - 1) + where go !a !off !ix = case ix of (-1) -> pure a _ -> do a' <- f (PM.indexByteArray arr off) a go a' (off - 1) (ix - 1) --- | Yields a pointer to the beginning of the byte sequence. It is only safe --- to call this on a 'Bytes' backed by a pinned @ByteArray@. +{- | Yields a pointer to the beginning of the byte sequence. It is only safe +to call this on a 'Bytes' backed by a pinned @ByteArray@. +-} contents :: Bytes -> Ptr Word8 -{-# inline contents #-} +{-# INLINE contents #-} contents (Bytes arr off _) = plusPtr (PM.byteArrayContents arr) off --- | Convert the sliced 'Bytes' to an unsliced 'ByteArray'. This --- reuses the array backing the sliced 'Bytes' if the slicing metadata --- implies that all of the bytes are used and they are already pinned. --- Otherwise, it makes a copy. +{- | Convert the sliced 'Bytes' to an unsliced 'ByteArray'. This +reuses the array backing the sliced 'Bytes' if the slicing metadata +implies that all of the bytes are used and they are already pinned. +Otherwise, it makes a copy. +-} toPinnedByteArray :: Bytes -> ByteArray -{-# inline toPinnedByteArray #-} +{-# INLINE toPinnedByteArray #-} toPinnedByteArray b@(Bytes arr off len) | off == 0, PM.sizeofByteArray arr == len, PM.isByteArrayPinned arr = arr | otherwise = toPinnedByteArrayClone b --- | Variant of 'toPinnedByteArray' that unconditionally makes a copy of --- the array backing the sliced 'Bytes' even if the original array --- could be reused. Prefer 'toPinnedByteArray'. +{- | Variant of 'toPinnedByteArray' that unconditionally makes a copy of +the array backing the sliced 'Bytes' even if the original array +could be reused. Prefer 'toPinnedByteArray'. +-} toPinnedByteArrayClone :: Bytes -> ByteArray toPinnedByteArrayClone (Bytes arr off len) = runByteArrayST $ do m <- PM.newPinnedByteArray len PM.copyByteArray m 0 arr off len PM.unsafeFreezeByteArray m --- | /O(n)/ when unpinned, /O(1)/ when pinned. Create a 'ByteString' from --- a byte sequence. This only copies the byte sequence if it is not pinned. +{- | /O(n)/ when unpinned, /O(1)/ when pinned. Create a 'ByteString' from +a byte sequence. This only copies the byte sequence if it is not pinned. +-} toByteString :: Bytes -> ByteString toByteString !b = pinnedToByteString (pin b) --- | Convert a pinned 'Bytes' to a 'ByteString' --- /O(1)/ Precondition: bytes are pinned. Behavior is undefined otherwise. +{- | Convert a pinned 'Bytes' to a 'ByteString' +/O(1)/ Precondition: bytes are pinned. Behavior is undefined otherwise. +-} pinnedToByteString :: Bytes -> ByteString pinnedToByteString (Bytes y@(PM.ByteArray x) off len) = ByteString.PS - (ForeignPtr.ForeignPtr - (case plusPtr (PM.byteArrayContents y) off of {Exts.Ptr p -> p}) - (ForeignPtr.PlainPtr (Exts.unsafeCoerce# x)) + ( ForeignPtr.ForeignPtr + (case plusPtr (PM.byteArrayContents y) off of Exts.Ptr p -> p) + (ForeignPtr.PlainPtr (Exts.unsafeCoerce# x)) ) - 0 len + 0 + len -- | /O(n)/ Copy a 'ByteString' to a byte sequence. fromByteString :: ByteString -> Bytes -fromByteString !b = Bytes - ( runByteArrayST $ unsafeIOToST $ do - dst@(PM.MutableByteArray dst# ) <- PM.newByteArray len - ByteString.unsafeUseAsCString b $ \src -> do - PM.copyPtrToMutablePrimArray (PM.MutablePrimArray dst# ) 0 src len - PM.unsafeFreezeByteArray dst - ) 0 len - where +fromByteString !b = + Bytes + ( runByteArrayST $ unsafeIOToST $ do + dst@(PM.MutableByteArray dst#) <- PM.newByteArray len + ByteString.unsafeUseAsCString b $ \src -> do + PM.copyPtrToMutablePrimArray (PM.MutablePrimArray dst#) 0 src len + PM.unsafeFreezeByteArray dst + ) + 0 + len + where !len = ByteString.length b -- | /O(n)/ Copy a lazy bytestring to a byte sequence. @@ -281,78 +314,86 @@ fromLazyByteString x = case LBS.length x of n64 -> let n = fromIntegral n64 :: Int in Bytes - (runByteArrayST $ unsafeIOToST $ do - dst@(PM.MutableByteArray dst# ) <- PM.newByteArray n - let loop chunks !ix = case chunks of - LBS.Empty -> PM.unsafeFreezeByteArray dst - LBS.Chunk c cs -> do - let !len = ByteString.length c - ByteString.unsafeUseAsCString c $ \src -> do - PM.copyPtrToMutablePrimArray (PM.MutablePrimArray dst# ) ix src len - loop cs (ix + len) - loop x 0 - ) 0 n + ( runByteArrayST $ unsafeIOToST $ do + dst@(PM.MutableByteArray dst#) <- PM.newByteArray n + let loop chunks !ix = case chunks of + LBS.Empty -> PM.unsafeFreezeByteArray dst + LBS.Chunk c cs -> do + let !len = ByteString.length c + ByteString.unsafeUseAsCString c $ \src -> do + PM.copyPtrToMutablePrimArray (PM.MutablePrimArray dst#) ix src len + loop cs (ix + len) + loop x 0 + ) + 0 + n -- | Drop the first @n@ bytes from the argument. Precondition: @n ≤ len@ unsafeDrop :: Int -> Bytes -> Bytes -{-# inline unsafeDrop #-} +{-# INLINE unsafeDrop #-} unsafeDrop n (Bytes arr off len) = Bytes arr (off + n) (len - n) -- | Variant of 'map' that returns unsliced byte sequence. mapU :: (Word8 -> Word8) -> Bytes -> ByteArray -{-# inline mapU #-} +{-# INLINE mapU #-} mapU f (Bytes array ix0 len) = runByteArrayST do dst <- PM.newByteArray len - let go !srcIx !dstIx = if dstIx < len - then do - let w = PM.indexByteArray array srcIx :: Word8 - PM.writeByteArray dst dstIx (f w) - go (srcIx + 1) (dstIx + 1) - else PM.unsafeFreezeByteArray dst + let go !srcIx !dstIx = + if dstIx < len + then do + let w = PM.indexByteArray array srcIx :: Word8 + PM.writeByteArray dst dstIx (f w) + go (srcIx + 1) (dstIx + 1) + else PM.unsafeFreezeByteArray dst go ix0 0 --- | Map over bytes in a sequence. The result has the same length as --- the argument. +{- | Map over bytes in a sequence. The result has the same length as +the argument. +-} map :: (Word8 -> Word8) -> Bytes -> Bytes -{-# inline map #-} +{-# INLINE map #-} map f !b = Bytes (mapU f b) 0 (length b) -- | Is the byte sequence empty? null :: Bytes -> Bool -{-# inline null #-} +{-# INLINE null #-} null (Bytes _ _ len) = len == 0 -- | Take the first @n@ bytes from the argument. Precondition: @n ≤ len@ unsafeTake :: Int -> Bytes -> Bytes -{-# inline unsafeTake #-} +{-# INLINE unsafeTake #-} unsafeTake n (Bytes arr off _) = Bytes arr off n --- | Index into the byte sequence at the given position. This index --- must be less than the length. +{- | Index into the byte sequence at the given position. This index +must be less than the length. +-} unsafeIndex :: Bytes -> Int -> Word8 -{-# inline unsafeIndex #-} +{-# INLINE unsafeIndex #-} unsafeIndex (Bytes arr off _) ix = PM.indexByteArray arr (off + ix) -- | Access the first byte. The given 'Bytes' must be non-empty. -{-# inline unsafeHead #-} +{-# INLINE unsafeHead #-} unsafeHead :: Bytes -> Word8 unsafeHead bs = unsafeIndex bs 0 --- | Convert the sliced 'Bytes' to an unsliced 'ShortByteString'. This --- reuses the array backing the sliced 'Bytes' if the slicing metadata --- implies that all of the bytes are used. Otherwise, it makes a copy. +{- | Convert the sliced 'Bytes' to an unsliced 'ShortByteString'. This +reuses the array backing the sliced 'Bytes' if the slicing metadata +implies that all of the bytes are used. Otherwise, it makes a copy. +-} toShortByteString :: Bytes -> ShortByteString -{-# inline toShortByteString #-} +{-# INLINE toShortByteString #-} toShortByteString !b = case toByteArray b of PM.ByteArray x -> SBS x -- | Replicate a byte @n@ times. replicate :: - Int -- ^ Desired length @n@ - -> Word8 -- ^ Byte to replicate - -> Bytes + -- | Desired length @n@ + Int -> + -- | Byte to replicate + Word8 -> + Bytes replicate !n !w = Bytes (replicateU n w) 0 n -- | Variant of 'replicate' that returns a unsliced byte array. diff --git a/src/Data/Bytes/Search.hs b/src/Data/Bytes/Search.hs index 4bb9a5e..403a883 100644 --- a/src/Data/Bytes/Search.hs +++ b/src/Data/Bytes/Search.hs @@ -1,12 +1,9 @@ -{-# language BangPatterns #-} -{-# language BlockArguments #-} -{-# language DuplicateRecordFields #-} -{-# language MagicHash #-} -{-# language NamedFieldPuns #-} -{-# language RankNTypes #-} -{-# language TupleSections #-} -{-# language TypeApplications #-} -{-# language UnboxedTuples #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BlockArguments #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE RankNTypes #-} -- This is broken out into a separate module to make it easier -- to dump core and investigate performance issues. @@ -16,14 +13,14 @@ module Data.Bytes.Search , isInfixOf ) where -import Prelude hiding (length,takeWhile,dropWhile,null,foldl,foldr,elem,replicate,any,all,readFile,map) +import Prelude hiding (all, any, dropWhile, elem, foldl, foldr, length, map, null, readFile, replicate, takeWhile) -import Control.Monad.ST.Run (runByteArrayST,runPrimArrayST) -import Data.Bits((.&.),(.|.),shiftL,finiteBitSize) -import Data.Bytes.Pure (length,unsafeIndex,unsafeHead) -import Data.Bytes.Types (Bytes(Bytes,array,offset)) -import Data.Primitive (ByteArray,PrimArray) -import GHC.Exts (Int(I#)) +import Control.Monad.ST.Run (runByteArrayST, runPrimArrayST) +import Data.Bits (finiteBitSize, shiftL, (.&.), (.|.)) +import Data.Bytes.Pure (length, unsafeHead, unsafeIndex) +import Data.Bytes.Types (Bytes (Bytes, array, offset)) +import Data.Primitive (ByteArray, PrimArray) +import GHC.Exts (Int (I#)) import GHC.Word (Word32) import qualified Data.Bytes.Byte as Byte @@ -38,27 +35,36 @@ import qualified Data.Primitive as PM -- sure that there is no unnecessary boxing of Int, Word32, or Bytes -- going on. Here are some other things that have not been done: -- + -- * The hash is currently a Word32. It would be better to use either + -- Word or Word64 for this. We would need for hashKey to be different. + -- * In several places, we track an index into a Bytes. This index gets + -- repeatedly added to the base offset as we loop over the bytes. We -- could instead track the true offset instead of repeatedly -- recalculating it. --- | Replace every non-overlapping occurrence of @needle@ in --- @haystack@ with @replacement@. +{- | Replace every non-overlapping occurrence of @needle@ in +@haystack@ with @replacement@. +-} replace :: - Bytes -- ^ needle, must not be empty - -> Bytes -- ^ replacement - -> Bytes -- ^ haystack - -> Bytes -{-# noinline replace #-} + -- | needle, must not be empty + Bytes -> + -- | replacement + Bytes -> + -- | haystack + Bytes -> + Bytes +{-# NOINLINE replace #-} -- Implementation note: there is a lot of room to improve the performance -- of this function. -replace !needle !replacement !haystack@Bytes{array=haystackArray,offset=haystackIndex,length=haystackLength} +replace !needle !replacement !haystack@Bytes {array = haystackArray, offset = haystackIndex, length = haystackLength} | Pure.length needle == 0 = errorWithoutStackTrace "Data.Bytes.replace: needle of length zero" | Pure.length haystack == 0 = Pure.empty - | Pure.length needle == 1, Pure.length replacement == 1 = + | Pure.length needle == 1 + , Pure.length replacement == 1 = let !needle0 = unsafeIndex needle 0 !replacement0 = unsafeIndex replacement 0 in Pure.map (\w -> if w == needle0 then replacement0 else w) haystack @@ -67,75 +73,93 @@ replace !needle !replacement !haystack@Bytes{array=haystackArray,offset=haystack !ixs = findIndicesKarpRabin 0 hp needle haystackArray haystackIndex haystackLength in Pure.fromByteArray (replaceIndices ixs replacement (Pure.length needle) haystackArray haystackIndex haystackLength) - -- This is an internal function because it deals explicitly with -- an offset into a byte array. -- -- Example: + -- * haystack len: 39 + -- * ixs: 7, 19, 33 + -- * patLen: 5 + -- * replacment: foo (len 3) + -- We want to perform these copies: + -- * src[0,7] -> dst[0,7] + -- * foo -> dst[7,3] + -- * src[12,7] -> dst[10,7] + -- * foo -> dst[17,3] + -- * src[24,9] -> dst[20,9] + -- * foo -> dst[29,3] + -- * src[38,1] -> dst[32,1] replaceIndices :: PrimArray Int -> Bytes -> Int -> ByteArray -> Int -> Int -> ByteArray replaceIndices !ixs !replacement !patLen !haystack !ix0 !len0 = runByteArrayST $ do let !ixsLen = PM.sizeofPrimArray ixs let !delta = Pure.length replacement - patLen dst <- PM.newByteArray (len0 + ixsLen * delta) - let applyReplacement !ixIx !prevSrcIx = if ixIx < ixsLen - then do - let !srcMatchIx = PM.indexPrimArray ixs ixIx - let !offset = ixIx * delta - let !dstIx = srcMatchIx + offset - ix0 - Pure.unsafeCopy dst (prevSrcIx + offset - ix0) - Bytes{array=haystack,offset=prevSrcIx,length=srcMatchIx - prevSrcIx} - Pure.unsafeCopy dst dstIx replacement - applyReplacement (ixIx + 1) (srcMatchIx + patLen) - else do - let !offset = ixIx * delta - Pure.unsafeCopy dst (prevSrcIx + offset - ix0) - Bytes{array=haystack,offset=prevSrcIx,length=(len0 + ix0) - prevSrcIx} - PM.unsafeFreezeByteArray dst + let applyReplacement !ixIx !prevSrcIx = + if ixIx < ixsLen + then do + let !srcMatchIx = PM.indexPrimArray ixs ixIx + let !offset = ixIx * delta + let !dstIx = srcMatchIx + offset - ix0 + Pure.unsafeCopy + dst + (prevSrcIx + offset - ix0) + Bytes {array = haystack, offset = prevSrcIx, length = srcMatchIx - prevSrcIx} + Pure.unsafeCopy dst dstIx replacement + applyReplacement (ixIx + 1) (srcMatchIx + patLen) + else do + let !offset = ixIx * delta + Pure.unsafeCopy + dst + (prevSrcIx + offset - ix0) + Bytes {array = haystack, offset = prevSrcIx, length = (len0 + ix0) - prevSrcIx} + PM.unsafeFreezeByteArray dst applyReplacement 0 ix0 -- | Find locations of non-overlapping instances of @needle@ within @haystack@. findIndices :: - Bytes -- ^ needle - -> Bytes -- ^ haystack - -> PrimArray Int -findIndices needle Bytes{array,offset=off,length=len} + -- | needle + Bytes -> + -- | haystack + Bytes -> + PrimArray Int +findIndices needle Bytes {array, offset = off, length = len} | needleLen == 0 = errorWithoutStackTrace "Data.Bytes.findIndices: needle with length zero" | len == 0 = mempty - | otherwise = + | otherwise = let !hp = rollingHash needle in findIndicesKarpRabin (negate off) hp needle array off len - where + where needleLen = Pure.length needle -- Precondition: Haystack has non-zero length -- Precondition: Pattern has non-zero length --- Uses karp rabin to search. +-- Uses karp rabin to search. -- Easy opportunity to improve implementation. Instead of having karpRabin -- return two slices, we could have it just return a single index. findIndicesKarpRabin :: - Int -- Output index modifier. Set to negated initial index to make slicing invisible in results. - -> Word32 -- Hash to search for (must agree with pattern) - -> Bytes -- Pattern to search for - -> ByteArray - -> Int -- initial index - -> Int -- length - -> PrimArray Int + Int -> -- Output index modifier. Set to negated initial index to make slicing invisible in results. + Word32 -> -- Hash to search for (must agree with pattern) + Bytes -> -- Pattern to search for + ByteArray -> + Int -> -- initial index + Int -> -- length + PrimArray Int findIndicesKarpRabin !ixModifier !hp !pat !haystack !ix0 !len0 = runPrimArrayST $ do let dstLen = 1 + quot len0 (Pure.length pat) dst <- PM.newPrimArray dstLen - let go !ix !len !ixIx = case karpRabin hp pat Bytes{array=haystack,offset=ix,length=len} of + let go !ix !len !ixIx = case karpRabin hp pat Bytes {array = haystack, offset = ix, length = len} of (-1) -> do PM.shrinkMutablePrimArray dst ixIx PM.unsafeFreezePrimArray dst @@ -150,46 +174,50 @@ findIndicesKarpRabin !ixModifier !hp !pat !haystack !ix0 !len0 = runPrimArrayST -- Output: Negative one means match not found. Other negative -- numbers should not occur. Zero may occur. Positive number -- means the number of bytes to skip to make it past the match. -breakSubstring :: Bytes -- ^ String to search for - -> Bytes -- ^ String to search in - -> Int +breakSubstring :: + -- | String to search for + Bytes -> + -- | String to search in + Bytes -> + Int breakSubstring !pat !haystack@(Bytes _ off0 _) = case lp of 0 -> 0 1 -> case Byte.elemIndexLoop# (unsafeHead pat) haystack of (-1#) -> (-1) off -> 1 + (I# off) - off0 - _ -> if lp * 8 <= finiteBitSize (0 :: Word) - then shift haystack - else karpRabin (rollingHash pat) pat haystack - where - lp = length pat + _ -> + if lp * 8 <= finiteBitSize (0 :: Word) + then shift haystack + else karpRabin (rollingHash pat) pat haystack + where + lp = length pat {-# INLINE shift #-} shift :: Bytes -> Int shift !src - | length src < lp = (-1) - | otherwise = search (intoWord $ Pure.unsafeTake lp src) lp - where + | length src < lp = (-1) + | otherwise = search (intoWord $ Pure.unsafeTake lp src) lp + where intoWord :: Bytes -> Word intoWord = Pure.foldl' (\w b -> (w `shiftL` 8) .|. fromIntegral b) 0 - wp = intoWord pat + wp = intoWord pat mask = (1 `shiftL` (8 * lp)) - 1 search :: Word -> Int -> Int search !w !i - | w == wp = i - | length src <= i = (-1) - | otherwise = search w' (i + 1) - where - b = fromIntegral (Pure.unsafeIndex src i) + | w == wp = i + | length src <= i = (-1) + | otherwise = search w' (i + 1) + where + b = fromIntegral (Pure.unsafeIndex src i) w' = mask .&. ((w `shiftL` 8) .|. b) -- Only used for karp rabin rollingHash :: Bytes -> Word32 -{-# inline rollingHash #-} +{-# INLINE rollingHash #-} rollingHash = Pure.foldl' (\h b -> h * hashKey + fromIntegral b) 0 -hashKey :: Word32 -{-# inline hashKey #-} +hashKey :: Word32 +{-# INLINE hashKey #-} hashKey = 2891336453 -- Precondition: Length of bytes is greater than or equal to 1. @@ -199,9 +227,9 @@ hashKey = 2891336453 -- means the number of bytes to skip to make it past the match. karpRabin :: Word32 -> Bytes -> Bytes -> Int karpRabin !hp !pat !src - | length src < lp = (-1) - | otherwise = search (rollingHash $ Pure.unsafeTake lp src) lp - where + | length src < lp = (-1) + | otherwise = search (rollingHash $ Pure.unsafeTake lp src) lp + where lp :: Int !lp = Pure.length pat m :: Word32 @@ -209,26 +237,30 @@ karpRabin !hp !pat !src get :: Int -> Word32 get !ix = fromIntegral (Pure.unsafeIndex src ix) search !hs !i - | hp == hs && eqBytesNoShortCut pat (Pure.unsafeTake lp (Pure.unsafeDrop (i - lp) src)) = i - | length src <= i = (-1) - | otherwise = search hs' (i + 1) - where - hs' = hs * hashKey + - get i - - m * get (i - lp) - --- | Is the first argument an infix of the second argument? --- --- Uses the Rabin-Karp algorithm: expected time @O(n+m)@, worst-case @O(nm)@. -isInfixOf :: Bytes -- ^ String to search for - -> Bytes -- ^ String to search in - -> Bool -isInfixOf p s = Pure.null p || breakSubstring p s >= 0 + | hp == hs && eqBytesNoShortCut pat (Pure.unsafeTake lp (Pure.unsafeDrop (i - lp) src)) = i + | length src <= i = (-1) + | otherwise = search hs' (i + 1) + where + hs' = + hs * hashKey + + get i + - m * get (i - lp) +{- | Is the first argument an infix of the second argument? + +Uses the Rabin-Karp algorithm: expected time @O(n+m)@, worst-case @O(nm)@. +-} +isInfixOf :: + -- | String to search for + Bytes -> + -- | String to search in + Bytes -> + Bool +isInfixOf p s = Pure.null p || breakSubstring p s >= 0 -- Precondition: both arguments have the same length -- Skips the pointer equality check and the length check. eqBytesNoShortCut :: Bytes -> Bytes -> Bool -{-# inline eqBytesNoShortCut #-} +{-# INLINE eqBytesNoShortCut #-} eqBytesNoShortCut (Bytes arr1 off1 len1) (Bytes arr2 off2 _) = PM.compareByteArrays arr1 off1 arr2 off2 len1 == EQ diff --git a/src/Data/Bytes/Text/Ascii.hs b/src/Data/Bytes/Text/Ascii.hs index ad94efd..ce56ef6 100644 --- a/src/Data/Bytes/Text/Ascii.hs +++ b/src/Data/Bytes/Text/Ascii.hs @@ -4,12 +4,13 @@ {-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE TypeApplications #-} --- | This module treats 'Bytes' data as holding ASCII text. Providing bytes --- outside the ASCII range (@U+0000@ -- @U+007F@) may cause a failure or --- unspecified results, but such bytes will never be inspected. --- --- For functions that can operate on ASCII-compatible encodings, see --- 'Data.Bytes.Text.AsciiExt'. +{- | This module treats 'Bytes' data as holding ASCII text. Providing bytes +outside the ASCII range (@U+0000@ -- @U+007F@) may cause a failure or +unspecified results, but such bytes will never be inspected. + +For functions that can operate on ASCII-compatible encodings, see +'Data.Bytes.Text.AsciiExt'. +-} module Data.Bytes.Text.Ascii ( fromString , decodeDecWord @@ -22,16 +23,16 @@ module Data.Bytes.Text.Ascii ) where import Data.Bits ((.&.)) -import Data.ByteString.Short.Internal (ShortByteString(SBS)) +import Data.ByteString.Short.Internal (ShortByteString (SBS)) import Data.Bytes.Text.Latin1 (decodeDecWord) -import Data.Bytes.Types (Bytes(Bytes)) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) import Data.Primitive (ByteArray) import Data.Text (Text) import Data.Text.Short (ShortText) import Data.Word (Word8) import Foreign.C.String (CString) -import Foreign.Ptr (Ptr,plusPtr,castPtr) +import Foreign.Ptr (Ptr, castPtr, plusPtr) import qualified Data.Bytes.Pure as Bytes import qualified Data.Primitive as PM @@ -41,26 +42,28 @@ import qualified Data.Text.Internal as I import qualified Data.Text.Short.Unsafe as TS import qualified GHC.Exts as Exts --- | Convert a 'String' consisting of only characters in the ASCII block --- to a byte sequence. Any character with a codepoint above @U+007F@ is --- replaced by @U+0000@. +{- | Convert a 'String' consisting of only characters in the ASCII block +to a byte sequence. Any character with a codepoint above @U+007F@ is +replaced by @U+0000@. +-} fromString :: String -> Bytes -fromString = Bytes.fromByteArray - . Exts.fromList - . map (\c -> let i = ord c in if i < 128 then fromIntegral @Int @Word8 i else 0) +fromString = + Bytes.fromByteArray + . Exts.fromList + . map (\c -> let i = ord c in if i < 128 then fromIntegral @Int @Word8 i else 0) -- TODO presumably also fromText and fromShortText toShortText :: Bytes -> Maybe ShortText -{-# inline toShortText #-} +{-# INLINE toShortText #-} toShortText !b = case Bytes.foldr (\w acc -> w < 128 && acc) True b of True -> Just (TS.fromShortByteStringUnsafe (Bytes.toShortByteString b)) False -> Nothing toShortTextU :: ByteArray -> Maybe ShortText -{-# inline toShortTextU #-} +{-# INLINE toShortTextU #-} toShortTextU !b = case Bytes.foldr (\w acc -> w < 128 && acc) True (Bytes.fromByteArray b) of - True -> Just (TS.fromShortByteStringUnsafe (case b of {PM.ByteArray x -> SBS x})) + True -> Just (TS.fromShortByteStringUnsafe (case b of PM.ByteArray x -> SBS x)) False -> Nothing #if MIN_VERSION_text(2,0,0) @@ -75,16 +78,17 @@ toText !b@(Bytes (PM.ByteArray arr) off len) = case Bytes.foldr (\w acc -> w < 1 False -> Nothing #endif --- | Is the byte sequence equal to the @NUL@-terminated C String? --- The C string must be a constant. +{- | Is the byte sequence equal to the @NUL@-terminated C String? +The C string must be a constant. +-} equalsCStringCaseInsensitive :: CString -> Bytes -> Bool -{-# inline equalsCStringCaseInsensitive #-} -equalsCStringCaseInsensitive !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) off0 len0 where +{-# INLINE equalsCStringCaseInsensitive #-} +equalsCStringCaseInsensitive !ptr0 (Bytes arr off0 len0) = go (castPtr ptr0 :: Ptr Word8) off0 len0 + where go !ptr !off !len = case len of 0 -> PM.indexOffPtr ptr 0 == (0 :: Word8) _ -> case PM.indexOffPtr ptr 0 of 0 -> False c -> (c .&. 0b1101_1111) == (PM.indexByteArray arr off .&. 0b1101_1111) - && - go (plusPtr ptr 1) (off + 1) (len - 1) + && go (plusPtr ptr 1) (off + 1) (len - 1) diff --git a/src/Data/Bytes/Text/AsciiExt.hs b/src/Data/Bytes/Text/AsciiExt.hs index 1096b9a..36d695e 100644 --- a/src/Data/Bytes/Text/AsciiExt.hs +++ b/src/Data/Bytes/Text/AsciiExt.hs @@ -2,33 +2,36 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE RankNTypes #-} --- | This module contains functions which operate on supersets of 'Bytes' containing ASCII-encoded text. --- That is, none of the functions here inspect bytes with a value greater than 127, and do not fail due to the presence of such bytes. - -- For functions that can fail for bytes outside the ASCII range, see -- 'Data.Bytes.Ascii'. For functions that can inspect bytes outside ASCII, see -- any of the modules for ASCII-compatible encodings (e.g. 'Data.Bytes.Utf8', -- 'Data.Bytes.Latin1', and so on). + +{- | This module contains functions which operate on supersets of 'Bytes' containing ASCII-encoded text. +That is, none of the functions here inspect bytes with a value greater than 127, and do not fail due to the presence of such bytes. +-} module Data.Bytes.Text.AsciiExt ( -- * Line-Oriented IO hFoldLines , hForLines_ - -- ** Standard Handles + + -- ** Standard Handles , forLines_ , foldLines - -- * Text Manipulation + + -- * Text Manipulation , toLowerU ) where import Control.Monad.ST (ST) import Control.Monad.ST.Run (runByteArrayST) -import Data.Bytes.Types (Bytes(..)) +import Data.Bytes.Types (Bytes (..)) import Data.Primitive (ByteArray) import Data.Word (Word8) import System.IO (Handle, hIsEOF, stdin) -import qualified Data.Bytes.Pure as Bytes import qualified Data.ByteString.Char8 as BC8 +import qualified Data.Bytes.Pure as Bytes import qualified Data.Primitive as PM -- | `hForLines_` over `stdin` @@ -41,56 +44,63 @@ foldLines :: a -> (a -> Bytes -> IO a) -> IO a {-# INLINEABLE foldLines #-} foldLines = hFoldLines stdin --- | Perform an action on each line of the input, discarding results. --- To maintain a running state, see 'hFoldLines'. --- --- Lines are extracted with with 'BC8.hGetLine', which does not document its --- detection algorithm. As of writing (bytestring v0.11.1.0), lines are --- delimited by a single @\n@ character (UNIX-style, as all things should be). +{- | Perform an action on each line of the input, discarding results. +To maintain a running state, see 'hFoldLines'. + +Lines are extracted with with 'BC8.hGetLine', which does not document its +detection algorithm. As of writing (bytestring v0.11.1.0), lines are +delimited by a single @\n@ character (UNIX-style, as all things should be). +-} hForLines_ :: Handle -> (Bytes -> IO a) -> IO () hForLines_ h body = loop - where - loop = hIsEOF h >>= \case - False -> do - line <- Bytes.fromByteString <$> BC8.hGetLine h - _ <- body line - loop - True -> pure () + where + loop = + hIsEOF h >>= \case + False -> do + line <- Bytes.fromByteString <$> BC8.hGetLine h + _ <- body line + loop + True -> pure () + +{- | Perform an action on each line of the input, threading state through the computation. +If you do not need to keep a state, see `hForLines_`. --- | Perform an action on each line of the input, threading state through the computation. --- If you do not need to keep a state, see `hForLines_`. --- --- Lines are extracted with with 'BC8.hGetLine', which does not document its --- detection algorithm. As of writing (bytestring v0.11.1.0), lines are --- delimited by a single @\n@ character (UNIX-style, as all things should be). +Lines are extracted with with 'BC8.hGetLine', which does not document its +detection algorithm. As of writing (bytestring v0.11.1.0), lines are +delimited by a single @\n@ character (UNIX-style, as all things should be). +-} hFoldLines :: Handle -> a -> (a -> Bytes -> IO a) -> IO a hFoldLines h z body = loop z - where - loop !x = hIsEOF h >>= \case - False -> do - line <- Bytes.fromByteString <$> BC8.hGetLine h - x' <- body x line - loop x' - True -> pure x + where + loop !x = + hIsEOF h >>= \case + False -> do + line <- Bytes.fromByteString <$> BC8.hGetLine h + x' <- body x line + loop x' + True -> pure x --- | /O(n)/ Convert ASCII letters to lowercase. This adds @0x20@ to bytes in the --- range @[0x41,0x5A]@ (@A-Z@ ⇒ @a-z@) and leaves all other bytes alone. --- Unconditionally copies the bytes. +{- | /O(n)/ Convert ASCII letters to lowercase. This adds @0x20@ to bytes in the +range @[0x41,0x5A]@ (@A-Z@ ⇒ @a-z@) and leaves all other bytes alone. +Unconditionally copies the bytes. +-} toLowerU :: Bytes -> ByteArray toLowerU (Bytes src off0 len0) = runByteArrayST action - where + where action :: forall s. ST s ByteArray action = do dst <- PM.newByteArray len0 - let go !off !ix !len = if len == 0 - then pure () - else do - let w = PM.indexByteArray src off :: Word8 - w' = if w >= 0x41 && w <= 0x5A - then w + 32 - else w - PM.writeByteArray dst ix w' - go (off + 1) (ix + 1) (len - 1) + let go !off !ix !len = + if len == 0 + then pure () + else do + let w = PM.indexByteArray src off :: Word8 + w' = + if w >= 0x41 && w <= 0x5A + then w + 32 + else w + PM.writeByteArray dst ix w' + go (off + 1) (ix + 1) (len - 1) go off0 0 len0 PM.unsafeFreezeByteArray dst diff --git a/src/Data/Bytes/Text/Latin1.hs b/src/Data/Bytes/Text/Latin1.hs index 8909bf4..91c477c 100644 --- a/src/Data/Bytes/Text/Latin1.hs +++ b/src/Data/Bytes/Text/Latin1.hs @@ -1,28 +1,29 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE TypeApplications #-} -{-# LANGUAGE UnboxedSums #-} {-# LANGUAGE UnboxedTuples #-} --- | This module treats 'Bytes' data as holding text encoded in ISO-8859-1. This --- encoding can only encode codepoints strictly below @U+0100@, but this allows --- each codepoint to be placed directly into a single byte. This range consists --- of Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls, which includes --- ASCII. --- --- Strictly, ISO-8859-1 is not to be confused with ISO/IEC 8859-1 (which was the --- default encoding for webpages before HTML5). ISO/IEC 8859-1 lacks encodings --- for the C0 and C1 control characters. --- --- With HTML5, the default encoding of webpages was changed to Windows-1252, --- which is _not_ compatible with ISO-8859-1. Windows-1252 uses the C1 Control --- range (@U+0080@ -- @U+009F@) mostly to encode a variety of printable --- characters. For this encoding, see 'Data.Bytes.Text.Windows1252'. +{- | This module treats 'Bytes' data as holding text encoded in ISO-8859-1. This +encoding can only encode codepoints strictly below @U+0100@, but this allows +each codepoint to be placed directly into a single byte. This range consists +of Unicode Basic Latin, Latin-1 Supplement and C0+C1 Controls, which includes +ASCII. + +Strictly, ISO-8859-1 is not to be confused with ISO/IEC 8859-1 (which was the +default encoding for webpages before HTML5). ISO/IEC 8859-1 lacks encodings +for the C0 and C1 control characters. + +With HTML5, the default encoding of webpages was changed to Windows-1252, +which is _not_ compatible with ISO-8859-1. Windows-1252 uses the C1 Control +range (@U+0080@ -- @U+009F@) mostly to encode a variety of printable +characters. For this encoding, see 'Data.Bytes.Text.Windows1252'. +-} module Data.Bytes.Text.Latin1 ( toString , fromString , decodeDecWord - -- * Specialized Comparisons + + -- * Specialized Comparisons , equals1 , equals2 , equals3 @@ -42,21 +43,20 @@ module Data.Bytes.Text.Latin1 import Prelude hiding (length) -import Data.Bytes.Types (Bytes(..)) -import Data.Char (ord,chr) -import Data.Primitive (ByteArray(ByteArray)) +import Data.Bytes.Types (Bytes (..)) +import Data.Char (chr, ord) +import Data.Primitive (ByteArray (ByteArray)) import Data.Word (Word8) -import GHC.Exts (Int(I#),Char(C#),or#,ltWord#,int2Word#) -import GHC.Exts (Word(W#),Word#) +import GHC.Exts (Char (C#), Int (I#), Word (W#), Word#, int2Word#, ltWord#, or#) -import qualified Data.Primitive as PM import qualified Data.Bytes.Pure as Bytes +import qualified Data.Primitive as PM import qualified GHC.Exts as Exts - --- | Convert a 'String' consisting of only characters representable --- by ISO-8859-1. These are encoded with ISO-8859-1. Any character --- with a codepoint above @U+00FF@ is replaced by an unspecified byte. +{- | Convert a 'String' consisting of only characters representable +by ISO-8859-1. These are encoded with ISO-8859-1. Any character +with a codepoint above @U+00FF@ is replaced by an unspecified byte. +-} fromString :: String -> Bytes fromString = Bytes.fromByteArray . Exts.fromList . map (fromIntegral @Int @Word8 . ord) @@ -68,263 +68,296 @@ toString = Bytes.foldr (\w xs -> chr (fromIntegral @Word8 @Int w) : xs) [] -- TODO presumably also fromText and fromShortText - --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a singleton whose element matches the character? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a singleton whose element matches the character? +-} equals1 :: Char -> Bytes -> Bool {-# INLINE equals1 #-} equals1 !c0 (Bytes arr off len) = case len of 1 -> c0 == indexCharArray arr off _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a doubleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a doubleton whose elements match the characters? +-} equals2 :: Char -> Char -> Bytes -> Bool equals2 !c0 !c1 (Bytes arr off len) = case len of - 2 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) + 2 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a tripleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a tripleton whose elements match the characters? +-} equals3 :: Char -> Char -> Char -> Bytes -> Bool equals3 !c0 !c1 !c2 (Bytes arr off len) = case len of - 3 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) + 3 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a quadrupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a quadrupleton whose elements match the characters? +-} equals4 :: Char -> Char -> Char -> Char -> Bytes -> Bool equals4 !c0 !c1 !c2 !c3 (Bytes arr off len) = case len of - 4 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) + 4 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a quintupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a quintupleton whose elements match the characters? +-} equals5 :: Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals5 !c0 !c1 !c2 !c3 !c4 (Bytes arr off len) = case len of - 5 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) + 5 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a sextupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a sextupleton whose elements match the characters? +-} equals6 :: Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals6 !c0 !c1 !c2 !c3 !c4 !c5 (Bytes arr off len) = case len of - 6 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) + 6 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a septupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a septupleton whose elements match the characters? +-} equals7 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals7 !c0 !c1 !c2 !c3 !c4 !c5 !c6 (Bytes arr off len) = case len of - 7 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) + 7 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- an octupleton whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +an octupleton whose elements match the characters? +-} equals8 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals8 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 (Bytes arr off len) = case len of - 8 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) && - c7 == indexCharArray arr (off + 7) + 8 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) + && c7 == indexCharArray arr (off + 7) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a 9-tuple whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a 9-tuple whose elements match the characters? +-} equals9 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals9 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 (Bytes arr off len) = case len of - 9 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) && - c7 == indexCharArray arr (off + 7) && - c8 == indexCharArray arr (off + 8) + 9 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) + && c7 == indexCharArray arr (off + 7) + && c8 == indexCharArray arr (off + 8) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a 10-tuple whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a 10-tuple whose elements match the characters? +-} equals10 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals10 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 (Bytes arr off len) = case len of - 10 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) && - c7 == indexCharArray arr (off + 7) && - c8 == indexCharArray arr (off + 8) && - c9 == indexCharArray arr (off + 9) + 10 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) + && c7 == indexCharArray arr (off + 7) + && c8 == indexCharArray arr (off + 8) + && c9 == indexCharArray arr (off + 9) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a 11-tuple whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a 11-tuple whose elements match the characters? +-} equals11 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals11 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 (Bytes arr off len) = case len of - 11 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) && - c7 == indexCharArray arr (off + 7) && - c8 == indexCharArray arr (off + 8) && - c9 == indexCharArray arr (off + 9) && - c10 == indexCharArray arr (off + 10) + 11 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) + && c7 == indexCharArray arr (off + 7) + && c8 == indexCharArray arr (off + 8) + && c9 == indexCharArray arr (off + 9) + && c10 == indexCharArray arr (off + 10) _ -> False --- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, --- a 12-tuple whose elements match the characters? +{- | Is the byte sequence, when interpreted as ISO-8859-1-encoded text, +a 12-tuple whose elements match the characters? +-} equals12 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals12 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 (Bytes arr off len) = case len of - 12 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) && - c7 == indexCharArray arr (off + 7) && - c8 == indexCharArray arr (off + 8) && - c9 == indexCharArray arr (off + 9) && - c10 == indexCharArray arr (off + 10) && - c11 == indexCharArray arr (off + 11) + 12 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) + && c7 == indexCharArray arr (off + 7) + && c8 == indexCharArray arr (off + 8) + && c9 == indexCharArray arr (off + 9) + && c10 == indexCharArray arr (off + 10) + && c11 == indexCharArray arr (off + 11) _ -> False equals13 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals13 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 !c12 (Bytes arr off len) = case len of - 13 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) && - c7 == indexCharArray arr (off + 7) && - c8 == indexCharArray arr (off + 8) && - c9 == indexCharArray arr (off + 9) && - c10 == indexCharArray arr (off + 10) && - c11 == indexCharArray arr (off + 11) && - c12 == indexCharArray arr (off + 12) + 13 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) + && c7 == indexCharArray arr (off + 7) + && c8 == indexCharArray arr (off + 8) + && c9 == indexCharArray arr (off + 9) + && c10 == indexCharArray arr (off + 10) + && c11 == indexCharArray arr (off + 11) + && c12 == indexCharArray arr (off + 12) _ -> False equals14 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals14 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 !c12 !c13 (Bytes arr off len) = case len of - 14 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) && - c7 == indexCharArray arr (off + 7) && - c8 == indexCharArray arr (off + 8) && - c9 == indexCharArray arr (off + 9) && - c10 == indexCharArray arr (off + 10) && - c11 == indexCharArray arr (off + 11) && - c12 == indexCharArray arr (off + 12) && - c13 == indexCharArray arr (off + 13) + 14 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) + && c7 == indexCharArray arr (off + 7) + && c8 == indexCharArray arr (off + 8) + && c9 == indexCharArray arr (off + 9) + && c10 == indexCharArray arr (off + 10) + && c11 == indexCharArray arr (off + 11) + && c12 == indexCharArray arr (off + 12) + && c13 == indexCharArray arr (off + 13) _ -> False equals15 :: Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Char -> Bytes -> Bool equals15 !c0 !c1 !c2 !c3 !c4 !c5 !c6 !c7 !c8 !c9 !c10 !c11 !c12 !c13 !c14 (Bytes arr off len) = case len of - 15 -> c0 == indexCharArray arr off && - c1 == indexCharArray arr (off + 1) && - c2 == indexCharArray arr (off + 2) && - c3 == indexCharArray arr (off + 3) && - c4 == indexCharArray arr (off + 4) && - c5 == indexCharArray arr (off + 5) && - c6 == indexCharArray arr (off + 6) && - c7 == indexCharArray arr (off + 7) && - c8 == indexCharArray arr (off + 8) && - c9 == indexCharArray arr (off + 9) && - c10 == indexCharArray arr (off + 10) && - c11 == indexCharArray arr (off + 11) && - c12 == indexCharArray arr (off + 12) && - c13 == indexCharArray arr (off + 13) && - c14 == indexCharArray arr (off + 14) + 15 -> + c0 == indexCharArray arr off + && c1 == indexCharArray arr (off + 1) + && c2 == indexCharArray arr (off + 2) + && c3 == indexCharArray arr (off + 3) + && c4 == indexCharArray arr (off + 4) + && c5 == indexCharArray arr (off + 5) + && c6 == indexCharArray arr (off + 6) + && c7 == indexCharArray arr (off + 7) + && c8 == indexCharArray arr (off + 8) + && c9 == indexCharArray arr (off + 9) + && c10 == indexCharArray arr (off + 10) + && c11 == indexCharArray arr (off + 11) + && c12 == indexCharArray arr (off + 12) + && c13 == indexCharArray arr (off + 13) + && c14 == indexCharArray arr (off + 14) _ -> False indexCharArray :: ByteArray -> Int -> Char indexCharArray (ByteArray arr) (I# off) = C# (Exts.indexCharArray# arr off) --- | Decode machine-sized word from decimal representation. Returns --- Nothing on overflow. Allows any number of leading zeros. Trailing --- non-digit bytes cause Nothing to be returned. +{- | Decode machine-sized word from decimal representation. Returns +Nothing on overflow. Allows any number of leading zeros. Trailing +non-digit bytes cause Nothing to be returned. +-} decodeDecWord :: Bytes -> Maybe Word -{-# inline decodeDecWord #-} +{-# INLINE decodeDecWord #-} decodeDecWord !b = case decWordStart b of (# (# #) | #) -> Nothing (# | w #) -> Just (W# w) decWordStart :: - Bytes -- Chunk - -> (# (# #) | Word# #) -{-# noinline decWordStart #-} -decWordStart !chunk0 = if length chunk0 > 0 - then - let !w = fromIntegral @Word8 @Word - (PM.indexByteArray (array chunk0) (offset chunk0)) - 48 - in if w < 10 - then decWordMore w (Bytes.unsafeDrop 1 chunk0) - else (# (# #) | #) - else (# (# #) | #) - where - decWordMore :: - Word -- Accumulator - -> Bytes -- Chunk - -> (# (# #) | Word# #) - decWordMore !acc !chunk = let len = length chunk in case len of - 0 -> (# | unW (fromIntegral acc) #) - _ -> - let !w = fromIntegral @Word8 @Word - (PM.indexByteArray (array chunk) (offset chunk)) - 48 + Bytes -> -- Chunk + (# (# #) | Word# #) +{-# NOINLINE decWordStart #-} +decWordStart !chunk0 = + if length chunk0 > 0 + then + let !w = + fromIntegral @Word8 @Word + (PM.indexByteArray (array chunk0) (offset chunk0)) + - 48 in if w < 10 - then - let (overflow,acc') = unsignedPushBase10 acc w - in if overflow - then (# (# #) | #) - else decWordMore acc' (Bytes.unsafeDrop 1 chunk) + then decWordMore w (Bytes.unsafeDrop 1 chunk0) else (# (# #) | #) + else (# (# #) | #) + where + decWordMore :: + Word -> -- Accumulator + Bytes -> -- Chunk + (# (# #) | Word# #) + decWordMore !acc !chunk = + let len = length chunk + in case len of + 0 -> (# | unW (fromIntegral acc) #) + _ -> + let !w = + fromIntegral @Word8 @Word + (PM.indexByteArray (array chunk) (offset chunk)) + - 48 + in if w < 10 + then + let (overflow, acc') = unsignedPushBase10 acc w + in if overflow + then (# (# #) | #) + else decWordMore acc' (Bytes.unsafeDrop 1 chunk) + else (# (# #) | #) -unsignedPushBase10 :: Word -> Word -> (Bool,Word) -{-# inline unsignedPushBase10 #-} -unsignedPushBase10 (W# a) (W# b) = +unsignedPushBase10 :: Word -> Word -> (Bool, Word) +{-# INLINE unsignedPushBase10 #-} +unsignedPushBase10 (W# a) (W# b) = let !(# ca, r0 #) = Exts.timesWord2# a 10## !r1 = Exts.plusWord# r0 b !cb = int2Word# (ltWord# r1 r0) !c = ca `or#` cb - in (case c of { 0## -> False; _ -> True }, W# r1) + in (case c of 0## -> False; _ -> True, W# r1) unW :: Word -> Word# -{-# inline unW #-} +{-# INLINE unW #-} unW (W# w) = w diff --git a/src/Data/Bytes/Text/Utf8.hs b/src/Data/Bytes/Text/Utf8.hs index 2efbfb5..7026bfa 100644 --- a/src/Data/Bytes/Text/Utf8.hs +++ b/src/Data/Bytes/Text/Utf8.hs @@ -1,5 +1,5 @@ -{-# language CPP #-} -{-# language BangPatterns #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} -- | Convert 'Bytes' to and from 'Text' and 'ShortText'. module Data.Bytes.Text.Utf8 @@ -9,34 +9,36 @@ module Data.Bytes.Text.Utf8 , fromText #endif #if MIN_VERSION_text(2,1,0) - , toText + -- , toText #endif ) where -import Data.Bytes.Types (Bytes(Bytes)) -import Data.Text.Short (ShortText) +import Data.Bytes.Types (Bytes (Bytes)) +import Data.Primitive (ByteArray (ByteArray)) import Data.Text (Text) -import Data.Primitive (ByteArray(ByteArray)) +import Data.Text.Short (ShortText) -import qualified Data.Text.Short as TS import qualified Data.Bytes as Bytes -import qualified Data.Text.Internal as I import qualified Data.Text.Array as A +import qualified Data.Text.Internal as I +import qualified Data.Text.Short as TS #if MIN_VERSION_text(2,1,0) import qualified Data.Text.Internal.Validate #endif --- | Encode 'ShortText' using UTF-8. Since 'ShortText' is backed by a UTF-8 --- byte sequence, this does not perform a copy. +{- | Encode 'ShortText' using UTF-8. Since 'ShortText' is backed by a UTF-8 +byte sequence, this does not perform a copy. +-} fromShortText :: ShortText -> Bytes -{-# inline fromShortText #-} +{-# INLINE fromShortText #-} fromShortText = Bytes.fromShortByteString . TS.toShortByteString --- | Attempt to interpret the byte sequence as UTF-8 encoded text. Returns --- 'Nothing' if the bytes are not UTF-8 encoded text. +{- | Attempt to interpret the byte sequence as UTF-8 encoded text. Returns +'Nothing' if the bytes are not UTF-8 encoded text. +-} toShortText :: Bytes -> Maybe ShortText -{-# inline toShortText #-} +{-# INLINE toShortText #-} toShortText !b = TS.fromShortByteString (Bytes.toShortByteString b) #if MIN_VERSION_text(2,0,0) diff --git a/src/Data/Bytes/Text/Windows1252.hs b/src/Data/Bytes/Text/Windows1252.hs index 852960e..be922ee 100644 --- a/src/Data/Bytes/Text/Windows1252.hs +++ b/src/Data/Bytes/Text/Windows1252.hs @@ -1,3 +1,4 @@ --- | Placeholder module in case there is demand for treating 'Bytes' as --- Windows-1252-encoded text +{- | Placeholder module in case there is demand for treating 'Bytes' as +Windows-1252-encoded text +-} module Data.Bytes.Text.Windows1252 () where diff --git a/src/Data/Bytes/Types.hs b/src/Data/Bytes/Types.hs index c1e6ad3..b1db982 100644 --- a/src/Data/Bytes/Types.hs +++ b/src/Data/Bytes/Types.hs @@ -1,50 +1,51 @@ -{-# language BangPatterns #-} -{-# language DataKinds #-} -{-# language PolyKinds #-} -{-# language MagicHash #-} -{-# language TypeFamilies #-} -{-# language DuplicateRecordFields #-} -{-# language ScopedTypeVariables #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE PolyKinds #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeFamilies #-} module Data.Bytes.Types - ( Bytes(..) - , Bytes#(..) - , MutableBytes(..) - , UnmanagedBytes(..) - , BytesN(..) - , ByteArrayN(..) + ( Bytes (..) + , Bytes# (..) + , MutableBytes (..) + , UnmanagedBytes (..) + , BytesN (..) + , ByteArrayN (..) ) where -import Data.Bytes.Internal (Bytes(..)) -import Data.Primitive (ByteArray(..),MutableByteArray(..)) -import Data.Primitive.Addr (Addr) -import GHC.TypeNats (Nat,KnownNat,natVal) -import Reps (Bytes#(..)) +import Data.Bytes.Internal (Bytes (..)) import Data.Bytes.Internal.Show (showsSlice) -import Data.Proxy (Proxy(Proxy)) +import Data.Primitive (ByteArray (..), MutableByteArray (..)) +import Data.Primitive.Addr (Addr) +import Data.Proxy (Proxy (Proxy)) import GHC.Natural (naturalToInteger) +import GHC.TypeNats (KnownNat, Nat, natVal) +import Reps (Bytes# (..)) --- | A slice of a 'ByteArray' whose compile-time-known length is represented --- by a phantom type variable. Consumers of this data constructor must be --- careful to preserve the expected invariant. +{- | A slice of a 'ByteArray' whose compile-time-known length is represented +by a phantom type variable. Consumers of this data constructor must be +careful to preserve the expected invariant. +-} data BytesN (n :: Nat) = BytesN { array :: {-# UNPACK #-} !ByteArray , offset :: {-# UNPACK #-} !Int } -instance KnownNat n => Show (BytesN n) where +instance (KnownNat n) => Show (BytesN n) where showsPrec _ (BytesN arr off) s = let len = fromInteger (naturalToInteger (natVal (Proxy :: Proxy n))) in showsSlice arr off len s --- | A 'ByteArray' whose compile-time-known length is represented --- by a phantom type variable. Consumers of this data constructor must be --- careful to preserve the expected invariant. +{- | A 'ByteArray' whose compile-time-known length is represented +by a phantom type variable. Consumers of this data constructor must be +careful to preserve the expected invariant. +-} newtype ByteArrayN (n :: Nat) = ByteArrayN { array :: ByteArray } -instance KnownNat n => Show (ByteArrayN n) where +instance (KnownNat n) => Show (ByteArrayN n) where showsPrec _ (ByteArrayN arr) s = let len = fromInteger (naturalToInteger (natVal (Proxy :: Proxy n))) in showsSlice arr 0 len s diff --git a/test/Main.hs b/test/Main.hs index ab9b2e5..e4fd9fb 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1,37 +1,40 @@ -{-# language BangPatterns #-} -{-# language MultiWayIf #-} -{-# language NumDecimals #-} -{-# language OverloadedStrings #-} -{-# language ScopedTypeVariables #-} -{-# language TypeApplications #-} - +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TypeApplications #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -import Data.Bytes.Chunks (Chunks(ChunksNil,ChunksCons)) -import Data.Bytes.Types (Bytes(Bytes)) +import Control.Monad.Trans.Writer (Writer, tell) +import Data.Bytes.Chunks (Chunks (ChunksCons, ChunksNil)) +import Data.Bytes.Types (Bytes (Bytes)) import Data.Char (ord) import Data.Primitive (ByteArray) -import Data.Proxy (Proxy(..)) -import Data.Word (Word8) -import Test.Tasty (defaultMain,testGroup,TestTree) -import Test.Tasty.HUnit ((@=?),testCase) -import Test.Tasty.QuickCheck ((===),testProperty,property,Discard(Discard)) -import Test.Tasty.QuickCheck ((==>),Arbitrary) -import Test.Tasty.QuickCheck (ASCIIString(ASCIIString)) -import Control.Monad.Trans.Writer (Writer,tell) +import Data.Proxy (Proxy (..)) import Data.Text (Text) +import Data.Word (Word8) +import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty.HUnit (testCase, (@=?)) +import Test.Tasty.QuickCheck + ( ASCIIString (ASCIIString) + , Arbitrary + , Discard (Discard) + , property + , testProperty + , (===) + , (==>) + ) -import qualified Data.Text as Text -import qualified Data.Text.Encoding as Text.Encoding import qualified Data.ByteString as ByteString import qualified Data.Bytes as Bytes +import qualified Data.Bytes.Chunks as Chunks import qualified Data.Bytes.Text.Ascii as Ascii -import qualified Data.Bytes.Text.Latin1 as Latin1 import qualified Data.Bytes.Text.AsciiExt as AsciiExt -import qualified Data.Bytes.Chunks as Chunks +import qualified Data.Bytes.Text.Latin1 as Latin1 import qualified Data.Foldable as Foldable import qualified Data.List as List import qualified Data.Primitive as PM +import qualified Data.Text as Text +import qualified Data.Text.Encoding as Text.Encoding import qualified GHC.Exts as Exts import qualified Test.QuickCheck.Classes as QCC import qualified Test.Tasty.HUnit as THU @@ -41,303 +44,296 @@ main :: IO () main = defaultMain tests tests :: TestTree -tests = testGroup "Bytes" - [ lawsToTest (QCC.eqLaws (Proxy :: Proxy Bytes)) - , lawsToTest (QCC.ordLaws (Proxy :: Proxy Bytes)) - , lawsToTest (QCC.semigroupLaws (Proxy :: Proxy Bytes)) - , lawsToTest (QCC.monoidLaws (Proxy :: Proxy Bytes)) - , testGroup "toLowerAsciiByteArrayClone" - [ testCase "A" $ THU.assertBool "" $ - Bytes.isPrefixOf (bytes "hey") (bytes "hey man") - ] - , testGroup "isPrefixOf" - [ testCase "A" $ - AsciiExt.toLowerU (bytes "FooBar") - @=? - pack "foobar" - , testCase "B" $ THU.assertBool "" $ - not (Bytes.isPrefixOf (bytes "an") (bytes "hey man")) - ] - , testGroup "isSuffixOf" - [ testCase "A" $ THU.assertBool "" $ - Bytes.isSuffixOf (bytes "an") (bytes "hey man") - , testCase "B" $ THU.assertBool "" $ - not (Bytes.isSuffixOf (bytes "h") (bytes "hey man")) - ] - , testGroup "isInfixOf" - [ testCase "small pattern A" $ THU.assertBool "" $ - Bytes.isInfixOf (bytes "y m") (bytes "hey man") - , testCase "small pattern B" $ THU.assertBool "" $ - Bytes.isInfixOf (bytes "h") (bytes "hey man") - , testCase "small pattern C" $ THU.assertBool "" $ - Bytes.isInfixOf (bytes "an") (bytes "hey man") - , testCase "small pattern D" $ THU.assertBool "" $ - not (Bytes.isInfixOf (bytes "Y M") (bytes "hey man")) - , testCase "large pattern A" $ THU.assertBool "" $ - Bytes.isInfixOf (bytes "Hello, hello!") (bytes "I say hello. Hello, hello! I don't know why you say goodbye, I say hello!") - , testCase "large pattern D" $ THU.assertBool "" $ - not (Bytes.isInfixOf (bytes "Hello, hello!") (bytes "I don't know why you say goodbye, I say hello!")) - , testCase "edge: empty pattern" $ THU.assertBool "" $ - Bytes.isInfixOf (bytes "") (bytes "hello hello!") - , testCase "edge: empty string" $ THU.assertBool "" $ - not (Bytes.isInfixOf (bytes "hello hello!") (bytes "")) - ] - , testGroup "findIndices" - [ testCase "A" $ - Exts.fromList [4] - @=? - Bytes.findIndices (Bytes.fromByteArray (pack "greatest showman")) (Bytes.fromByteArray (pack "the greatest showman of all")) - , testCase "B" $ - Exts.fromList [4] - @=? - Bytes.findIndices (bytes "greatest showman") (bytes "the greatest showman of all") - , testCase "C" $ - Exts.fromList [0,1,2,3,4,5] - @=? - Bytes.findIndices (bytes "a") (bytes "aaaaaa") - , testCase "D" $ - Exts.fromList [0,2,4] - @=? - Bytes.findIndices (bytes "aa") (bytes "aaaaaaa") - ] - , testGroup "replace-spec" - [ testCase "A" $ - Bytes.empty - @=? - Bytes.replace (Bytes.fromByteArray (pack "hello")) (Bytes.fromByteArray (pack "world")) Bytes.empty - , testCase "B" $ - bytes "xzybbcbbc" - @=? - Bytes.replace (bytes "a") (bytes "b") (bytes "xzyabcabc") - , testCase "C" $ - bytes "my favorite month is March!" - @=? - Bytes.replace (bytes "November") (bytes "March") (bytes "my favorite month is November!") - , testCase "D" $ - bytes "Saturn, Saturn, Mars, Saturn" - @=? - Bytes.replace (bytes "Jupiter") (bytes "Saturn") (bytes "Jupiter, Jupiter, Mars, Jupiter") - ] - , testGroup "stripOptionalSuffix" - [ testCase "A" $ - Ascii.fromString "hey m" - @=? - Bytes.stripOptionalSuffix (bytes "an") (bytes "hey man") - , testCase "B" $ - Ascii.fromString "hey man" - @=? - Bytes.stripOptionalSuffix (bytes "h") (bytes "hey man") - ] - , testGroup "longestCommonPrefix" - [ testProperty "finds prefix" $ \(pre :: Bytes) (a :: Bytes) (b :: Bytes) -> - if | Just (wa,_) <- Bytes.uncons a, Just (wb,_) <- Bytes.uncons b, wa /= wb -> - Bytes.longestCommonPrefix (pre <> a) (pre <> b) === pre - | otherwise -> property Discard - , testProperty "finds no prefix" $ \(a :: Bytes) (b :: Bytes) -> - if | Just (wa,_) <- Bytes.uncons a, Just (wb,_) <- Bytes.uncons b, wa /= wb -> - Bytes.longestCommonPrefix a b === mempty - | otherwise -> property Discard - ] - , testGroup "dropWhileEnd" - [ testCase "A" $ - Ascii.fromString "aabbcc" - @=? - Bytes.dropWhileEnd (== c2w 'b') (bytes "aabbccbb") - ] - , testGroup "takeWhileEnd" - [ testCase "A" $ - Ascii.fromString "bb" - @=? - Bytes.takeWhileEnd (== c2w 'b') (bytes "aabbccbb") - , testCase "B" $ - Ascii.fromString "" - @=? - Bytes.takeWhileEnd (/= c2w '\n') (bytes "aabbccbb\n") - , testCase "C" $ - slicedPack [0x1,0x2,0x3] - @=? - Bytes.takeWhileEnd (/= 0x0) (slicedPack [0x1,0x0,0x1,0x2,0x3]) - ] - , testProperty "decodeDecWord" $ \(w :: Word) -> - Just w - === - Latin1.decodeDecWord (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : (Exts.toList (Latin1.fromString (show w)))))) - , testProperty "elem" $ \(x :: Word8) (xs :: [Word8]) -> - List.elem x xs - === - Bytes.elem x (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) - , testProperty "foldl" $ \(x :: Word8) (xs :: [Word8]) -> - List.foldl (-) 0 xs - === - Bytes.foldl (-) 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) - , testProperty "foldlM" $ \(x :: Word8) (xs :: [Word8]) -> - let f acc y = (tell [x] >> pure (acc - y)) :: Writer [Word8] Word8 in - Foldable.foldlM f 0 xs - === - Bytes.foldlM f 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) - , testProperty "foldrM" $ \(x :: Word8) (xs :: [Word8]) -> - let f acc y = (tell [x] >> pure (acc - y)) :: Writer [Word8] Word8 in - Foldable.foldrM f 0 xs - === - Bytes.foldrM f 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) - , testProperty "foldl'" $ \(x :: Word8) (xs :: [Word8]) -> - List.foldl' (-) 0 xs - === - Bytes.foldl' (-) 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) - , testProperty "foldr" $ \(x :: Word8) (xs :: [Word8]) -> - Foldable.foldr (-) 0 xs - === - Bytes.foldr (-) 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) - , testProperty "foldr'" $ \(x :: Word8) (xs :: [Word8]) -> - Foldable.foldr' (-) 0 xs - === - Bytes.foldr' (-) 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) - , testProperty "count" $ \(x :: Word8) (xs :: [Word8]) -> - ByteString.count x (ByteString.pack xs) - === - Bytes.count x (slicedPack xs) - , testProperty "split" $ \(x :: Word8) (xs :: [Word8]) -> - not (List.null xs) - ==> - ByteString.split x (ByteString.pack xs) - === - map bytesToByteString (Bytes.split x (slicedPack xs)) - , testProperty "intercalate" $ \(x :: Bytes) (xs :: [Bytes]) -> - mconcat (List.intersperse x xs) - === - Bytes.intercalate x xs - , testProperty "concatArray" $ \(xs :: [Bytes]) -> - mconcat xs - === - Bytes.concatArray (Exts.fromList xs) - , testProperty "splitNonEmpty" $ \(x :: Word8) (xs :: [Word8]) -> - Bytes.split x (slicedPack xs) - === - Foldable.toList (Bytes.splitNonEmpty x (slicedPack xs)) - , testProperty "splitInit" $ \(x :: Word8) (xs :: [Word8]) -> case xs of - [] -> Bytes.splitInit x (slicedPack xs) === [] - _ -> - List.init (ByteString.split x (ByteString.pack xs)) - === - map bytesToByteString (Bytes.splitInit x (slicedPack xs)) - , testProperty "splitU" $ \(x :: Word8) (xs :: [Word8]) -> - not (List.null xs) - ==> - Bytes.split x (slicedPack xs) - === - map Bytes.fromByteArray (Exts.toList (Bytes.splitU x (slicedPack xs))) - , testCase "splitInit-A" $ - [Ascii.fromString "hello", Ascii.fromString "world"] - @=? - (Bytes.splitInit 0x0A (Ascii.fromString "hello\nworld\n")) - , testCase "splitInit-B" $ - [Ascii.fromString "hello", Ascii.fromString "world"] - @=? - (Bytes.splitInit 0x0A (Ascii.fromString "hello\nworld\nthere")) - , testProperty "replace" $ \(ws :: ASCIIString) (xs :: ASCIIString) (ys :: ASCIIString) (zs :: ASCIIString) -> - let ws' = asciiStringToBytes ws - xs' = asciiStringToBytes xs - ys' = asciiStringToBytes ys - zs' = asciiStringToBytes zs - ws'' = asciiStringToText ws - xs'' = asciiStringToText xs - ys'' = asciiStringToText ys - zs'' = asciiStringToText zs - in case ws of - ASCIIString [] -> property Discard - _ -> - Bytes.replace ws' xs' (ys' <> ws' <> zs') - === - Bytes.fromByteString (Text.Encoding.encodeUtf8 (Text.replace ws'' xs'' (ys'' <> ws'' <> zs''))) - , testProperty "splitEnd1" $ \(x :: Word8) (xs :: [Word8]) -> - case ByteString.split x (ByteString.pack xs) of - [] -> Bytes.splitEnd1 x (slicedPack xs) === Nothing - [_] -> Bytes.splitEnd1 x (slicedPack xs) === Nothing - [y1,z1] -> case Bytes.splitEnd1 x (slicedPack xs) of - Nothing -> property False - Just (y2,z2) -> (y1,z1) === (bytesToByteString y2, bytesToByteString z2) - _ -> property Discard - , testProperty "split1" $ \(x :: Word8) (xs :: [Word8]) -> - case ByteString.split x (ByteString.pack xs) of - [] -> Bytes.split1 x (slicedPack xs) === Nothing - [_] -> Bytes.split1 x (slicedPack xs) === Nothing - [y1,z1] -> case Bytes.split1 x (slicedPack xs) of - Nothing -> property False - Just (y2,z2) -> (y1,z1) === (bytesToByteString y2, bytesToByteString z2) - _ -> property Discard - , testProperty "splitTetragram1A" $ \(w0 :: Word8) (w1 :: Word8) (w2 :: Word8) (w3 :: Word8) (xs :: [Word8]) -> - (w0 /= 0xEF && w1 /= 0xEF && w2 /= 0xEF && w3 /= 0xEF) - ==> - case Bytes.splitTetragram1 w0 w1 w2 w3 (slicedPack (0xEF : w0 : w1 : w2 : w3 : xs)) of - Nothing -> property False - Just (pre,post) -> - (pre,post) === (Exts.fromList [0xEF], Exts.fromList xs) - , testProperty "splitTetragram1B" $ \(w0 :: Word8) (w1 :: Word8) (w2 :: Word8) (w3 :: Word8) (xs :: [Word8]) -> - (w0 /= 0xEF && w1 /= 0xEF && w2 /= 0xEF && w3 /= 0xEF) - ==> - case Bytes.splitTetragram1 w0 w1 w2 w3 (slicedPack (0xEF : 0xEF : 0xEF : 0xEF : w0 : w1 : w2 : w3 : xs)) of - Nothing -> property False - Just (pre,post) -> - (pre,post) === (Exts.fromList (List.replicate 4 0xEF), Exts.fromList xs) - , testProperty "splitTetragram1C" $ \(w0 :: Word8) (w1 :: Word8) (w2 :: Word8) (xs :: [Word8]) -> - (w0 /= 0xEF && w1 /= 0xEF && w2 /= 0xEF) - ==> - case Bytes.splitTetragram1 w0 w1 w2 0xEF (slicedPack (xs ++ [w0, w1, w2, 0xEF])) of - Nothing -> property False - Just (pre,post) -> - (pre,post) === (Exts.fromList xs, mempty) - , testProperty "splitTetragram1D" $ \(w0 :: Word8) (w1 :: Word8) (w2 :: Word8) (xs :: [Word8]) -> - (w0 /= 0xEF && w1 /= 0xEF && w2 /= 0xEF) - ==> - case Bytes.splitTetragram1 w0 w1 w2 0xEF (slicedPack (xs ++ [w0, w1, w2, 0xEF, 0xEF])) of - Nothing -> property False - Just (pre,post) -> - (pre,post) === (Exts.fromList xs, Exts.fromList [0xEF]) - , testProperty "split2" $ \(xs :: [Word8]) (ys :: [Word8]) (zs :: [Word8]) -> - (all (/=0xEF) xs && all (/=0xEF) ys && all (/=0xEF) zs) - ==> - case Bytes.split2 0xEF (slicedPack (xs ++ [0xEF] ++ ys ++ [0xEF] ++ zs)) of - Just r -> r === (slicedPack xs, slicedPack ys, slicedPack zs) - Nothing -> property False - , testProperty "split3" $ \(ws :: [Word8]) (xs :: [Word8]) (ys :: [Word8]) (zs :: [Word8])-> - (all (/=0xEF) ws && all (/=0xEF) xs && all (/=0xEF) ys && all (/=0xEF) zs) - ==> - case Bytes.split3 0xEF (slicedPack (ws ++ [0xEF] ++ xs ++ [0xEF] ++ ys ++ [0xEF] ++ zs)) of - Just r -> r === (slicedPack ws, slicedPack xs, slicedPack ys, slicedPack zs) - Nothing -> property False - , testProperty "split4" $ \(ws :: [Word8]) (xs :: [Word8]) (ys :: [Word8]) (zs :: [Word8]) (ps :: [Word8]) -> - (all (/=0xEF) ws && all (/=0xEF) xs && all (/=0xEF) ys && all (/=0xEF) zs && all (/=0xEF) ps) - ==> - case Bytes.split4 0xEF (slicedPack (ws ++ [0xEF] ++ xs ++ [0xEF] ++ ys ++ [0xEF] ++ zs ++ [0xEF] ++ ps)) of - Just r -> r === (slicedPack ws, slicedPack xs, slicedPack ys, slicedPack zs, slicedPack ps) - Nothing -> property False - , testGroup "FNV-1a" - [ testGroup "32-bit" - [ testCase "empty" (Bytes.fnv1a32 Bytes.empty @=? 0x811c9dc5) - , testCase "a" (Bytes.fnv1a32 (bytes "a") @=? 0xe40c292c) - , testCase "foobar" (Bytes.fnv1a32 (bytes "foobar") @=? 0xbf9cf968) - ] - , testGroup "64-bit" - [ testCase "empty" (Bytes.fnv1a64 Bytes.empty @=? 0xcbf29ce484222325) - , testCase "a" (Bytes.fnv1a64 (bytes "a") @=? 0xaf63dc4c8601ec8c) - , testCase "foobar" (Bytes.fnv1a64 (bytes "foobar") @=? 0x85944171f73967e8) - , testCase "google.com" (Bytes.fnv1a64 (bytes "google.com") @=? 0xe1a2c1ae38dcdf45) - ] - ] - , testGroup "Chunks" - [ lawsToTest (QCC.eqLaws (Proxy :: Proxy Chunks)) - , lawsToTest (QCC.semigroupLaws (Proxy :: Proxy Chunks)) - , lawsToTest (QCC.monoidLaws (Proxy :: Proxy Chunks)) - , testGroup "concatenation" - [ testProperty "concat=concatU" $ \(c :: Chunks) -> - Chunks.concat c === Bytes.fromByteArray (Chunks.concatU c) - , testProperty "concat-singleton" $ \(b :: Bytes) -> - Chunks.concat (ChunksCons b ChunksNil) === b - , testProperty "concatU-singleton" $ \(b :: Bytes) -> - Chunks.concatU (ChunksCons b ChunksNil) === Bytes.toByteArray b - ] +tests = + testGroup + "Bytes" + [ lawsToTest (QCC.eqLaws (Proxy :: Proxy Bytes)) + , lawsToTest (QCC.ordLaws (Proxy :: Proxy Bytes)) + , lawsToTest (QCC.semigroupLaws (Proxy :: Proxy Bytes)) + , lawsToTest (QCC.monoidLaws (Proxy :: Proxy Bytes)) + , testGroup + "toLowerAsciiByteArrayClone" + [ testCase "A" $ + THU.assertBool "" $ + Bytes.isPrefixOf (bytes "hey") (bytes "hey man") + ] + , testGroup + "isPrefixOf" + [ testCase "A" $ + AsciiExt.toLowerU (bytes "FooBar") + @=? pack "foobar" + , testCase "B" $ + THU.assertBool "" $ + not (Bytes.isPrefixOf (bytes "an") (bytes "hey man")) + ] + , testGroup + "isSuffixOf" + [ testCase "A" $ + THU.assertBool "" $ + Bytes.isSuffixOf (bytes "an") (bytes "hey man") + , testCase "B" $ + THU.assertBool "" $ + not (Bytes.isSuffixOf (bytes "h") (bytes "hey man")) + ] + , testGroup + "isInfixOf" + [ testCase "small pattern A" $ + THU.assertBool "" $ + Bytes.isInfixOf (bytes "y m") (bytes "hey man") + , testCase "small pattern B" $ + THU.assertBool "" $ + Bytes.isInfixOf (bytes "h") (bytes "hey man") + , testCase "small pattern C" $ + THU.assertBool "" $ + Bytes.isInfixOf (bytes "an") (bytes "hey man") + , testCase "small pattern D" $ + THU.assertBool "" $ + not (Bytes.isInfixOf (bytes "Y M") (bytes "hey man")) + , testCase "large pattern A" $ + THU.assertBool "" $ + Bytes.isInfixOf (bytes "Hello, hello!") (bytes "I say hello. Hello, hello! I don't know why you say goodbye, I say hello!") + , testCase "large pattern D" $ + THU.assertBool "" $ + not (Bytes.isInfixOf (bytes "Hello, hello!") (bytes "I don't know why you say goodbye, I say hello!")) + , testCase "edge: empty pattern" $ + THU.assertBool "" $ + Bytes.isInfixOf (bytes "") (bytes "hello hello!") + , testCase "edge: empty string" $ + THU.assertBool "" $ + not (Bytes.isInfixOf (bytes "hello hello!") (bytes "")) + ] + , testGroup + "findIndices" + [ testCase "A" $ + Exts.fromList [4] + @=? Bytes.findIndices (Bytes.fromByteArray (pack "greatest showman")) (Bytes.fromByteArray (pack "the greatest showman of all")) + , testCase "B" $ + Exts.fromList [4] + @=? Bytes.findIndices (bytes "greatest showman") (bytes "the greatest showman of all") + , testCase "C" $ + Exts.fromList [0, 1, 2, 3, 4, 5] + @=? Bytes.findIndices (bytes "a") (bytes "aaaaaa") + , testCase "D" $ + Exts.fromList [0, 2, 4] + @=? Bytes.findIndices (bytes "aa") (bytes "aaaaaaa") + ] + , testGroup + "replace-spec" + [ testCase "A" $ + Bytes.empty + @=? Bytes.replace (Bytes.fromByteArray (pack "hello")) (Bytes.fromByteArray (pack "world")) Bytes.empty + , testCase "B" $ + bytes "xzybbcbbc" + @=? Bytes.replace (bytes "a") (bytes "b") (bytes "xzyabcabc") + , testCase "C" $ + bytes "my favorite month is March!" + @=? Bytes.replace (bytes "November") (bytes "March") (bytes "my favorite month is November!") + , testCase "D" $ + bytes "Saturn, Saturn, Mars, Saturn" + @=? Bytes.replace (bytes "Jupiter") (bytes "Saturn") (bytes "Jupiter, Jupiter, Mars, Jupiter") + ] + , testGroup + "stripOptionalSuffix" + [ testCase "A" $ + Ascii.fromString "hey m" + @=? Bytes.stripOptionalSuffix (bytes "an") (bytes "hey man") + , testCase "B" $ + Ascii.fromString "hey man" + @=? Bytes.stripOptionalSuffix (bytes "h") (bytes "hey man") + ] + , testGroup + "longestCommonPrefix" + [ testProperty "finds prefix" $ \(pre :: Bytes) (a :: Bytes) (b :: Bytes) -> + if + | Just (wa, _) <- Bytes.uncons a + , Just (wb, _) <- Bytes.uncons b + , wa /= wb -> + Bytes.longestCommonPrefix (pre <> a) (pre <> b) === pre + | otherwise -> property Discard + , testProperty "finds no prefix" $ \(a :: Bytes) (b :: Bytes) -> + if + | Just (wa, _) <- Bytes.uncons a + , Just (wb, _) <- Bytes.uncons b + , wa /= wb -> + Bytes.longestCommonPrefix a b === mempty + | otherwise -> property Discard + ] + , testGroup + "dropWhileEnd" + [ testCase "A" $ + Ascii.fromString "aabbcc" + @=? Bytes.dropWhileEnd (== c2w 'b') (bytes "aabbccbb") + ] + , testGroup + "takeWhileEnd" + [ testCase "A" $ + Ascii.fromString "bb" + @=? Bytes.takeWhileEnd (== c2w 'b') (bytes "aabbccbb") + , testCase "B" $ + Ascii.fromString "" + @=? Bytes.takeWhileEnd (/= c2w '\n') (bytes "aabbccbb\n") + , testCase "C" $ + slicedPack [0x1, 0x2, 0x3] + @=? Bytes.takeWhileEnd (/= 0x0) (slicedPack [0x1, 0x0, 0x1, 0x2, 0x3]) + ] + , testProperty "decodeDecWord" $ \(w :: Word) -> + Just w + === Latin1.decodeDecWord (Bytes.unsafeDrop 1 (Exts.fromList (0xFF : (Exts.toList (Latin1.fromString (show w)))))) + , testProperty "elem" $ \(x :: Word8) (xs :: [Word8]) -> + List.elem x xs + === Bytes.elem x (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) + , testProperty "foldl" $ \(x :: Word8) (xs :: [Word8]) -> + List.foldl (-) 0 xs + === Bytes.foldl (-) 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) + , testProperty "foldlM" $ \(x :: Word8) (xs :: [Word8]) -> + let f acc y = (tell [x] >> pure (acc - y)) :: Writer [Word8] Word8 + in Foldable.foldlM f 0 xs + === Bytes.foldlM f 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) + , testProperty "foldrM" $ \(x :: Word8) (xs :: [Word8]) -> + let f acc y = (tell [x] >> pure (acc - y)) :: Writer [Word8] Word8 + in Foldable.foldrM f 0 xs + === Bytes.foldrM f 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) + , testProperty "foldl'" $ \(x :: Word8) (xs :: [Word8]) -> + List.foldl' (-) 0 xs + === Bytes.foldl' (-) 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) + , testProperty "foldr" $ \(x :: Word8) (xs :: [Word8]) -> + Foldable.foldr (-) 0 xs + === Bytes.foldr (-) 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) + , testProperty "foldr'" $ \(x :: Word8) (xs :: [Word8]) -> + Foldable.foldr' (-) 0 xs + === Bytes.foldr' (-) 0 (Bytes.unsafeDrop 1 (Exts.fromList (x : xs))) + , testProperty "count" $ \(x :: Word8) (xs :: [Word8]) -> + ByteString.count x (ByteString.pack xs) + === Bytes.count x (slicedPack xs) + , testProperty "split" $ \(x :: Word8) (xs :: [Word8]) -> + not (List.null xs) + ==> ByteString.split x (ByteString.pack xs) + === map bytesToByteString (Bytes.split x (slicedPack xs)) + , testProperty "intercalate" $ \(x :: Bytes) (xs :: [Bytes]) -> + mconcat (List.intersperse x xs) + === Bytes.intercalate x xs + , testProperty "concatArray" $ \(xs :: [Bytes]) -> + mconcat xs + === Bytes.concatArray (Exts.fromList xs) + , testProperty "splitNonEmpty" $ \(x :: Word8) (xs :: [Word8]) -> + Bytes.split x (slicedPack xs) + === Foldable.toList (Bytes.splitNonEmpty x (slicedPack xs)) + , testProperty "splitInit" $ \(x :: Word8) (xs :: [Word8]) -> case xs of + [] -> Bytes.splitInit x (slicedPack xs) === [] + _ -> + List.init (ByteString.split x (ByteString.pack xs)) + === map bytesToByteString (Bytes.splitInit x (slicedPack xs)) + , testProperty "splitU" $ \(x :: Word8) (xs :: [Word8]) -> + not (List.null xs) + ==> Bytes.split x (slicedPack xs) + === map Bytes.fromByteArray (Exts.toList (Bytes.splitU x (slicedPack xs))) + , testCase "splitInit-A" $ + [Ascii.fromString "hello", Ascii.fromString "world"] + @=? (Bytes.splitInit 0x0A (Ascii.fromString "hello\nworld\n")) + , testCase "splitInit-B" $ + [Ascii.fromString "hello", Ascii.fromString "world"] + @=? (Bytes.splitInit 0x0A (Ascii.fromString "hello\nworld\nthere")) + , testProperty "replace" $ \(ws :: ASCIIString) (xs :: ASCIIString) (ys :: ASCIIString) (zs :: ASCIIString) -> + let ws' = asciiStringToBytes ws + xs' = asciiStringToBytes xs + ys' = asciiStringToBytes ys + zs' = asciiStringToBytes zs + ws'' = asciiStringToText ws + xs'' = asciiStringToText xs + ys'' = asciiStringToText ys + zs'' = asciiStringToText zs + in case ws of + ASCIIString [] -> property Discard + _ -> + Bytes.replace ws' xs' (ys' <> ws' <> zs') + === Bytes.fromByteString (Text.Encoding.encodeUtf8 (Text.replace ws'' xs'' (ys'' <> ws'' <> zs''))) + , testProperty "splitEnd1" $ \(x :: Word8) (xs :: [Word8]) -> + case ByteString.split x (ByteString.pack xs) of + [] -> Bytes.splitEnd1 x (slicedPack xs) === Nothing + [_] -> Bytes.splitEnd1 x (slicedPack xs) === Nothing + [y1, z1] -> case Bytes.splitEnd1 x (slicedPack xs) of + Nothing -> property False + Just (y2, z2) -> (y1, z1) === (bytesToByteString y2, bytesToByteString z2) + _ -> property Discard + , testProperty "split1" $ \(x :: Word8) (xs :: [Word8]) -> + case ByteString.split x (ByteString.pack xs) of + [] -> Bytes.split1 x (slicedPack xs) === Nothing + [_] -> Bytes.split1 x (slicedPack xs) === Nothing + [y1, z1] -> case Bytes.split1 x (slicedPack xs) of + Nothing -> property False + Just (y2, z2) -> (y1, z1) === (bytesToByteString y2, bytesToByteString z2) + _ -> property Discard + , testProperty "splitTetragram1A" $ \(w0 :: Word8) (w1 :: Word8) (w2 :: Word8) (w3 :: Word8) (xs :: [Word8]) -> + (w0 /= 0xEF && w1 /= 0xEF && w2 /= 0xEF && w3 /= 0xEF) + ==> case Bytes.splitTetragram1 w0 w1 w2 w3 (slicedPack (0xEF : w0 : w1 : w2 : w3 : xs)) of + Nothing -> property False + Just (pre, post) -> + (pre, post) === (Exts.fromList [0xEF], Exts.fromList xs) + , testProperty "splitTetragram1B" $ \(w0 :: Word8) (w1 :: Word8) (w2 :: Word8) (w3 :: Word8) (xs :: [Word8]) -> + (w0 /= 0xEF && w1 /= 0xEF && w2 /= 0xEF && w3 /= 0xEF) + ==> case Bytes.splitTetragram1 w0 w1 w2 w3 (slicedPack (0xEF : 0xEF : 0xEF : 0xEF : w0 : w1 : w2 : w3 : xs)) of + Nothing -> property False + Just (pre, post) -> + (pre, post) === (Exts.fromList (List.replicate 4 0xEF), Exts.fromList xs) + , testProperty "splitTetragram1C" $ \(w0 :: Word8) (w1 :: Word8) (w2 :: Word8) (xs :: [Word8]) -> + (w0 /= 0xEF && w1 /= 0xEF && w2 /= 0xEF) + ==> case Bytes.splitTetragram1 w0 w1 w2 0xEF (slicedPack (xs ++ [w0, w1, w2, 0xEF])) of + Nothing -> property False + Just (pre, post) -> + (pre, post) === (Exts.fromList xs, mempty) + , testProperty "splitTetragram1D" $ \(w0 :: Word8) (w1 :: Word8) (w2 :: Word8) (xs :: [Word8]) -> + (w0 /= 0xEF && w1 /= 0xEF && w2 /= 0xEF) + ==> case Bytes.splitTetragram1 w0 w1 w2 0xEF (slicedPack (xs ++ [w0, w1, w2, 0xEF, 0xEF])) of + Nothing -> property False + Just (pre, post) -> + (pre, post) === (Exts.fromList xs, Exts.fromList [0xEF]) + , testProperty "split2" $ \(xs :: [Word8]) (ys :: [Word8]) (zs :: [Word8]) -> + (all (/= 0xEF) xs && all (/= 0xEF) ys && all (/= 0xEF) zs) + ==> case Bytes.split2 0xEF (slicedPack (xs ++ [0xEF] ++ ys ++ [0xEF] ++ zs)) of + Just r -> r === (slicedPack xs, slicedPack ys, slicedPack zs) + Nothing -> property False + , testProperty "split3" $ \(ws :: [Word8]) (xs :: [Word8]) (ys :: [Word8]) (zs :: [Word8]) -> + (all (/= 0xEF) ws && all (/= 0xEF) xs && all (/= 0xEF) ys && all (/= 0xEF) zs) + ==> case Bytes.split3 0xEF (slicedPack (ws ++ [0xEF] ++ xs ++ [0xEF] ++ ys ++ [0xEF] ++ zs)) of + Just r -> r === (slicedPack ws, slicedPack xs, slicedPack ys, slicedPack zs) + Nothing -> property False + , testProperty "split4" $ \(ws :: [Word8]) (xs :: [Word8]) (ys :: [Word8]) (zs :: [Word8]) (ps :: [Word8]) -> + (all (/= 0xEF) ws && all (/= 0xEF) xs && all (/= 0xEF) ys && all (/= 0xEF) zs && all (/= 0xEF) ps) + ==> case Bytes.split4 0xEF (slicedPack (ws ++ [0xEF] ++ xs ++ [0xEF] ++ ys ++ [0xEF] ++ zs ++ [0xEF] ++ ps)) of + Just r -> r === (slicedPack ws, slicedPack xs, slicedPack ys, slicedPack zs, slicedPack ps) + Nothing -> property False + , testGroup + "FNV-1a" + [ testGroup + "32-bit" + [ testCase "empty" (Bytes.fnv1a32 Bytes.empty @=? 0x811c9dc5) + , testCase "a" (Bytes.fnv1a32 (bytes "a") @=? 0xe40c292c) + , testCase "foobar" (Bytes.fnv1a32 (bytes "foobar") @=? 0xbf9cf968) + ] + , testGroup + "64-bit" + [ testCase "empty" (Bytes.fnv1a64 Bytes.empty @=? 0xcbf29ce484222325) + , testCase "a" (Bytes.fnv1a64 (bytes "a") @=? 0xaf63dc4c8601ec8c) + , testCase "foobar" (Bytes.fnv1a64 (bytes "foobar") @=? 0x85944171f73967e8) + , testCase "google.com" (Bytes.fnv1a64 (bytes "google.com") @=? 0xe1a2c1ae38dcdf45) + ] + ] + , testGroup + "Chunks" + [ lawsToTest (QCC.eqLaws (Proxy :: Proxy Chunks)) + , lawsToTest (QCC.semigroupLaws (Proxy :: Proxy Chunks)) + , lawsToTest (QCC.monoidLaws (Proxy :: Proxy Chunks)) + , testGroup + "concatenation" + [ testProperty "concat=concatU" $ \(c :: Chunks) -> + Chunks.concat c === Bytes.fromByteArray (Chunks.concatU c) + , testProperty "concat-singleton" $ \(b :: Bytes) -> + Chunks.concat (ChunksCons b ChunksNil) === b + , testProperty "concatU-singleton" $ \(b :: Bytes) -> + Chunks.concatU (ChunksCons b ChunksNil) === Bytes.toByteArray b + ] + ] ] - ] bytes :: String -> Bytes bytes s = let b = pack ('x' : s) in Bytes b 1 (PM.sizeofByteArray b - 1) @@ -359,8 +355,8 @@ bytesToByteString = ByteString.pack . Bytes.foldr (:) [] instance Arbitrary Bytes where arbitrary = do xs :: [Word8] <- TQC.arbitrary - front <- TQC.choose (0,2) - back <- TQC.choose (0,2) + front <- TQC.choose (0, 2) + back <- TQC.choose (0, 2) let frontPad = replicate front (254 :: Word8) let backPad = replicate back (254 :: Word8) let raw = Exts.fromList (frontPad ++ xs ++ backPad) @@ -369,13 +365,17 @@ instance Arbitrary Bytes where instance Arbitrary Chunks where arbitrary = do xs :: [[Word8]] <- TQC.arbitrary - let ys = map - (\x -> Exts.fromList ([255] ++ x ++ [255])) - xs - zs = foldr - (\b cs -> - ChunksCons (Bytes b 1 (PM.sizeofByteArray b - 2)) cs - ) ChunksNil ys + let ys = + map + (\x -> Exts.fromList ([255] ++ x ++ [255])) + xs + zs = + foldr + ( \b cs -> + ChunksCons (Bytes b 1 (PM.sizeofByteArray b - 2)) cs + ) + ChunksNil + ys pure zs lawsToTest :: QCC.Laws -> TestTree