From a1e22f13a8b0366dca85b465f3c709c0a21fd925 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 19 Apr 2022 17:29:25 +0100 Subject: [PATCH 01/28] Initial port of Scala CHAMP HashMap --- .../cats/compat/HashMapCompat.scala | 59 + .../scala-2.13+/cats/data/HashMapCompat.scala | 44 + core/src/main/scala/cats/Foldable.scala | 7 +- core/src/main/scala/cats/data/HashMap.scala | 1152 +++++++++++++++++ .../cats/kernel/compat/HashCompat.scala | 21 + .../cats/kernel/compat/HashCompat.scala | 22 + .../cats/laws/discipline/arbitrary.scala | 10 + .../test/scala/cats/tests/HashMapSuite.scala | 292 +++++ 8 files changed, 1605 insertions(+), 2 deletions(-) create mode 100644 core/src/main/scala-2.12/cats/compat/HashMapCompat.scala create mode 100644 core/src/main/scala-2.13+/cats/data/HashMapCompat.scala create mode 100644 core/src/main/scala/cats/data/HashMap.scala create mode 100644 tests/shared/src/test/scala/cats/tests/HashMapSuite.scala diff --git a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala new file mode 100644 index 0000000000..d08f14fd87 --- /dev/null +++ b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2015 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package cats.data + +private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => + + /** + * Creates a new map with all key-value pairs of this map, and all key-value pairs of `traversable`. + * + * @param traversable the collection of key-value pairs to be added. + * @return a new map that contains all key-value pairs of this map and `traversable`. + */ + final def concat[VV >: V](traversable: TraversableOnce[(K, VV)]): HashMap[K, VV] = { + val newRootNode = traversable.foldLeft(rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + node.add(k, self.hashKey.hash(k), v, 0) + } + + if (newRootNode eq rootNode) + this + else + new HashMap(newRootNode) + } + + /** + * Creates a new map with all key-value pairs of this map, and all key-value pairs of `map`. + * + * @param traversable the collection of key-value pairs to be added. + * @return a new map that contains all key-value pairs of this map and `traversable`. + */ + final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = { + val newRootNode = hm.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + node.add(k, self.hashKey.hash(k), v, 0) + } + + if (newRootNode eq self.rootNode) + this + else + new HashMap(newRootNode) + } +} diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala new file mode 100644 index 0000000000..4e779ae171 --- /dev/null +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2015 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package cats.data + +private[data] trait HashMapCompat[K, +V] extends IterableOnce[(K, V)] { self: HashMap[K, V] => + + override def knownSize = self.size + + /** + * Creates a new map with all key-value pairs of this map, and all key-value pairs of `iterable`. + * + * @param iterable the collection of key-value pairs to be added. + * @return a new map that contains all key-value pairs of this map and `iterable`. + */ + final def concat[VV >: V](iterable: IterableOnce[(K, VV)]): HashMap[K, VV] = { + val newRootNode = iterable.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + node.add(k, self.hashKey.hash(k), v, 0) + } + + if (newRootNode eq self.rootNode) + this + else + new HashMap(newRootNode) + } +} diff --git a/core/src/main/scala/cats/Foldable.scala b/core/src/main/scala/cats/Foldable.scala index 01cd784428..f42839cff8 100644 --- a/core/src/main/scala/cats/Foldable.scala +++ b/core/src/main/scala/cats/Foldable.scala @@ -952,11 +952,14 @@ trait Foldable[F[_]] extends UnorderedFoldable[F] with FoldableNFunctions[F] { s object Foldable { private val sentinel: Function1[Any, Any] = new scala.runtime.AbstractFunction1[Any, Any] { def apply(a: Any) = this } - def iterateRight[A, B](iterable: Iterable[A], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = { + def iterateRight[A, B](iterable: Iterable[A], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = + iterateRight(iterable.iterator, lb)(f) + + private[cats] def iterateRight[A, B](iterator: Iterator[A], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = { def loop(it: Iterator[A]): Eval[B] = Eval.defer(if (it.hasNext) f(it.next(), loop(it)) else lb) - Eval.always(iterable.iterator).flatMap(loop) + Eval.always(iterator).flatMap(loop) } def iterateRightDefer[G[_]: Defer, A, B](iterable: Iterable[A], lb: G[B])(f: (A, G[B]) => G[B]): G[B] = { diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala new file mode 100644 index 0000000000..bead2380e2 --- /dev/null +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -0,0 +1,1152 @@ +/* + * Copyright (c) 2015 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package cats.data + +import cats.Always +import cats.CommutativeApplicative +import cats.Eval +import cats.Foldable +import cats.Semigroup +import cats.Show +import cats.UnorderedTraverse +import cats.kernel.CommutativeMonoid +import cats.kernel.CommutativeSemigroup +import cats.kernel.Monoid +import cats.kernel.Eq +import cats.kernel.Hash +import cats.kernel.instances.StaticMethods +import cats.syntax.eq._ +import java.util.Arrays + +/** + * An immutable hash map using [[cats.kernel.Hash]] for hashing. + * + * Implemented using the CHAMP encoding. + * @see [[https://michael.steindorfer.name/publications/phd-thesis-efficient-immutable-collections.pdf Efficient Immutable Collections]] + * + * @tparam K the type of the keys contained in this hash map. + * @tparam V the type of the values contained in this hash map. + * @param hashKey the [[cats.kernel.Hash]] instance used for hashing keys. + */ +final class HashMap[K, +V] private[data] (private val rootNode: HashMap.Node[K, V])(implicit val hashKey: Hash[K]) + extends HashMapCompat[K, V] { + + /** + * An iterator for this map that can be used only once. + * + * @return an iterator that iterates through the key-value pairs of this map. + */ + final def iterator: Iterator[(K, V)] = + new HashMap.Iterator(rootNode) + + /** + * A reverse iterator for this map that can be used only once. + * + * @return an iterator that iterates through this map in the reverse order of [[HashMap#iterator]]. + */ + final def reverseIterator: Iterator[(K, V)] = + new HashMap.ReverseIterator(rootNode) + + /** + * The size of this map. + * + * @return the number of elements in this map. + */ + final def size: Int = rootNode.size + + /** + * Tests whether the map is empty. + * + * @return `true` if the map contains no elements, `false` otherwise. + */ + final def isEmpty: Boolean = size == 0 + + /** + * Tests whether the map is not empty. + * + * @return `true` if the map contains at least one element, `false` otherwise. + */ + final def nonEmpty: Boolean = !isEmpty + + /** + * Apply `f` to each key-value pair for its side effects. + * + * @param f the function to apply to each key-value pair. + */ + final def foreach[U](f: (K, V) => U): Unit = + rootNode.foreach(f) + + /** + * Test whether the map contains `key`. + * + * @param key the key to check for map membership. + * @return `true` if the map contains `key`, `false` otherwise. + */ + final def contains(key: K): Boolean = + rootNode.contains(key, hashKey.hash(key), 0) + + /** + * Get the value associated with `key` in this map. + * + * @param key the key to check for map membership. + * @return A [[scala.Some]] containing the value if present, else [[scala.None]]. + */ + final def get(key: K): Option[V] = + rootNode.get(key, hashKey.hash(key), 0) + + /** + * Get the value associated with `key` in this map, or `default` if not present. + * + * @param key the key to check for map membership. + * @param default the value to use in case `key` is not present. + * @return the value if present, else `default`. + */ + final def getOrElse[VV >: V](key: K, default: => VV): VV = + get(key).getOrElse(default) + + /** + * Creates a new map with an additional key-value pair, unless the key is already present, + * in which case the value for `key` is replaced by `value`. + * + * @param key the key to be added. + * @param value the value to be added. + * @return a new map that contains all key-value pairs of this map and that also contains a mapping from `key` to `value`. + */ + final def add[VV >: V](key: K, value: VV): HashMap[K, VV] = { + val keyHash = hashKey.hash(key) + val newRootNode = rootNode.add(key, keyHash, value, 0) + + if (newRootNode eq rootNode) + this + else + new HashMap(newRootNode) + } + + /** + * Creates a new map with the given key removed from the map. + * + * @param key the key to be removed. + * @return a new map that contains all elements of this map but that does not contain `key`. + */ + final def remove(key: K): HashMap[K, V] = { + val keyHash = hashKey.hash(key) + val newRootNode = rootNode.remove(key, keyHash, 0) + + if (newRootNode eq rootNode) + this + else + new HashMap(newRootNode) + } + + /** + * Typesafe equality operator. + * + * This method is similar to [[scala.Any#==]] except that it only allows two [[cats.data.HashMap]] + * values of the same key-value type to be compared to each other, and uses equality provided + * by [[cats.kernel.Eq]] instances, rather than using the universal equality provided by + * [[java.lang.Object#equals]]. + * + * @param that the [[cats.data.HashMap]] to check for equality with this map. + * @param eqValue the [[cats.kernel.Eq]] instance to use for comparing values. + * @return `true` if this map and `that` are equal, `false` otherwise. + */ + final def ===[VV >: V](that: HashMap[K, VV])(implicit eqValue: Eq[VV]): Boolean = + (this eq that) || (this.rootNode === that.rootNode) + + final override def equals(that: Any): Boolean = that match { + case map: HashMap[_, _] => + (this eq map) || (this.rootNode == map.rootNode) + case _ => + false + } + + /** + * Compute a hash code value for this map. + * + * This method is similar to [[java.lang.Object#hashCode]] except that it computes a hash code + * according to [[cats.Hash]] instances. + * + * @param hashValue the [[cats.kernel.Hash]] instance to use for hashing values of type `VV`. + * @return a hash code value for this map. + */ + final def hash[VV >: V](implicit hashValue: Hash[VV]): Int = + StaticMethods.unorderedHash(this.iterator: Iterator[(K, VV)]) + + final override def hashCode(): Int = { + implicit val valueHash = Hash.fromUniversalHashCode[V] + StaticMethods.unorderedHash(this.iterator) + } + + /** + * Typesafe stringification operator. + * + * This method is similar to [[java.lang.Object#toString]] except that it stringifies values according + * to [[cats.Show]] instances. + * + * @param showKey the [[cats.Show]] instance to use for showing keys of type `K`. + * @param showValue the [[cats.Show]] instance to use for showing values of type `V`. + * @return a [[java.lang.String]] representation of this map. + */ + final def show[VV >: V](implicit showKey: Show[K], showValue: Show[VV]): String = + iterator.map { case (k, v) => s"${showKey.show(k)} -> ${showValue.show(v)}" }.mkString("HashMap(", ", ", ")") + + final override def toString() = + iterator.map { case (k, v) => s"$k -> $v" }.mkString("HashMap(", ", ", ")") +} + +object HashMap extends HashMapInstances { + + /** + * Creates a new empty [[cats.data.HashMap]] which uses `hashKey` for hashing. + * + * @param hashKey the [[cats.kernel.Hash]] instance used for hashing keys. + * @return a new empty [[cats.data.HashMap]]. + */ + final def empty[K, V](implicit hashKey: Hash[K]): HashMap[K, V] = + new HashMap[K, V](Node.empty[K, V]) + + /** + * Creates a new [[cats.data.HashMap]] which contains all elements of `kvs`. + * + * @param kvs the key-value pairs to add to the [[cats.data.HashMap]]. + * @param hashKey the [[cats.kernel.Hash]] instance used for hashing keys. + * @return a new [[cats.data.HashMap]] which contains all elements of `kvs`. + */ + final def apply[K, V](kvs: (K, V)*)(implicit hashKey: Hash[K]) = + fromSeq(kvs) + + /** + * Creates a new [[cats.data.HashMap]] which contains all elements of `seq`. + * + * @param seq the sequence of elements to add to the [[cats.data.HashMap]]. + * @param hashKey the [[cats.kernel.Hash]] instance used for hashing values. + * @return a new [[cats.data.HashMap]] which contains all elements of `seq`. + */ + final def fromSeq[K, V](seq: Seq[(K, V)])(implicit hashKey: Hash[K]): HashMap[K, V] = { + val rootNode = seq.foldLeft(Node.empty[K, V]) { case (node, (k, v)) => + node.add(k, hashKey.hash(k), v, 0) + } + new HashMap(rootNode) + } + + sealed abstract private class Node[K, +V] { + + /** + * @return The number of value and node elements in the contents array of this trie node. + */ + def allElements: Int + + /** + * @return The number of value elements in the contents array of this trie node. + */ + def valueElements: Int + + /** + * @return The number of node elements in the contents array of this trie node. + */ + def nodeElements: Int + + /** + * @return the number of value elements in this subtree. + */ + def size: Int + + /** + * @param index the index of the value among the value elements of this trie node. + * @return the key element at the provided `index`. + */ + def getKey(index: Int): K + + /** + * @param index the index of the value among the value elements of this trie node. + * @return the value at the provided `index`. + */ + def getValue(index: Int): V + + /** + * @param index the index of the value among the value elements of this trie node. + * @return the key-value element at the provided `index`. + */ + def getMapping(index: Int): (K, V) + + /** + * @param index the index of the node among the node elements of this trie node. + * @return the node element at the provided `index`. + */ + def getNode(index: Int): Node[K, V] + + /** + * @return a [[scala.Boolean]] indicating whether the current trie node contains any node elements. + */ + def hasNodes: Boolean + + /** + * @return a [[scala.Boolean]] indicating whether the current trie node contains any value elements. + */ + def hasValues: Boolean + + /** + * Apply f to each key-value pair of the current trie node and its sub-nodes for its side effects. + * + * @param f + */ + def foreach[U](f: (K, V) => U): Unit + + /** + * Determines whether the current trie node or its sub-nodes contain the provided key. + * + * @param key the key to query + * @param keyHash the hash of the key to query + * @param depth the 0-indexed depth in the trie structure. + * @return a [[scala.Boolean]] indicating whether this [[HashMap.Node]] or any of its child nodes contains the element. + */ + def contains(key: K, keyHash: Int, depth: Int): Boolean + + /** + * Get the value associated with `key` in the current trie node or its sub-nodes. + * + * @param key the key to query + * @param keyHash the hash of the key to query + * @param depth the 0-indexed depth in the trie structure. + * @return a [[scala.Some]] containing the value if present, else [[scala.None]]. + */ + def get(key: K, keyHash: Int, depth: Int): Option[V] + + /** + * The current trie node updated to add the provided key-value pair. + * + * @param newKey the key to add. + * @param newKeyHash the hash of the key to add. + * @param value the value to add. + * @param depth the 0-indexed depth in the trie structure. + * @return a new [[HashMap.Node]] containing the element to add. + */ + def add[VV >: V](newKey: K, newKeyHash: Int, value: VV, depth: Int): Node[K, VV] + + /** + * The current trie node updated to remove the provided key. + * + * @param removeKey the key to remove. + * @param removeKeyHash the hash of the element to remove. + * @param depth the 0-indexed depth in the trie structure. + * @return a new [[HashMap.Node]] with the element removed. + */ + def remove(removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] + + /** + * Typesafe equality operator. + * + * This method is similar to [[scala.Any#==]] except that it only allows two [[cats.data.HashMap.Node]] + * values of the same key-value type to be compared to each other, and uses equality provided + * by [[cats.kernel.Eq]] instances, rather than using the universal equality provided by + * [[java.lang.Object#equals]]. + * + * @param that the [[cats.data.HashMap.Node]] to check for equality with this node. + * @param eqValue the [[cats.kernel.Eq]] instance to use for comparing values. + * @return `true` if this node and `that` are equal, `false` otherwise. + */ + def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean + + /** + * An approximation of the CHAMP "branch size", used for the deletion algorithm. + * + * The branch size indicates the number of elements transitively reachable from this node, but that is expensive to compute. + * + * There are three important cases when implementing the deletion algorithm: + * - a sub-tree has no elements ([[Node.SizeNone]]) + * - a sub-tree has exactly one element ([[Node.SizeOne]]) + * - a sub-tree has more than one element ([[Node.SizeMany]]) + * + * This approximation assumes that nodes contain many elements (because the deletion algorithm inlines singleton nodes). + * + * @return either [[Node.SizeNone]], [[Node.SizeOne]] or [[Node.SizeMany]] + */ + final def sizeHint = { + if (nodeElements > 0) + Node.SizeMany + else + (valueElements: @annotation.switch) match { + case 0 => Node.SizeNone + case 1 => Node.SizeOne + case _ => Node.SizeMany + } + } + } + + /** + * A CHAMP hash collision node. In the event that the hash codes of multiple elements collide, + * this node type is used to collect all of the colliding elements and implement the [[HashMap.Node]] + * interface at a performance cost compared with a [[HashMap.BitMapNode]]. + * + * @tparam A the type of the elements contained in this node. + * @param collisionHash the hash value at which all of the contents of this node collide. + * @param contents the value elements whose hashes collide. + */ + final private class CollisionNode[K, +V]( + val collisionHash: Int, + val contents: Vector[(K, V)] + )(implicit hashKey: Hash[K]) + extends Node[K, V] { + + final def hasNodes: Boolean = false + + final def hasValues: Boolean = true + + final def allElements: Int = valueElements + + final def valueElements: Int = contents.size + + final def nodeElements: Int = 0 + + final def size: Int = contents.size + + final def foreach[U](f: (K, V) => U): Unit = + contents.foreach(f.tupled) + + final def contains(key: K, keyHash: Int, depth: Int): Boolean = + collisionHash == keyHash && contents.exists { case (k, _) => hashKey.eqv(key, k) } + + final def get(key: K, keyHash: Int, depth: Int): Option[V] = + if (collisionHash != keyHash) None + else contents.collectFirst { case (k, v) if hashKey.eqv(key, k) => v } + + final def getKey(index: Int): K = + contents(index)._1 + + final def getValue(index: Int): V = + contents(index)._2 + + final def getMapping(index: Int): (K, V) = + contents(index) + + final def getNode(index: Int): Node[K, V] = + throw new IndexOutOfBoundsException("No sub-nodes present in hash-collision leaf node.") + + final def add[VV >: V](newKey: K, newKeyHash: Int, newValue: VV, depth: Int): Node[K, VV] = + if (contains(newKey, newKeyHash, depth)) + this + else + new CollisionNode(newKeyHash, contents :+ (newKey -> newValue)) + + final override def remove(key: K, keyHash: Int, depth: Int): Node[K, V] = + if (!contains(key, keyHash, depth)) + this + else { + val newContents = contents.filterNot { case (k, _) => hashKey.eqv(key, k) } + if (newContents.size > 1) + new CollisionNode(collisionHash, newContents) + else { + // This is a singleton node so the depth doesn't matter; + // we only need to index into it to inline the value in our parent node + val mask = Node.maskFrom(collisionHash, 0) + val bitPos = Node.bitPosFrom(mask) + val newContentsArray = new Array[Any](Node.StrideLength * newContents.length) + var i = 0 + while (i < newContents.length) { + val (k, v) = newContents(i) + val keyIndex = Node.StrideLength * i + newContentsArray(keyIndex) = k + newContentsArray(keyIndex + 1) = v + i += 1 + } + new BitMapNode[K, V](bitPos, 0, newContentsArray, newContents.size) + } + } + + final def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean = { + (this eq that) || { + that match { + case node: CollisionNode[_, _] => + (this.collisionHash === node.collisionHash) && + (this.contents.size === node.contents.size) && + this.contents.forall { case (kl, vl) => + node.contents.exists { case (kr, vr) => hashKey.eqv(kl, kr) && eqValue.eqv(vl, vr) } + } + case _ => + false + } + } + } + + final override def equals(that: Any): Boolean = that match { + case node: CollisionNode[_, _] => + (this.collisionHash == node.collisionHash) && + (this.contents.size == node.contents.size) && + this.contents.forall(node.contents.contains) + case _ => + false + } + + final override def toString(): String = { + s"""CollisionNode(hash=${collisionHash}, values=${contents.mkString("[", ",", "]")})""" + } + } + + /** + * A CHAMP bitmap node. Stores value element and node element positions in the `contents` array + * in the `valueMap` and `nodeMap` integer bitmaps. + * + * @tparam A the type of the elements contained in this node. + * @param valueMap integer bitmap indicating the notional positions of value elements in the `contents` array. + * @param nodeMap integer bitmap indicating the notional positions of node elements in the `contents` array. + * @param contents an array of `A` value elements and `Node[A]` sub-node elements. + * @param size the number of value elements in this subtree. + */ + final private class BitMapNode[K, +V]( + val valueMap: Int, + val nodeMap: Int, + val contents: Array[Any], + val size: Int + )(implicit hashKey: Hash[K]) + extends Node[K, V] { + + final def hasValues: Boolean = + valueMap != 0 + + final def hasNodes: Boolean = + nodeMap != 0 + + final def allElements: Int = + valueElements + nodeElements + + final def valueElements: Int = + Integer.bitCount(valueMap) + + final def nodeElements: Int = + Integer.bitCount(nodeMap) + + final private def hasNodeAt(bitPos: Int): Boolean = + (nodeMap & bitPos) != 0 + + final private def hasValueAt(bitPos: Int): Boolean = + (valueMap & bitPos) != 0 + + final def getKey(index: Int): K = + contents(Node.StrideLength * index).asInstanceOf[K] + + final def getValue(index: Int): V = + contents(Node.StrideLength * index + 1).asInstanceOf[V] + + final def getMapping(index: Int): (K, V) = + contents(Node.StrideLength * index).asInstanceOf[K] -> + contents(Node.StrideLength * index + 1).asInstanceOf[V] + + final def getNode(index: Int): Node[K, V] = + contents(contents.length - 1 - index).asInstanceOf[Node[K, V]] + + final def foreach[U](f: (K, V) => U): Unit = { + var i = 0 + while (i < valueElements) { + f.tupled(getMapping(i)) + i += 1 + } + + i = 0 + while (i < nodeElements) { + getNode(i).foreach(f) + i += 1 + } + } + + final def contains(key: K, keyHash: Int, depth: Int): Boolean = { + val mask = Node.maskFrom(keyHash, depth) + val bitPos = Node.bitPosFrom(mask) + + if (hasValueAt(bitPos)) { + val index = Node.indexFrom(valueMap, bitPos) + hashKey.eqv(key, getKey(index)) + } else if (hasNodeAt(bitPos)) { + val index = Node.indexFrom(nodeMap, bitPos) + getNode(index).contains(key, keyHash, depth + 1) + } else { + false + } + } + + final def get(key: K, keyHash: Int, depth: Int): Option[V] = { + val mask = Node.maskFrom(keyHash, depth) + val bitPos = Node.bitPosFrom(mask) + + if (hasValueAt(bitPos)) { + val index = Node.indexFrom(valueMap, bitPos) + if (hashKey.eqv(key, getKey(index))) { + Some(getValue(index)) + } else { + None + } + } else if (hasNodeAt(bitPos)) { + val index = Node.indexFrom(nodeMap, bitPos) + getNode(index).get(key, keyHash, depth + 1) + } else { + None + } + } + + final private def mergeValues[VV >: V]( + left: K, + leftHash: Int, + leftValue: VV, + right: K, + rightHash: Int, + rightValue: VV, + depth: Int + ): Node[K, VV] = { + if (depth >= Node.MaxDepth) { + new CollisionNode[K, VV](leftHash, Vector(left -> leftValue, right -> rightValue)) + } else { + val leftMask = Node.maskFrom(leftHash, depth) + val rightMask = Node.maskFrom(rightHash, depth) + if (leftMask != rightMask) { + val valueMap = Node.bitPosFrom(leftMask) | Node.bitPosFrom(rightMask) + if (leftMask < rightMask) { + new BitMapNode[K, VV](valueMap, 0, Array(left, leftValue, right, rightValue), 2) + } else { + new BitMapNode[K, VV](valueMap, 0, Array(right, rightValue, left, leftValue), 2) + } + } else { + val nodeMap = Node.bitPosFrom(leftMask) + val node = mergeValues(left, leftHash, leftValue, right, rightHash, rightValue, depth + 1) + new BitMapNode[K, VV](0, nodeMap, Array(node), node.size) + } + } + } + + final private def mergeValuesIntoNode[VV >: V]( + bitPos: Int, + left: K, + leftHash: Int, + leftValue: VV, + right: K, + rightHash: Int, + rightValue: VV, + depth: Int + ): Node[K, VV] = { + val newNode = mergeValues(left, leftHash, leftValue, right, rightHash, rightValue, depth) + val valueIndex = Node.StrideLength * Node.indexFrom(valueMap, bitPos) + val nodeIndex = contents.length - Node.StrideLength - Node.indexFrom(nodeMap, bitPos) + val newContents = new Array[Any](contents.length - 1) + + System.arraycopy(contents, 0, newContents, 0, valueIndex) + System.arraycopy(contents, valueIndex + Node.StrideLength, newContents, valueIndex, nodeIndex - valueIndex) + + newContents(nodeIndex) = newNode + + System.arraycopy( + contents, + nodeIndex + Node.StrideLength, + newContents, + nodeIndex + 1, + contents.length - nodeIndex - Node.StrideLength + ) + + new BitMapNode[K, V](valueMap ^ bitPos, nodeMap | bitPos, newContents, size + 1) + } + + final private def replaceNode[VV >: V](index: Int, oldNode: Node[K, VV], newNode: Node[K, VV]): Node[K, VV] = { + val targetIndex = contents.length - 1 - index + val newContents = new Array[Any](contents.length) + System.arraycopy(contents, 0, newContents, 0, contents.length) + newContents(targetIndex) = newNode + new BitMapNode[K, V](valueMap, nodeMap, newContents, size + (newNode.size - oldNode.size)) + } + + final private def updateNode[VV >: V]( + bitPos: Int, + newKey: K, + newKeyHash: Int, + newValue: VV, + depth: Int + ): Node[K, VV] = { + val index = Node.indexFrom(nodeMap, bitPos) + val subNode = getNode(index) + val newSubNode = subNode.add(newKey, newKeyHash, newValue, depth + 1) + + if (newSubNode eq subNode) + this + else + replaceNode(index, subNode, newSubNode) + } + + final private def replaceValueAtIndex[VV >: V](index: Int, newValue: VV): Node[K, VV] = { + val valueIndex = Node.StrideLength * index + 1 + val newContents = new Array[Any](contents.length) + System.arraycopy(contents, 0, newContents, 0, contents.length) + newContents(valueIndex) = newValue + new BitMapNode[K, V](valueMap, nodeMap, newContents, size) + } + + final private def updateKeyValue[VV >: V]( + bitPos: Int, + newKey: K, + newKeyHash: Int, + newValue: VV, + depth: Int + ): Node[K, VV] = { + val index = Node.indexFrom(valueMap, bitPos) + val (existingKey, existingValue) = getMapping(index) + if (hashKey.eqv(existingKey, newKey)) { + replaceValueAtIndex(index, newValue) + } else + mergeValuesIntoNode( + bitPos, + existingKey, + hashKey.hash(existingKey), + existingValue, + newKey, + newKeyHash, + newValue, + depth + 1 + ) + } + + final private def appendKeyValue[VV >: V](bitPos: Int, newKey: K, newValue: VV): Node[K, VV] = { + val index = Node.StrideLength * Node.indexFrom(valueMap, bitPos) + val newContents = new Array[Any](contents.length + Node.StrideLength) + System.arraycopy(contents, 0, newContents, 0, index) + newContents(index) = newKey + newContents(index + 1) = newValue + System.arraycopy(contents, index, newContents, index + Node.StrideLength, contents.length - index) + new BitMapNode[K, V](valueMap | bitPos, nodeMap, newContents, size + 1) + } + + final def add[VV >: V](newKey: K, newKeyHash: Int, newValue: VV, depth: Int): Node[K, VV] = { + val mask = Node.maskFrom(newKeyHash, depth) + val bitPos = Node.bitPosFrom(mask) + + if (hasValueAt(bitPos)) { + updateKeyValue(bitPos, newKey, newKeyHash, newValue, depth) + } else if (hasNodeAt(bitPos)) { + updateNode(bitPos, newKey, newKeyHash, newValue, depth) + } else { + appendKeyValue(bitPos, newKey, newValue) + } + } + + final private def removeKeyValue(bitPos: Int, removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] = { + val index = Node.indexFrom(valueMap, bitPos) + val existingKey = getKey(index) + if (!hashKey.eqv(existingKey, removeKey)) { + this + } else if (allElements == 1) { + Node.empty[K, V] + } else { + val keyIndex = Node.StrideLength * index + val newContents = new Array[Any](contents.length - Node.StrideLength) + + // If this element will be propagated or inlined, calculate the new valueMap at depth - 1 + val newBitPos = + if (allElements == 2 && depth > 0) + Node.bitPosFrom(Node.maskFrom(removeKeyHash, depth - 1)) + else + valueMap ^ bitPos + + System.arraycopy(contents, 0, newContents, 0, keyIndex) + + System.arraycopy( + contents, + keyIndex + Node.StrideLength, + newContents, + keyIndex, + contents.length - keyIndex - Node.StrideLength + ) + + new BitMapNode[K, V](newBitPos, nodeMap, newContents, size - 1) + } + } + + final private def inlineSubNodeKeyValue[VV >: V](bitPos: Int, newSubNode: Node[K, VV]): Node[K, VV] = { + val nodeIndex = contents.length - 1 - Node.indexFrom(nodeMap, bitPos) + val keyIndex = Node.StrideLength * Node.indexFrom(valueMap, bitPos) + val newContents = new Array[Any](contents.length + 1) + val (key, value) = newSubNode.getMapping(0) + + System.arraycopy(contents, 0, newContents, 0, keyIndex) + + newContents(keyIndex) = key + newContents(keyIndex + 1) = value + + System.arraycopy(contents, keyIndex, newContents, keyIndex + Node.StrideLength, nodeIndex - keyIndex) + + System.arraycopy( + contents, + nodeIndex + 1, + newContents, + nodeIndex + Node.StrideLength, + contents.length - nodeIndex - 1 + ) + + new BitMapNode[K, V](valueMap | bitPos, nodeMap ^ bitPos, newContents, size - 1) + } + + final private def removeKeyValueFromSubNode( + bitPos: Int, + removeKey: K, + removeKeyHash: Int, + depth: Int + ): Node[K, V] = { + val index = Node.indexFrom(nodeMap, bitPos) + val subNode = getNode(index) + val newSubNode = subNode.remove(removeKey, removeKeyHash, depth + 1) + + if (newSubNode eq subNode) + this + else if (valueElements == 0 && nodeElements == 1) { + if (newSubNode.sizeHint == Node.SizeOne) { + newSubNode + } else { + replaceNode(index, subNode, newSubNode) + } + } else if (newSubNode.sizeHint == Node.SizeOne) { + inlineSubNodeKeyValue(bitPos, newSubNode) + } else { + replaceNode(index, subNode, newSubNode) + } + } + + final override def remove(removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] = { + val mask = Node.maskFrom(removeKeyHash, depth) + val bitPos = Node.bitPosFrom(mask) + + if (hasValueAt(bitPos)) { + removeKeyValue(bitPos, removeKey, removeKeyHash, depth) + } else if (hasNodeAt(bitPos)) { + removeKeyValueFromSubNode(bitPos, removeKey, removeKeyHash, depth) + } else { + this + } + } + + final override def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean = { + (this eq that) || { + that match { + case node: BitMapNode[_, _] => + (this.valueMap === node.valueMap) && + (this.nodeMap === node.nodeMap) && + (this.size === node.size) && { + var i = 0 + while (i < valueElements) { + val (kl, vl) = getMapping(i) + val (kr, vr) = node.getMapping(i) + if (hashKey.neqv(kl, kr) || eqValue.neqv(vl, vr)) return false + i += 1 + } + i = 0 + while (i < nodeElements) { + if (!(getNode(i).===[VV](node.getNode(i)))) return false + i += 1 + } + true + } + case _ => + false + } + } + } + + final override def equals(that: Any): Boolean = that match { + case node: BitMapNode[_, _] => + (this eq node) || { + (this.valueMap == node.valueMap) && + (this.nodeMap == node.nodeMap) && + (this.size == node.size) && + Arrays.equals( + this.contents.asInstanceOf[Array[Object]], + node.contents.asInstanceOf[Array[Object]] + ) + } + case _ => + false + } + + final override def toString(): String = { + val valueMapStr = + ("0" * Integer.numberOfLeadingZeros(if (valueMap != 0) valueMap else 1)) + Integer.toBinaryString(valueMap) + val nodeMapStr = + ("0" * Integer.numberOfLeadingZeros(if (nodeMap != 0) nodeMap else 1)) + Integer.toBinaryString(nodeMap) + + s"""BitMapNode(valueMap=$valueMapStr, nodeMap=$nodeMapStr, contents=${contents.mkString("[", ", ", "]")})""" + } + } + + object Node { + final val StrideLength = 2 + final val BitPartitionSize = 5 + final val BitPartitionMask = (1 << BitPartitionSize) - 1 + final val MaxDepth = 7 + + final val SizeNone = 0 + final val SizeOne = 1 + final val SizeMany = 2 + + /** + * The `mask` is a 5-bit segment of a 32-bit element hash. + * + * The `depth` value is used to determine which segment of the hash we are currently inspecting by shifting the `elementHash` to the right in [[Node.BitPartitionSize]] bit increments. + * + * A 5-bit segment of the hash can represent numbers 0 to 31, which matches the branching factor of the trie structure. + * + * It represents the notional index of the element in the current trie node. + * + * @param elementHash the hash of the element we are operating on. + * @param depth the depth of the current node in the trie structure. + * @return the relevant 5-bit segment of the `elementHash`. + */ + final def maskFrom(elementHash: Int, depth: Int): Int = + (elementHash >>> (depth * Node.BitPartitionSize)) & BitPartitionMask + + /** + * Sets a single bit at the position of the notional index indicated by `mask`. + * + * Used to determine the bit which represents the notional index of a data value or node in the trie node bitmaps. + * + * @param mask the notional index of an element at this depth in the trie. + * @return an integer with a single bit set at the notional index indicated by `mask`. + */ + final def bitPosFrom(mask: Int): Int = + 1 << mask + + /** + * Calculates the absolute index of an element in the contents array of a trie node. + * + * This is calculated by counting how many bits are set to the right of the notional index in the relevant bitmap. + * + * @param bitMap the bitmap indicating either data value or node positions in the contents array. + * @param bitPos the notional index of the element in the trie node. + * @return the absolute index of an element in the contents array. + */ + final def indexFrom(bitMap: Int, bitPos: Int): Int = + Integer.bitCount(bitMap & (bitPos - 1)) + + /** + * Creates a new empty bitmap node. + * + * @param hash the [[cats.kernel.Hash]] instance to use to hash elements. + * @return a new empty bitmap node. + */ + final def empty[K, V](implicit hashKey: Hash[K]): Node[K, V] = + new BitMapNode[K, V](0, 0, Array.empty[Any], 0) + } + + private[HashMap] class Iterator[K, V] extends scala.collection.AbstractIterator[(K, V)] { + private var currentNode: Node[K, V] = null + + private var currentValuesIndex: Int = 0 + private var currentValuesLength: Int = 0 + + private var currentDepth: Int = -1 + + private val nodeStack: Array[Node[K, V]] = + new Array(Node.MaxDepth) + + private val nodeIndicesAndLengths: Array[Int] = + new Array(Node.MaxDepth * 2) + + def this(rootNode: Node[K, V]) = { + this() + if (rootNode.hasNodes) pushNode(rootNode) + if (rootNode.hasValues) pushValues(rootNode) + } + + final private def pushNode(node: Node[K, V]): Unit = { + currentDepth += 1 + + val cursorIndex = currentDepth * 2 + val lengthIndex = currentDepth * 2 + 1 + + nodeStack(currentDepth) = node + + nodeIndicesAndLengths(cursorIndex) = 0 + nodeIndicesAndLengths(lengthIndex) = node.nodeElements + } + + final private def pushValues(node: Node[K, V]): Unit = { + currentNode = node + currentValuesIndex = 0 + currentValuesLength = node.valueElements + } + + final private def getMoreValues(): Boolean = { + var foundMoreValues = false + + while (!foundMoreValues && currentDepth >= 0) { + val cursorIndex = currentDepth * 2 + val lengthIndex = currentDepth * 2 + 1 + + val nodeIndex = nodeIndicesAndLengths(cursorIndex) + val nodeLength = nodeIndicesAndLengths(lengthIndex) + + if (nodeIndex < nodeLength) { + val nextNode = nodeStack(currentDepth) + .getNode(nodeIndex) + + if (nextNode.hasNodes) { + pushNode(nextNode) + } + + if (nextNode.hasValues) { + pushValues(nextNode) + foundMoreValues = true + } + + nodeIndicesAndLengths(cursorIndex) += 1 + + } else { + currentDepth -= 1 + } + } + + foundMoreValues + } + + final override def hasNext: Boolean = + (currentValuesIndex < currentValuesLength) || getMoreValues() + + final override def next(): (K, V) = { + if (!hasNext) throw new NoSuchElementException + val value = currentNode.getMapping(currentValuesIndex) + currentValuesIndex += 1 + value + } + } + + private[HashMap] class ReverseIterator[K, V] extends scala.collection.AbstractIterator[(K, V)] { + private var currentNode: Node[K, V] = null + + private var currentValuesIndex: Int = -1 + + private var currentDepth: Int = -1 + + private val nodeStack: Array[Node[K, V]] = + new Array(Node.MaxDepth + 1) + + private val nodeIndices: Array[Int] = + new Array(Node.MaxDepth + 1) + + def this(rootNode: Node[K, V]) = { + this() + pushNode(rootNode) + getMoreValues() + } + + final private def pushNode(node: Node[K, V]): Unit = { + currentDepth += 1 + nodeStack(currentDepth) = node + nodeIndices(currentDepth) = node.nodeElements - 1 + } + + final private def pushValues(node: Node[K, V]): Unit = { + currentNode = node + currentValuesIndex = node.valueElements - 1 + } + + final private def getMoreValues(): Boolean = { + var foundMoreValues = false + + while (!foundMoreValues && currentDepth >= 0) { + val nodeIndex = nodeIndices(currentDepth) + nodeIndices(currentDepth) -= 1 + + if (nodeIndex >= 0) { + pushNode(nodeStack(currentDepth).getNode(nodeIndex)) + } else { + val currentNode = nodeStack(currentDepth) + currentDepth -= 1 + if (currentNode.hasValues) { + pushValues(currentNode) + foundMoreValues = true + } + } + } + + foundMoreValues + } + + final override def hasNext: Boolean = + (currentValuesIndex >= 0) || getMoreValues() + + final override def next(): (K, V) = { + if (!hasNext) throw new NoSuchElementException + val value = currentNode.getMapping(currentValuesIndex) + currentValuesIndex -= 1 + value + } + } + +} + +sealed abstract private[data] class HashMapInstances extends HashMapInstances1 { + implicit def catsDataUnorderedTraverseForHashMap[K: Hash]: UnorderedTraverse[HashMap[K, *]] = + new UnorderedTraverse[HashMap[K, *]] { + def unorderedFoldMap[U, V](hm: HashMap[K, U])(f: U => V)(implicit V: CommutativeMonoid[V]): V = + V.combineAll(hm.iterator.map { case (_, u) => f(u) }) + + def unorderedTraverse[G[_], U, V](hashMap: HashMap[K, U])(f: U => G[V])(implicit + G: CommutativeApplicative[G] + ): G[HashMap[K, V]] = { + val emptyHm: Eval[G[HashMap[K, V]]] = + Always(G.pure(HashMap.empty[K, V])) + + val gHashMap = Foldable + .iterateRight(hashMap.iterator, emptyHm) { case ((k, u), hm) => + G.map2Eval(f(u), hm) { (v, map) => + map.add(k, v) + } + } + + gHashMap.value + } + } + + implicit def catsDataCommutativeMonoidForHashMap[K: Hash, V: CommutativeSemigroup]: CommutativeMonoid[HashMap[K, V]] = + new HashMapMonoid[K, V] with CommutativeMonoid[HashMap[K, V]] + + implicit def catsDataShowForHashMap[K: Show, V: Show]: Show[HashMap[K, V]] = + Show.show[HashMap[K, V]](_.show) + + implicit def catsDataHashForHashMap[K, V: Hash]: Hash[HashMap[K, V]] = + new Hash[HashMap[K, V]] { + def hash(hm: HashMap[K, V]): Int = hm.hash + def eqv(x: HashMap[K, V], y: HashMap[K, V]): Boolean = x === y + } +} + +sealed abstract private[data] class HashMapInstances1 { + implicit def catsDataMonoidForHashMap[K: Hash, V: Semigroup]: Monoid[HashMap[K, V]] = + new HashMapMonoid[K, V] +} + +class HashMapMonoid[K: Hash, V](implicit V: Semigroup[V]) extends Monoid[HashMap[K, V]] { + + def empty: HashMap[K, V] = HashMap.empty[K, V] + + def combine(xs: HashMap[K, V], ys: HashMap[K, V]): HashMap[K, V] = { + if (xs.size <= ys.size) { + xs.iterator.foldLeft(ys) { case (my, (k, x)) => + my.add(k, Semigroup.maybeCombine(x, my.get(k))) + } + } else { + ys.iterator.foldLeft(xs) { case (mx, (k, y)) => + mx.add(k, Semigroup.maybeCombine(mx.get(k), y)) + } + } + } +} diff --git a/kernel/src/main/scala-2.12/cats/kernel/compat/HashCompat.scala b/kernel/src/main/scala-2.12/cats/kernel/compat/HashCompat.scala index 5f21b95f99..63631d9d47 100644 --- a/kernel/src/main/scala-2.12/cats/kernel/compat/HashCompat.scala +++ b/kernel/src/main/scala-2.12/cats/kernel/compat/HashCompat.scala @@ -80,6 +80,27 @@ private[kernel] class HashCompat { finalizeHash(h, n) } + // adapted from scala.util.hashing.MurmurHash3 + def unorderedHash[A](xs: TraversableOnce[A])(implicit A: Hash[A]): Int = { + import scala.util.hashing.MurmurHash3._ + var a = 0 + var b = 0 + var c = 1 + var n = 0 + xs.foreach { x => + val h = A.hash(x) + a += h + b ^= h + if (h != 0) c *= h + n += 1 + } + var h = setSeed + h = mix(h, a) + h = mix(h, b) + h = mixLast(h, c) + finalizeHash(h, n) + } + // adapted from scala.util.hashing.MurmurHash3 def orderedHash[A](xs: TraversableOnce[A])(implicit A: Hash[A]): Int = { import scala.util.hashing.MurmurHash3._ diff --git a/kernel/src/main/scala-2.13+/cats/kernel/compat/HashCompat.scala b/kernel/src/main/scala-2.13+/cats/kernel/compat/HashCompat.scala index 7ff34228c3..94782d26b3 100644 --- a/kernel/src/main/scala-2.13+/cats/kernel/compat/HashCompat.scala +++ b/kernel/src/main/scala-2.13+/cats/kernel/compat/HashCompat.scala @@ -100,6 +100,28 @@ private[kernel] class HashCompat { else finalizeHash(h, n) } + // adapted from scala.util.hashing.MurmurHash3 + def unorderedHash[A](xs: IterableOnce[A])(implicit A: Hash[A]): Int = { + import scala.util.hashing.MurmurHash3.{finalizeHash, mix, mixLast, setSeed} + + var a, b, n = 0 + var c = 1 + val iterator = xs.iterator + while (iterator.hasNext) { + val x = iterator.next() + val h = A.hash(x) + a += h + b ^= h + c *= h | 1 + n += 1 + } + var h = setSeed + h = mix(h, a) + h = mix(h, b) + h = mixLast(h, c) + finalizeHash(h, n) + } + // adapted from scala.util.hashing.MurmurHash3 def orderedHash[A](xs: IterableOnce[A])(implicit A: Hash[A]): Int = { import scala.util.hashing.MurmurHash3.{finalizeHash, mix, seqSeed} diff --git a/laws/src/main/scala/cats/laws/discipline/arbitrary.scala b/laws/src/main/scala/cats/laws/discipline/arbitrary.scala index c95122856c..53366c370b 100644 --- a/laws/src/main/scala/cats/laws/discipline/arbitrary.scala +++ b/laws/src/main/scala/cats/laws/discipline/arbitrary.scala @@ -415,6 +415,16 @@ object arbitrary extends ArbitraryInstances0 with ScalaVersionSpecific.Arbitrary implicit val catsLawsArbitraryForMiniInt: Arbitrary[MiniInt] = Arbitrary(Gen.oneOf(MiniInt.allValues)) + + implicit def catsLawsArbitraryForHashMap[K, V](implicit + K: Arbitrary[K], + V: Arbitrary[V], + hash: Hash[K] + ): Arbitrary[HashMap[K, V]] = + Arbitrary(getArbitrary[List[(K, V)]].map(HashMap.fromSeq(_)(hash))) + + implicit def catsLawsCogenForHashSet[K, V](implicit K: Cogen[K], V: Cogen[V]): Cogen[HashMap[K, V]] = + Cogen.it[HashMap[K, V], (K, V)](_.iterator) } sealed private[discipline] trait ArbitraryInstances0 { diff --git a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala new file mode 100644 index 0000000000..3f875546c1 --- /dev/null +++ b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala @@ -0,0 +1,292 @@ +/* + * Copyright (c) 2015 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package cats.tests + +import cats.data.HashMap +import cats.kernel.laws.discipline.CommutativeMonoidTests +import cats.kernel.laws.discipline.HashTests +import cats.laws.discipline.arbitrary._ +import cats.laws.discipline.UnorderedTraverseTests +import cats.laws.discipline.SerializableTests +import cats.syntax.eq._ +import org.scalacheck.Arbitrary.arbitrary +import org.scalacheck.Gen +import org.scalacheck.Prop.forAll +import scala.collection.mutable + +class HashMapSuite extends CatsSuite { + // Examples from https://stackoverflow.com/questions/9406775/why-does-string-hashcode-in-java-have-many-conflicts + val collidingStrings = for { + left <- ('A' to 'Y').toList + first = List(left, 'a').mkString + second = List((left + 1).toChar, 'B').mkString + } yield (first, second) + + val collidingKv = Gen.oneOf(collidingStrings).flatMap { case (leftStr, rightStr) => + for { + leftInt <- arbitrary[Int] + rightInt <- arbitrary[Int] + } yield (leftStr -> leftInt, rightStr -> rightInt) + } + + val collidingKvs = Gen.listOf(collidingKv) + + checkAll("HashMap[Int, String]", HashTests[HashMap[Int, String]].hash) + checkAll("Hash[HashMap[Int, String]]", SerializableTests.serializable(HashMap.catsDataHashForHashMap[Int, String])) + + checkAll("HashMap[Int, Int] with Option", + UnorderedTraverseTests[HashMap[Int, *]].unorderedTraverse[Int, Int, Int, Option, Option] + ) + checkAll("UnorderedTraverse[HashMap[Int, *]]", + SerializableTests.serializable(HashMap.catsDataUnorderedTraverseForHashMap[Int]) + ) + + checkAll("HashMap[Int, String]", CommutativeMonoidTests[HashMap[Int, Int]].commutativeMonoid) + checkAll("CommutativeMonoid[HashMap[Int, Int]]", + SerializableTests.serializable(HashMap.catsDataCommutativeMonoidForHashMap[Int, Int]) + ) + + // Key-value pairs with the last binding for each distinct key + def distinctBindings[K, V](kvs: List[(K, V)]) = + kvs.reverse.distinctBy(_._1).reverse + + test("show") { + assert(HashMap("a" -> 1, "b" -> 2, "c" -> 3).show === "HashMap(a -> 1, b -> 2, c -> 3)") + assert(HashMap.empty[String, Int].show === "HashMap()") + } + + test("isEmpty and nonEmpty") { + assert(HashMap.empty[Int, Int].isEmpty) + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + assert(hashMap.isEmpty === kvs.isEmpty) + assert(hashMap.nonEmpty === kvs.nonEmpty) + } + } + + test("===") { + forAll { (kvs: List[(Int, String)]) => + val left = HashMap.fromSeq(kvs) + val right = HashMap.fromSeq(kvs) + assert(left === right) + } + + forAll { (leftKvs: List[(Int, String)], rightKvs: List[(Int, String)]) => + val left = HashMap.fromSeq(leftKvs) + val right = HashMap.fromSeq(rightKvs) + assert((distinctBindings(leftKvs) === distinctBindings(rightKvs)) === (left === right)) + } + } + + test("size") { + assert(HashMap.empty[Int, String].size === 0) + assert(HashMap(1 -> "a", 2 -> "b", 3 -> "c").size === 3) + assert(HashMap("Aa" -> 1, "BB" -> 2).size == 2) + + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + assert(hashMap.size === distinctBindings(kvs).size) + } + } + + test("get") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + distinctBindings(kvs).foreach { case (k, v) => + hashMap.get(k) === Some(v) + } + } + } + + test("concat") { + forAll { (leftKvs: List[(Int, String)], rightKvs: List[(Int, String)]) => + val distinctKvs = distinctBindings(leftKvs ++ rightKvs) + val left = HashMap.fromSeq(leftKvs) + val right = HashMap.fromSeq(rightKvs) + val both = left.concat(right) + assert(both === HashMap.fromSeq(leftKvs ++ rightKvs)) + distinctKvs.foreach { case (k, v) => + assert(both.contains(k)) + assert(both.get(k) === Some(v)) + } + } + } + + property("Empty HashMap never contains") { + forAll { (i: Int) => + assert(!HashMap.empty[Int, String].contains(i)) + } + } + + property("Empty HashMap add consistent with contains") { + forAll { (i: Int, s: String) => + assert(HashMap.empty[Int, String].add(i, s).contains(i)) + } + } + + property("Empty HashMap add and remove consistent with contains") { + forAll { (i: Int, s: String) => + assert(!HashMap.empty[Int, String].add(i, s).remove(i).contains(i)) + } + } + + property("fromSeq consistent with contains") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + + kvs.foreach { case (k, _) => + assert(hashMap.contains(k)) + } + } + } + + property("remove consistent with contains") { + forAll { (kvs: List[(Int, String)]) => + val distinctKvs = distinctBindings(kvs) + val hashMap = HashMap.fromSeq(kvs) + + distinctKvs.foldLeft(hashMap) { case (hm, (i, _)) => + if (!hm.contains(i)) { + println(hm) + println(i) + println(hm.contains(i)) + } + assert(hm.contains(i)) + assert(!hm.remove(i).contains(i)) + hm.remove(i) + } + + () + } + } + + property("add and remove with collisions consistent with contains") { + forAll(collidingKvs) { (collisions: List[((String, Int), (String, Int))]) => + + val distinctCollisions = collisions.distinctBy(_._1._1) + + val hashMap = distinctCollisions.foldLeft(HashMap.empty[String, Int]) { case (hm, ((lk, lv), (rk, rv))) => + hm.add(lk, lv).add(rk, rv) + } + + distinctCollisions.foldLeft(hashMap) { case (hm, ((lk, _), (rk, _))) => + assert(hm.contains(lk)) + assert(hm.contains(rk)) + + val removeL = hm.remove(lk) + assert(removeL.contains(rk)) + assert(!removeL.contains(lk)) + + val removeR = hm.remove(rk) + assert(removeR.contains(lk)) + assert(!removeR.contains(rk)) + + val removeLR = removeL.remove(rk) + assert(!removeLR.contains(lk)) + assert(!removeLR.contains(rk)) + + removeLR + } + + () + } + } + + property("iterator consistent with contains") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + val distinctKvs = distinctBindings(kvs) + + val iterated = mutable.ListBuffer[(Int, String)]() + hashMap.iterator.foreach { iterated += _ } + + assert(distinctKvs.forall(iterated.contains)) + assert(iterated.forall(distinctKvs.contains)) + assert(iterated.toList.distinctBy(_._1) === iterated.toList) + assert(iterated.forall { case (k, _) => hashMap.contains(k) }) + } + } + + property("iterator consistent with reverseIterator") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + + val iterated = mutable.ListBuffer[(Int, String)]() + val reverseIterated = mutable.ListBuffer[(Int, String)]() + hashMap.iterator.foreach { iterated += _ } + hashMap.reverseIterator.foreach { reverseIterated += _ } + + assert(iterated.toList === reverseIterated.toList.reverse) + } + } + + property("foreach consistent with contains") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + val distinctKvs = distinctBindings(kvs) + + val foreached = mutable.ListBuffer[(Int, String)]() + hashMap.foreach { (i: Int, s: String) => foreached += ((i, s)) } + + assert(distinctKvs.forall(foreached.contains)) + assert(foreached.forall(distinctKvs.contains)) + assert(foreached.toList.distinct === foreached.toList) + assert(foreached.forall { case (k, _) => hashMap.contains(k) }) + } + } + + property("foreach and iterator consistent") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + + val iterated = mutable.ListBuffer[(Int, String)]() + val foreached = mutable.ListBuffer[(Int, String)]() + hashMap.iterator.foreach { iterated += _ } + hashMap.foreach { (i: Int, s: String) => foreached += ((i, s)) } + + assert(foreached.forall(iterated.contains)) + assert(iterated.forall(foreached.contains)) + } + } + + property("size consistent with iterator") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + + var size = 0 + hashMap.iterator.foreach { _ => size += 1 } + + assert(hashMap.size === size) + } + } + + property("size consistent with foreach") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromSeq(kvs) + + var size = 0 + hashMap.foreach { case _ => size += 1 } + + assert(hashMap.size === size) + } + } +} From 1881a9b6952af3c6b6c09c8bf1f7ec3f00679793 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Thu, 21 Apr 2022 21:42:04 +0100 Subject: [PATCH 02/28] Update access modifiers for HashMap internals --- .../main/scala-2.12/cats/compat/HashMapCompat.scala | 2 +- core/src/main/scala/cats/data/HashMap.scala | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala index d08f14fd87..973652ab88 100644 --- a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala +++ b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala @@ -30,7 +30,7 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => * @return a new map that contains all key-value pairs of this map and `traversable`. */ final def concat[VV >: V](traversable: TraversableOnce[(K, VV)]): HashMap[K, VV] = { - val newRootNode = traversable.foldLeft(rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + val newRootNode = traversable.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => node.add(k, self.hashKey.hash(k), v, 0) } diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index bead2380e2..ad9b5fc903 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -47,7 +47,7 @@ import java.util.Arrays * @tparam V the type of the values contained in this hash map. * @param hashKey the [[cats.kernel.Hash]] instance used for hashing keys. */ -final class HashMap[K, +V] private[data] (private val rootNode: HashMap.Node[K, V])(implicit val hashKey: Hash[K]) +final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.Node[K, V])(implicit val hashKey: Hash[K]) extends HashMapCompat[K, V] { /** @@ -248,7 +248,7 @@ object HashMap extends HashMapInstances { new HashMap(rootNode) } - sealed abstract private class Node[K, +V] { + sealed abstract private[data] class Node[K, +V] { /** * @return The number of value and node elements in the contents array of this trie node. @@ -401,7 +401,7 @@ object HashMap extends HashMapInstances { * @param collisionHash the hash value at which all of the contents of this node collide. * @param contents the value elements whose hashes collide. */ - final private class CollisionNode[K, +V]( + final private[HashMap] class CollisionNode[K, +V]( val collisionHash: Int, val contents: Vector[(K, V)] )(implicit hashKey: Hash[K]) @@ -511,7 +511,7 @@ object HashMap extends HashMapInstances { * @param contents an array of `A` value elements and `Node[A]` sub-node elements. * @param size the number of value elements in this subtree. */ - final private class BitMapNode[K, +V]( + final private[HashMap] class BitMapNode[K, +V]( val valueMap: Int, val nodeMap: Int, val contents: Array[Any], @@ -887,7 +887,7 @@ object HashMap extends HashMapInstances { } } - object Node { + private[HashMap] object Node { final val StrideLength = 2 final val BitPartitionSize = 5 final val BitPartitionMask = (1 << BitPartitionSize) - 1 From 5f9390213390d52598d2325b00dd65c1bac7895e Mon Sep 17 00:00:00 2001 From: David Gregory Date: Thu, 21 Apr 2022 21:56:45 +0100 Subject: [PATCH 03/28] Fix regression in Foldable#iterateRight by ensuring Iterator is always evaluated --- core/src/main/scala/cats/Foldable.scala | 6 +++--- core/src/main/scala/cats/data/HashMap.scala | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/cats/Foldable.scala b/core/src/main/scala/cats/Foldable.scala index f42839cff8..eafbc76ad4 100644 --- a/core/src/main/scala/cats/Foldable.scala +++ b/core/src/main/scala/cats/Foldable.scala @@ -953,13 +953,13 @@ object Foldable { private val sentinel: Function1[Any, Any] = new scala.runtime.AbstractFunction1[Any, Any] { def apply(a: Any) = this } def iterateRight[A, B](iterable: Iterable[A], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = - iterateRight(iterable.iterator, lb)(f) + iterateRight(Eval.always(iterable.iterator), lb)(f) - private[cats] def iterateRight[A, B](iterator: Iterator[A], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = { + private[cats] def iterateRight[A, B](iterator: Eval[Iterator[A]], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = { def loop(it: Iterator[A]): Eval[B] = Eval.defer(if (it.hasNext) f(it.next(), loop(it)) else lb) - Eval.always(iterator).flatMap(loop) + iterator.flatMap(loop) } def iterateRightDefer[G[_]: Defer, A, B](iterable: Iterable[A], lb: G[B])(f: (A, G[B]) => G[B]): G[B] = { diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index ad9b5fc903..1716efca5c 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -1106,7 +1106,7 @@ sealed abstract private[data] class HashMapInstances extends HashMapInstances1 { Always(G.pure(HashMap.empty[K, V])) val gHashMap = Foldable - .iterateRight(hashMap.iterator, emptyHm) { case ((k, u), hm) => + .iterateRight(Eval.always(hashMap.iterator), emptyHm) { case ((k, u), hm) => G.map2Eval(f(u), hm) { (v, map) => map.add(k, v) } From 2f44919d6284668c9cfdbcd5099aead3c6436126 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Thu, 21 Apr 2022 22:02:25 +0100 Subject: [PATCH 04/28] Appease the formatter --- core/src/main/scala/cats/Foldable.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/cats/Foldable.scala b/core/src/main/scala/cats/Foldable.scala index eafbc76ad4..d9f48ea52a 100644 --- a/core/src/main/scala/cats/Foldable.scala +++ b/core/src/main/scala/cats/Foldable.scala @@ -955,7 +955,9 @@ object Foldable { def iterateRight[A, B](iterable: Iterable[A], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = iterateRight(Eval.always(iterable.iterator), lb)(f) - private[cats] def iterateRight[A, B](iterator: Eval[Iterator[A]], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = { + private[cats] def iterateRight[A, B](iterator: Eval[Iterator[A]], lb: Eval[B])( + f: (A, Eval[B]) => Eval[B] + ): Eval[B] = { def loop(it: Iterator[A]): Eval[B] = Eval.defer(if (it.hasNext) f(it.next(), loop(it)) else lb) From ff0e86c6a9fcc8373544ecdfa4b4248181b2dd1d Mon Sep 17 00:00:00 2001 From: David Gregory Date: Thu, 21 Apr 2022 22:19:05 +0100 Subject: [PATCH 05/28] Remove sizeHint - it serves no purpose now that size is cached in bitmap nodes --- core/src/main/scala/cats/data/HashMap.scala | 39 ++++----------------- 1 file changed, 6 insertions(+), 33 deletions(-) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 1716efca5c..193a14d476 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -365,31 +365,6 @@ object HashMap extends HashMapInstances { * @return `true` if this node and `that` are equal, `false` otherwise. */ def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean - - /** - * An approximation of the CHAMP "branch size", used for the deletion algorithm. - * - * The branch size indicates the number of elements transitively reachable from this node, but that is expensive to compute. - * - * There are three important cases when implementing the deletion algorithm: - * - a sub-tree has no elements ([[Node.SizeNone]]) - * - a sub-tree has exactly one element ([[Node.SizeOne]]) - * - a sub-tree has more than one element ([[Node.SizeMany]]) - * - * This approximation assumes that nodes contain many elements (because the deletion algorithm inlines singleton nodes). - * - * @return either [[Node.SizeNone]], [[Node.SizeOne]] or [[Node.SizeMany]] - */ - final def sizeHint = { - if (nodeElements > 0) - Node.SizeMany - else - (valueElements: @annotation.switch) match { - case 0 => Node.SizeNone - case 1 => Node.SizeOne - case _ => Node.SizeMany - } - } } /** @@ -809,13 +784,13 @@ object HashMap extends HashMapInstances { if (newSubNode eq subNode) this - else if (valueElements == 0 && nodeElements == 1) { - if (newSubNode.sizeHint == Node.SizeOne) { + else if (allElements == 1) { + if (newSubNode.size == 1) { newSubNode } else { replaceNode(index, subNode, newSubNode) } - } else if (newSubNode.sizeHint == Node.SizeOne) { + } else if (newSubNode.size == 1) { inlineSubNodeKeyValue(bitPos, newSubNode) } else { replaceNode(index, subNode, newSubNode) @@ -882,8 +857,10 @@ object HashMap extends HashMapInstances { ("0" * Integer.numberOfLeadingZeros(if (valueMap != 0) valueMap else 1)) + Integer.toBinaryString(valueMap) val nodeMapStr = ("0" * Integer.numberOfLeadingZeros(if (nodeMap != 0) nodeMap else 1)) + Integer.toBinaryString(nodeMap) + val contentsStr = + contents.mkString("[", ", ", "]") - s"""BitMapNode(valueMap=$valueMapStr, nodeMap=$nodeMapStr, contents=${contents.mkString("[", ", ", "]")})""" + s"""BitMapNode(valueMap=$valueMapStr, nodeMap=$nodeMapStr, size=$size, contents=${contentsStr})""" } } @@ -893,10 +870,6 @@ object HashMap extends HashMapInstances { final val BitPartitionMask = (1 << BitPartitionSize) - 1 final val MaxDepth = 7 - final val SizeNone = 0 - final val SizeOne = 1 - final val SizeMany = 2 - /** * The `mask` is a 5-bit segment of a 32-bit element hash. * From c655fddd3b631d966586a5ad0f1b9c263a037c0c Mon Sep 17 00:00:00 2001 From: David Gregory Date: Thu, 21 Apr 2022 22:54:39 +0100 Subject: [PATCH 06/28] Revert sizeHint removal as I was wrong - it prevents erroneous subnode escalation --- core/src/main/scala/cats/data/HashMap.scala | 35 +++++++++++++++++++-- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 193a14d476..618a1cc19a 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -365,6 +365,31 @@ object HashMap extends HashMapInstances { * @return `true` if this node and `that` are equal, `false` otherwise. */ def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean + + /** + * An approximation of the CHAMP "branch size", used for the deletion algorithm. + * + * The branch size indicates the number of elements transitively reachable from this node, but that is expensive to compute. + * + * There are three important cases when implementing the deletion algorithm: + * - a sub-tree has no elements ([[Node.SizeNone]]) + * - a sub-tree has exactly one element ([[Node.SizeOne]]) + * - a sub-tree has more than one element ([[Node.SizeMany]]) + * + * This approximation assumes that nodes contain many elements (because the deletion algorithm inlines singleton nodes). + * + * @return either [[Node.SizeNone]], [[Node.SizeOne]] or [[Node.SizeMany]] + */ + final def sizeHint = { + if (nodeElements > 0) + Node.SizeMany + else + (valueElements: @annotation.switch) match { + case 0 => Node.SizeNone + case 1 => Node.SizeOne + case _ => Node.SizeMany + } + } } /** @@ -784,13 +809,13 @@ object HashMap extends HashMapInstances { if (newSubNode eq subNode) this - else if (allElements == 1) { - if (newSubNode.size == 1) { + else if (valueElements == 0 && nodeElements == 1) { + if (newSubNode.sizeHint == Node.SizeOne) { newSubNode } else { replaceNode(index, subNode, newSubNode) } - } else if (newSubNode.size == 1) { + } else if (newSubNode.sizeHint == Node.SizeOne) { inlineSubNodeKeyValue(bitPos, newSubNode) } else { replaceNode(index, subNode, newSubNode) @@ -870,6 +895,10 @@ object HashMap extends HashMapInstances { final val BitPartitionMask = (1 << BitPartitionSize) - 1 final val MaxDepth = 7 + final val SizeNone = 0 + final val SizeOne = 1 + final val SizeMany = 2 + /** * The `mask` is a 5-bit segment of a 32-bit element hash. * From ef63d19693fc014f5a9472c6017bc28adb877a0b Mon Sep 17 00:00:00 2001 From: David Gregory Date: Thu, 21 Apr 2022 23:42:11 +0100 Subject: [PATCH 07/28] Narrow conditions for subnode escalation --- core/src/main/scala/cats/data/HashMap.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 618a1cc19a..9cdf76af66 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -754,7 +754,7 @@ object HashMap extends HashMapInstances { // If this element will be propagated or inlined, calculate the new valueMap at depth - 1 val newBitPos = - if (allElements == 2 && depth > 0) + if (valueElements == 2 && nodeElements == 0 && depth > 0) Node.bitPosFrom(Node.maskFrom(removeKeyHash, depth - 1)) else valueMap ^ bitPos From 7dabe64d04cb01a4e652c4a2e3f769ba772754c4 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Fri, 22 Apr 2022 00:18:02 +0100 Subject: [PATCH 08/28] Use byteswap hashing in HashMap to improve input hash --- .../scala-2.12/cats/compat/HashMapCompat.scala | 8 +++++--- .../scala-2.13+/cats/data/HashMapCompat.scala | 4 +++- core/src/main/scala/cats/data/HashMap.scala | 16 ++++++++++------ .../src/test/scala/cats/tests/HashMapSuite.scala | 2 +- 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala index 973652ab88..4c7277d6a8 100644 --- a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala +++ b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala @@ -21,6 +21,8 @@ package cats.data +import HashMap.improve + private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => /** @@ -31,10 +33,10 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](traversable: TraversableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = traversable.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.add(k, self.hashKey.hash(k), v, 0) + node.add(k, improve(self.hashKey.hash(k)), v, 0) } - if (newRootNode eq rootNode) + if (newRootNode eq self.rootNode) this else new HashMap(newRootNode) @@ -48,7 +50,7 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = { val newRootNode = hm.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.add(k, self.hashKey.hash(k), v, 0) + node.add(k, improve(self.hashKey.hash(k)), v, 0) } if (newRootNode eq self.rootNode) diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala index 4e779ae171..130740d8e3 100644 --- a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala @@ -23,6 +23,8 @@ package cats.data private[data] trait HashMapCompat[K, +V] extends IterableOnce[(K, V)] { self: HashMap[K, V] => + import HashMap.improve + override def knownSize = self.size /** @@ -33,7 +35,7 @@ private[data] trait HashMapCompat[K, +V] extends IterableOnce[(K, V)] { self: Ha */ final def concat[VV >: V](iterable: IterableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = iterable.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.add(k, self.hashKey.hash(k), v, 0) + node.add(k, improve(self.hashKey.hash(k)), v, 0) } if (newRootNode eq self.rootNode) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 9cdf76af66..4c7385a57b 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -37,6 +37,8 @@ import cats.kernel.instances.StaticMethods import cats.syntax.eq._ import java.util.Arrays +import HashMap.improve + /** * An immutable hash map using [[cats.kernel.Hash]] for hashing. * @@ -102,7 +104,7 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No * @return `true` if the map contains `key`, `false` otherwise. */ final def contains(key: K): Boolean = - rootNode.contains(key, hashKey.hash(key), 0) + rootNode.contains(key, improve(hashKey.hash(key)), 0) /** * Get the value associated with `key` in this map. @@ -111,7 +113,7 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No * @return A [[scala.Some]] containing the value if present, else [[scala.None]]. */ final def get(key: K): Option[V] = - rootNode.get(key, hashKey.hash(key), 0) + rootNode.get(key, improve(hashKey.hash(key)), 0) /** * Get the value associated with `key` in this map, or `default` if not present. @@ -132,7 +134,7 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No * @return a new map that contains all key-value pairs of this map and that also contains a mapping from `key` to `value`. */ final def add[VV >: V](key: K, value: VV): HashMap[K, VV] = { - val keyHash = hashKey.hash(key) + val keyHash = improve(hashKey.hash(key)) val newRootNode = rootNode.add(key, keyHash, value, 0) if (newRootNode eq rootNode) @@ -148,7 +150,7 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No * @return a new map that contains all elements of this map but that does not contain `key`. */ final def remove(key: K): HashMap[K, V] = { - val keyHash = hashKey.hash(key) + val keyHash = improve(hashKey.hash(key)) val newRootNode = rootNode.remove(key, keyHash, 0) if (newRootNode eq rootNode) @@ -214,6 +216,8 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No } object HashMap extends HashMapInstances { + final private[data] def improve(hash: Int): Int = + scala.util.hashing.byteswap32(hash) /** * Creates a new empty [[cats.data.HashMap]] which uses `hashKey` for hashing. @@ -243,7 +247,7 @@ object HashMap extends HashMapInstances { */ final def fromSeq[K, V](seq: Seq[(K, V)])(implicit hashKey: Hash[K]): HashMap[K, V] = { val rootNode = seq.foldLeft(Node.empty[K, V]) { case (node, (k, v)) => - node.add(k, hashKey.hash(k), v, 0) + node.add(k, improve(hashKey.hash(k)), v, 0) } new HashMap(rootNode) } @@ -709,7 +713,7 @@ object HashMap extends HashMapInstances { mergeValuesIntoNode( bitPos, existingKey, - hashKey.hash(existingKey), + improve(hashKey.hash(existingKey)), existingValue, newKey, newKeyHash, diff --git a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala index 3f875546c1..e30c0c0111 100644 --- a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala +++ b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala @@ -70,7 +70,7 @@ class HashMapSuite extends CatsSuite { kvs.reverse.distinctBy(_._1).reverse test("show") { - assert(HashMap("a" -> 1, "b" -> 2, "c" -> 3).show === "HashMap(a -> 1, b -> 2, c -> 3)") + assert(HashMap("a" -> 1, "b" -> 2, "c" -> 3).show === "HashMap(c -> 3, a -> 1, b -> 2)") assert(HashMap.empty[String, Int].show === "HashMap()") } From 58649840b47d40d45f6b2b27222ccf0a0aa548b3 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Mon, 25 Apr 2022 09:31:12 +0100 Subject: [PATCH 09/28] Add copyright notice from Scala standard library to meet license obligations as a derived work --- core/src/main/scala/cats/data/HashMap.scala | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 4c7385a57b..fe115fb318 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -19,6 +19,26 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* This file is derived from https://github.com/scala/scala/blob/v2.13.8/src/library/scala/collection/immutable/HashMap.scala + * Modified by Typelevel for redistribution in Cats. + * + * Copyright EPFL and Lightbend, Inc. + * Scala + * Copyright (c) 2002-2022 EPFL + * Copyright (c) 2011-2022 Lightbend, Inc. + * + * Scala includes software developed at + * LAMP/EPFL (https://lamp.epfl.ch/) and + * Lightbend, Inc. (https://www.lightbend.com/). + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package cats.data import cats.Always From 3fdf9d5b275eb5b10bfc09a988f030012462e415 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 3 May 2022 11:27:03 +0100 Subject: [PATCH 10/28] Add benchmarks for HashMap --- .../scala-2.12/cats/bench/HashMapBench.scala | 171 ++++++++++++++++++ .../scala-2.13+/cats/bench/HashMapBench.scala | 171 ++++++++++++++++++ 2 files changed, 342 insertions(+) create mode 100644 bench/src/main/scala-2.12/cats/bench/HashMapBench.scala create mode 100644 bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala diff --git a/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala new file mode 100644 index 0000000000..1e2629bb9b --- /dev/null +++ b/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala @@ -0,0 +1,171 @@ +package cats.bench + +import cats.data.HashMap +import java.util.concurrent.TimeUnit +import org.openjdk.jmh.annotations._ +import org.openjdk.jmh.infra.Blackhole +import scala.collection.immutable.{HashMap => SHashMap} + +@BenchmarkMode(Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +class HashMapBench { + @Param(Array("0", "1", "2", "3", "4", "7", "8", "15", "16", "17", "39", "282", "4096", "131070", "7312102")) + var size: Int = _ + + var hashMap: HashMap[Long, Int] = _ + var otherHashMap: HashMap[Long, Int] = _ + var scalaMap: SHashMap[Long, Int] = _ + var otherScalaMap: SHashMap[Long, Int] = _ + + def hashMapOfSize(n: Int) = HashMap.fromSeq((1L to (n.toLong)).zipWithIndex) + def scalaMapOfSize(n: Int) = SHashMap((1L to (n.toLong)).zipWithIndex: _*) + + @Setup(Level.Trial) + def init(): Unit = { + hashMap = hashMapOfSize(size) + otherHashMap = hashMapOfSize(size) + scalaMap = scalaMapOfSize(size) + otherScalaMap = scalaMapOfSize(size) + } + + @Benchmark + def hashMapFromSeq(bh: Blackhole): Unit = + bh.consume(hashMapOfSize(size)) + + @Benchmark + def scalaMapFromSeq(bh: Blackhole): Unit = + bh.consume(scalaMapOfSize(size)) + + @Benchmark + @OperationsPerInvocation(1000) + def hashMapAdd(bh: Blackhole): Unit = { + var hs = hashMap + var i = 0 + while (i < 1000) { + hs = hs.add(-i.toLong, i) + i += 1 + } + bh.consume(hs) + } + + @Benchmark + @OperationsPerInvocation(1000) + def scalaMapAdd(bh: Blackhole): Unit = { + var ss = scalaMap + var i = 0 + while (i < 1000) { + ss += (-i.toLong -> i) + i += 1 + } + bh.consume(ss) + } + + @Benchmark + @OperationsPerInvocation(1000) + def hashMapRemove(bh: Blackhole): Unit = { + var hs = hashMap + var i = 0L + while (i < 1000L) { + hs = hs.remove(i) + i += 1L + } + bh.consume(hs) + } + + @Benchmark + @OperationsPerInvocation(1000) + def scalaMapRemove(bh: Blackhole): Unit = { + var ss = scalaMap + var i = 0L + while (i < 1000L) { + ss -= i + i += 1L + } + bh.consume(ss) + } + + @Benchmark + @OperationsPerInvocation(1000) + def hashMapContains(bh: Blackhole): Unit = { + var i = 0L + while (i < 1000L) { + bh.consume(hashMap.contains(i)) + i += 1L + } + } + + @Benchmark + @OperationsPerInvocation(1000) + def scalaMapContains(bh: Blackhole): Unit = { + var i = 0L + while (i < 1000L) { + bh.consume(scalaMap.contains(i)) + i += 1L + } + } + + @Benchmark + @OperationsPerInvocation(1000) + def hashMapGet(bh: Blackhole): Unit = { + var i = 0L + while (i < 1000L) { + bh.consume(hashMap.get(i)) + i += 1L + } + } + + @Benchmark + @OperationsPerInvocation(1000) + def scalaMapGet(bh: Blackhole): Unit = { + var i = 0L + while (i < 1000L) { + bh.consume(scalaMap.get(i)) + i += 1L + } + } + + @Benchmark + def hashMapForeach(bh: Blackhole): Unit = + hashMap.foreach((k, v) => bh.consume((k, v))) + + @Benchmark + def scalaMapForeach(bh: Blackhole): Unit = + scalaMap.foreach(bh.consume(_)) + + @Benchmark + def hashMapIterator(bh: Blackhole): Unit = { + val it = hashMap.iterator + while (it.hasNext) { + bh.consume(it.next()) + } + } + + @Benchmark + def scalaMapIterator(bh: Blackhole): Unit = { + val it = scalaMap.iterator + while (it.hasNext) { + bh.consume(it.next()) + } + } + + @Benchmark + def hashMapConcat(bh: Blackhole): Unit = + bh.consume(hashMap.concat(otherHashMap)) + + @Benchmark + def scalaMapConcat(bh: Blackhole): Unit = + bh.consume(scalaMap ++ otherScalaMap) + + @Benchmark + def hashMapUniversalEquals(bh: Blackhole): Unit = + bh.consume(hashMap == otherHashMap) + + @Benchmark + def hashMapEqEquals(bh: Blackhole): Unit = + bh.consume(hashMap === otherHashMap) + + @Benchmark + def scalaMapUniversalEquals(bh: Blackhole): Unit = + bh.consume(scalaMap == otherScalaMap) +} diff --git a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala new file mode 100644 index 0000000000..d497099203 --- /dev/null +++ b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala @@ -0,0 +1,171 @@ +package cats.bench + +import cats.data.HashMap +import java.util.concurrent.TimeUnit +import org.openjdk.jmh.annotations._ +import org.openjdk.jmh.infra.Blackhole +import scala.collection.immutable.{HashMap => SHashMap} + +@BenchmarkMode(Array(Mode.AverageTime)) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +class HashMapBench { + @Param(Array("0", "1", "2", "3", "4", "7", "8", "15", "16", "17", "39", "282", "4096", "131070", "7312102")) + var size: Int = _ + + var hashMap: HashMap[Long, Int] = _ + var otherHashMap: HashMap[Long, Int] = _ + var scalaMap: SHashMap[Long, Int] = _ + var otherScalaMap: SHashMap[Long, Int] = _ + + def hashMapOfSize(n: Int) = HashMap.fromSeq((1L to (n.toLong)).zipWithIndex) + def scalaMapOfSize(n: Int) = SHashMap.from((1L to (n.toLong)).zipWithIndex) + + @Setup(Level.Trial) + def init(): Unit = { + hashMap = hashMapOfSize(size) + otherHashMap = hashMapOfSize(size) + scalaMap = scalaMapOfSize(size) + otherScalaMap = scalaMapOfSize(size) + } + + @Benchmark + def hashMapFromSeq(bh: Blackhole): Unit = + bh.consume(hashMapOfSize(size)) + + @Benchmark + def scalaMapFromSeq(bh: Blackhole): Unit = + bh.consume(scalaMapOfSize(size)) + + @Benchmark + @OperationsPerInvocation(1000) + def hashMapAdd(bh: Blackhole): Unit = { + var hs = hashMap + var i = 0 + while (i < 1000) { + hs = hs.add(-i.toLong, i) + i += 1 + } + bh.consume(hs) + } + + @Benchmark + @OperationsPerInvocation(1000) + def scalaMapAdd(bh: Blackhole): Unit = { + var ss = scalaMap + var i = 0 + while (i < 1000) { + ss += (-i.toLong -> i) + i += 1 + } + bh.consume(ss) + } + + @Benchmark + @OperationsPerInvocation(1000) + def hashMapRemove(bh: Blackhole): Unit = { + var hs = hashMap + var i = 0L + while (i < 1000L) { + hs = hs.remove(i) + i += 1L + } + bh.consume(hs) + } + + @Benchmark + @OperationsPerInvocation(1000) + def scalaMapRemove(bh: Blackhole): Unit = { + var ss = scalaMap + var i = 0L + while (i < 1000L) { + ss -= i + i += 1L + } + bh.consume(ss) + } + + @Benchmark + @OperationsPerInvocation(1000) + def hashMapContains(bh: Blackhole): Unit = { + var i = 0L + while (i < 1000L) { + bh.consume(hashMap.contains(i)) + i += 1L + } + } + + @Benchmark + @OperationsPerInvocation(1000) + def scalaMapContains(bh: Blackhole): Unit = { + var i = 0L + while (i < 1000L) { + bh.consume(scalaMap.contains(i)) + i += 1L + } + } + + @Benchmark + @OperationsPerInvocation(1000) + def hashMapGet(bh: Blackhole): Unit = { + var i = 0L + while (i < 1000L) { + bh.consume(hashMap.get(i)) + i += 1L + } + } + + @Benchmark + @OperationsPerInvocation(1000) + def scalaMapGet(bh: Blackhole): Unit = { + var i = 0L + while (i < 1000L) { + bh.consume(scalaMap.get(i)) + i += 1L + } + } + + @Benchmark + def hashMapForeach(bh: Blackhole): Unit = + hashMap.foreach((k, v) => bh.consume((k, v))) + + @Benchmark + def scalaMapForeach(bh: Blackhole): Unit = + scalaMap.foreach(bh.consume(_)) + + @Benchmark + def hashMapIterator(bh: Blackhole): Unit = { + val it = hashMap.iterator + while (it.hasNext) { + bh.consume(it.next()) + } + } + + @Benchmark + def scalaMapIterator(bh: Blackhole): Unit = { + val it = scalaMap.iterator + while (it.hasNext) { + bh.consume(it.next()) + } + } + + @Benchmark + def hashMapConcat(bh: Blackhole): Unit = + bh.consume(hashMap.concat(otherHashMap)) + + @Benchmark + def scalaMapConcat(bh: Blackhole): Unit = + bh.consume(scalaMap ++ otherScalaMap) + + @Benchmark + def hashMapUniversalEquals(bh: Blackhole): Unit = + bh.consume(hashMap == otherHashMap) + + @Benchmark + def hashMapEqEquals(bh: Blackhole): Unit = + bh.consume(hashMap === otherHashMap) + + @Benchmark + def scalaMapUniversalEquals(bh: Blackhole): Unit = + bh.consume(scalaMap == otherScalaMap) +} From d5e75965b2eca8317eac898551f2f17a1d55e520 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 3 May 2022 11:27:25 +0100 Subject: [PATCH 11/28] Override concrete UnorderedFoldable operations for HashMap --- .../scala-2.12/cats/bench/HashMapBench.scala | 21 +++++++++++++++++++ .../scala-2.13+/cats/bench/HashMapBench.scala | 21 +++++++++++++++++++ core/src/main/scala/cats/data/HashMap.scala | 6 ++++++ 3 files changed, 48 insertions(+) diff --git a/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala index 1e2629bb9b..09dd8c7b36 100644 --- a/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala +++ b/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2015 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package cats.bench import cats.data.HashMap diff --git a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala index d497099203..74e9055616 100644 --- a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala +++ b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala @@ -1,3 +1,24 @@ +/* + * Copyright (c) 2015 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package cats.bench import cats.data.HashMap diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index fe115fb318..5e3e76b779 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -1122,6 +1122,12 @@ object HashMap extends HashMapInstances { sealed abstract private[data] class HashMapInstances extends HashMapInstances1 { implicit def catsDataUnorderedTraverseForHashMap[K: Hash]: UnorderedTraverse[HashMap[K, *]] = new UnorderedTraverse[HashMap[K, *]] { + override def nonEmpty[A](fa: HashMap[K, A]): Boolean = fa.nonEmpty + + override def isEmpty[A](fa: HashMap[K, A]): Boolean = fa.isEmpty + + override def size[A](fa: HashMap[K, A]): Long = fa.size.toLong + def unorderedFoldMap[U, V](hm: HashMap[K, U])(f: U => V)(implicit V: CommutativeMonoid[V]): V = V.combineAll(hm.iterator.map { case (_, u) => f(u) }) From 186264fc091c2313c46b44cf0337d20a5f027e4e Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 3 May 2022 12:07:53 +0100 Subject: [PATCH 12/28] Override even more concrete UnorderedFoldable operations for HashMap --- core/src/main/scala/cats/data/HashMap.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 5e3e76b779..2800e654a8 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -1128,6 +1128,18 @@ sealed abstract private[data] class HashMapInstances extends HashMapInstances1 { override def size[A](fa: HashMap[K, A]): Long = fa.size.toLong + override def contains_[A](fa: HashMap[K, A], v: A)(implicit ev: Eq[A]): Boolean = + fa.iterator.exists { case (_, value) => ev.eqv(v, value) } + + override def exists[A](fa: HashMap[K, A])(p: A => Boolean): Boolean = + fa.iterator.exists { case (_, value) => p(value) } + + override def forall[A](fa: HashMap[K, A])(p: A => Boolean): Boolean = + fa.iterator.forall { case (_, value) => p(value) } + + override def count[A](fa: HashMap[K, A])(p: A => Boolean): Long = + fa.iterator.foldLeft(0L) { case (c, (_, value)) => if (p(value)) c + 1L else c } + def unorderedFoldMap[U, V](hm: HashMap[K, U])(f: U => V)(implicit V: CommutativeMonoid[V]): V = V.combineAll(hm.iterator.map { case (_, u) => f(u) }) From 151e52d8d632c8f9220f262b3ff73d52113d9a1c Mon Sep 17 00:00:00 2001 From: David Gregory Date: Mon, 16 May 2022 16:36:40 +0100 Subject: [PATCH 13/28] Addressing review comments: * Use primitive HashMap operations when generating random HashMaps * Use a better approach to test HashMap key collisions * Remove reverseIterator * Foldable.iterateRight signature changed * HashMap#add renamed to HashMap#updated * HashMap#remove renamed to HashMap#removed * Added HashMap.fromFoldable * Renamed CHAMP node element count fields * Use NonEmptyVector in CHAMP collision node * Fixed various nits --- .../scala-2.12/cats/bench/HashMapBench.scala | 12 +- .../scala-2.13+/cats/bench/HashMapBench.scala | 10 +- .../cats/compat/HashMapCompat.scala | 4 +- .../scala-2.13+/cats/data/HashMapCompat.scala | 2 +- core/src/main/scala/cats/Foldable.scala | 6 +- core/src/main/scala/cats/data/HashMap.scala | 346 +++++++++--------- .../scala/cats/data/NonEmptyCollection.scala | 1 + .../main/scala/cats/data/NonEmptyList.scala | 10 + .../main/scala/cats/data/NonEmptySeq.scala | 2 + .../main/scala/cats/data/NonEmptyVector.scala | 2 + .../cats/kernel/compat/HashCompat.scala | 2 +- .../cats/kernel/instances/MapInstances.scala | 10 +- .../cats/laws/discipline/arbitrary.scala | 42 ++- .../test/scala/cats/tests/HashMapSuite.scala | 227 +++++------- 14 files changed, 350 insertions(+), 326 deletions(-) diff --git a/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala index 09dd8c7b36..bdeb9125a1 100644 --- a/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala +++ b/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala @@ -60,11 +60,11 @@ class HashMapBench { @Benchmark @OperationsPerInvocation(1000) - def hashMapAdd(bh: Blackhole): Unit = { + def hashMapUpdated(bh: Blackhole): Unit = { var hs = hashMap var i = 0 while (i < 1000) { - hs = hs.add(-i.toLong, i) + hs = hs.updated(-i.toLong, i) i += 1 } bh.consume(hs) @@ -72,7 +72,7 @@ class HashMapBench { @Benchmark @OperationsPerInvocation(1000) - def scalaMapAdd(bh: Blackhole): Unit = { + def scalaMapUpdated(bh: Blackhole): Unit = { var ss = scalaMap var i = 0 while (i < 1000) { @@ -84,11 +84,11 @@ class HashMapBench { @Benchmark @OperationsPerInvocation(1000) - def hashMapRemove(bh: Blackhole): Unit = { + def hashMapRemoved(bh: Blackhole): Unit = { var hs = hashMap var i = 0L while (i < 1000L) { - hs = hs.remove(i) + hs = hs.removed(i) i += 1L } bh.consume(hs) @@ -96,7 +96,7 @@ class HashMapBench { @Benchmark @OperationsPerInvocation(1000) - def scalaMapRemove(bh: Blackhole): Unit = { + def scalaMapRemoved(bh: Blackhole): Unit = { var ss = scalaMap var i = 0L while (i < 1000L) { diff --git a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala index 74e9055616..250033f811 100644 --- a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala +++ b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala @@ -60,11 +60,11 @@ class HashMapBench { @Benchmark @OperationsPerInvocation(1000) - def hashMapAdd(bh: Blackhole): Unit = { + def hashMapUpdated(bh: Blackhole): Unit = { var hs = hashMap var i = 0 while (i < 1000) { - hs = hs.add(-i.toLong, i) + hs = hs.updated(-i.toLong, i) i += 1 } bh.consume(hs) @@ -72,7 +72,7 @@ class HashMapBench { @Benchmark @OperationsPerInvocation(1000) - def scalaMapAdd(bh: Blackhole): Unit = { + def scalaMapUpdated(bh: Blackhole): Unit = { var ss = scalaMap var i = 0 while (i < 1000) { @@ -84,11 +84,11 @@ class HashMapBench { @Benchmark @OperationsPerInvocation(1000) - def hashMapRemove(bh: Blackhole): Unit = { + def hashMapRemoved(bh: Blackhole): Unit = { var hs = hashMap var i = 0L while (i < 1000L) { - hs = hs.remove(i) + hs = hs.removed(i) i += 1L } bh.consume(hs) diff --git a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala index 4c7277d6a8..038fcc8546 100644 --- a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala +++ b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala @@ -33,7 +33,7 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](traversable: TraversableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = traversable.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.add(k, improve(self.hashKey.hash(k)), v, 0) + node.updated(k, improve(self.hashKey.hash(k)), v, 0) } if (newRootNode eq self.rootNode) @@ -50,7 +50,7 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = { val newRootNode = hm.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.add(k, improve(self.hashKey.hash(k)), v, 0) + node.updated(k, improve(self.hashKey.hash(k)), v, 0) } if (newRootNode eq self.rootNode) diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala index 130740d8e3..71a2fcbe9d 100644 --- a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala @@ -35,7 +35,7 @@ private[data] trait HashMapCompat[K, +V] extends IterableOnce[(K, V)] { self: Ha */ final def concat[VV >: V](iterable: IterableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = iterable.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.add(k, improve(self.hashKey.hash(k)), v, 0) + node.updated(k, improve(self.hashKey.hash(k)), v, 0) } if (newRootNode eq self.rootNode) diff --git a/core/src/main/scala/cats/Foldable.scala b/core/src/main/scala/cats/Foldable.scala index d9f48ea52a..a76a027fdd 100644 --- a/core/src/main/scala/cats/Foldable.scala +++ b/core/src/main/scala/cats/Foldable.scala @@ -953,15 +953,15 @@ object Foldable { private val sentinel: Function1[Any, Any] = new scala.runtime.AbstractFunction1[Any, Any] { def apply(a: Any) = this } def iterateRight[A, B](iterable: Iterable[A], lb: Eval[B])(f: (A, Eval[B]) => Eval[B]): Eval[B] = - iterateRight(Eval.always(iterable.iterator), lb)(f) + iterateRight(() => iterable.iterator, lb)(f) - private[cats] def iterateRight[A, B](iterator: Eval[Iterator[A]], lb: Eval[B])( + private[cats] def iterateRight[A, B](mkIterator: () => Iterator[A], lb: Eval[B])( f: (A, Eval[B]) => Eval[B] ): Eval[B] = { def loop(it: Iterator[A]): Eval[B] = Eval.defer(if (it.hasNext) f(it.next(), loop(it)) else lb) - iterator.flatMap(loop) + Eval.always(mkIterator()).flatMap(loop) } def iterateRightDefer[G[_]: Defer, A, B](iterable: Iterable[A], lb: G[B])(f: (A, G[B]) => G[B]): G[B] = { diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 2800e654a8..d9c5b96e74 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -41,6 +41,8 @@ package cats.data +import cats.kernel.compat.scalaVersionSpecific._ + import cats.Always import cats.CommutativeApplicative import cats.Eval @@ -81,12 +83,12 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No new HashMap.Iterator(rootNode) /** - * A reverse iterator for this map that can be used only once. + * An iterator for the keys of this map that can be used only once. * - * @return an iterator that iterates through this map in the reverse order of [[HashMap#iterator]]. + * @return an iterator that iterates through the keys of this map. */ - final def reverseIterator: Iterator[(K, V)] = - new HashMap.ReverseIterator(rootNode) + final def keysIterator: Iterator[K] = + iterator.map { case (k, _) => k } /** * The size of this map. @@ -153,9 +155,9 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No * @param value the value to be added. * @return a new map that contains all key-value pairs of this map and that also contains a mapping from `key` to `value`. */ - final def add[VV >: V](key: K, value: VV): HashMap[K, VV] = { + final def updated[VV >: V](key: K, value: VV): HashMap[K, VV] = { val keyHash = improve(hashKey.hash(key)) - val newRootNode = rootNode.add(key, keyHash, value, 0) + val newRootNode = rootNode.updated(key, keyHash, value, 0) if (newRootNode eq rootNode) this @@ -169,9 +171,9 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No * @param key the key to be removed. * @return a new map that contains all elements of this map but that does not contain `key`. */ - final def remove(key: K): HashMap[K, V] = { + final def removed(key: K): HashMap[K, V] = { val keyHash = improve(hashKey.hash(key)) - val newRootNode = rootNode.remove(key, keyHash, 0) + val newRootNode = rootNode.removed(key, keyHash, 0) if (newRootNode eq rootNode) this @@ -267,7 +269,41 @@ object HashMap extends HashMapInstances { */ final def fromSeq[K, V](seq: Seq[(K, V)])(implicit hashKey: Hash[K]): HashMap[K, V] = { val rootNode = seq.foldLeft(Node.empty[K, V]) { case (node, (k, v)) => - node.add(k, improve(hashKey.hash(k)), v, 0) + node.updated(k, improve(hashKey.hash(k)), v, 0) + } + new HashMap(rootNode) + } + + /** + * Creates a new [[cats.data.HashMap]] which contains all elements of `iterable`. + * + * @param iterable the iterable source of elements to add to the [[cats.data.HashMap]]. + * @param hashKey the [[cats.kernel.Hash]] instance used for hashing values. + * @return a new [[cats.data.HashMap]] which contains all elements of `seq`. + */ + final def fromIterableOnce[K, V](iterable: IterableOnce[(K, V)])(implicit hashKey: Hash[K]): HashMap[K, V] = { + iterable match { + case seq: Seq[(K, V) @unchecked] => + fromSeq(seq) + case notSeq => + val rootNode = notSeq.iterator.foldLeft(Node.empty[K, V]) { case (node, (k, v)) => + node.updated(k, improve(hashKey.hash(k)), v, 0) + } + new HashMap(rootNode) + } + } + + /** + * Creates a new [[cats.data.HashMap]] which contains all elements of `fkv`. + * + * @param fkv the [[cats.Foldable]] structure of elements to add to the [[cats.data.HashMap]]. + * @param F the [[cats.Foldable]] instance used for folding the structure. + * @param hashKey the [[cats.kernel.Hash]] instance used for hashing values. + * @return a new [[cats.data.HashMap]] which contains all elements of `seq`. + */ + final def fromFoldable[F[_], K, V](fkv: F[(K, V)])(implicit F: Foldable[F], hashKey: Hash[K]): HashMap[K, V] = { + val rootNode = F.foldLeft(fkv, Node.empty[K, V]) { case (node, (k, v)) => + node.updated(k, improve(hashKey.hash(k)), v, 0) } new HashMap(rootNode) } @@ -277,17 +313,17 @@ object HashMap extends HashMapInstances { /** * @return The number of value and node elements in the contents array of this trie node. */ - def allElements: Int + def allElementsCount: Int /** * @return The number of value elements in the contents array of this trie node. */ - def valueElements: Int + def keyValueCount: Int /** * @return The number of node elements in the contents array of this trie node. */ - def nodeElements: Int + def nodeCount: Int /** * @return the number of value elements in this subtree. @@ -326,7 +362,7 @@ object HashMap extends HashMapInstances { /** * @return a [[scala.Boolean]] indicating whether the current trie node contains any value elements. */ - def hasValues: Boolean + def hasKeyValues: Boolean /** * Apply f to each key-value pair of the current trie node and its sub-nodes for its side effects. @@ -364,7 +400,7 @@ object HashMap extends HashMapInstances { * @param depth the 0-indexed depth in the trie structure. * @return a new [[HashMap.Node]] containing the element to add. */ - def add[VV >: V](newKey: K, newKeyHash: Int, value: VV, depth: Int): Node[K, VV] + def updated[VV >: V](newKey: K, newKeyHash: Int, value: VV, depth: Int): Node[K, VV] /** * The current trie node updated to remove the provided key. @@ -374,7 +410,7 @@ object HashMap extends HashMapInstances { * @param depth the 0-indexed depth in the trie structure. * @return a new [[HashMap.Node]] with the element removed. */ - def remove(removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] + def removed(removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] /** * Typesafe equality operator. @@ -405,10 +441,10 @@ object HashMap extends HashMapInstances { * @return either [[Node.SizeNone]], [[Node.SizeOne]] or [[Node.SizeMany]] */ final def sizeHint = { - if (nodeElements > 0) + if (nodeCount > 0) Node.SizeMany else - (valueElements: @annotation.switch) match { + (keyValueCount: @annotation.switch) match { case 0 => Node.SizeNone case 1 => Node.SizeOne case _ => Node.SizeMany @@ -421,30 +457,33 @@ object HashMap extends HashMapInstances { * this node type is used to collect all of the colliding elements and implement the [[HashMap.Node]] * interface at a performance cost compared with a [[HashMap.BitMapNode]]. * - * @tparam A the type of the elements contained in this node. + * @tparam K the type of the keys contained in this node. + * @tparam V the type of the values contained in this node. * @param collisionHash the hash value at which all of the contents of this node collide. * @param contents the value elements whose hashes collide. */ final private[HashMap] class CollisionNode[K, +V]( val collisionHash: Int, - val contents: Vector[(K, V)] + val contents: NonEmptyVector[(K, V)] )(implicit hashKey: Hash[K]) extends Node[K, V] { final def hasNodes: Boolean = false - final def hasValues: Boolean = true + final def hasKeyValues: Boolean = true - final def allElements: Int = valueElements + final def allElementsCount: Int = keyValueCount - final def valueElements: Int = contents.size + final def keyValueCount: Int = contents.length - final def nodeElements: Int = 0 + final def nodeCount: Int = 0 - final def size: Int = contents.size + final def size: Int = contents.length - final def foreach[U](f: (K, V) => U): Unit = - contents.foreach(f.tupled) + final def foreach[U](f: (K, V) => U): Unit = { + val fnTupled = f.tupled + contents.iterator.foreach(fnTupled) + } final def contains(key: K, keyHash: Int, depth: Int): Boolean = collisionHash == keyHash && contents.exists { case (k, _) => hashKey.eqv(key, k) } @@ -454,30 +493,32 @@ object HashMap extends HashMapInstances { else contents.collectFirst { case (k, v) if hashKey.eqv(key, k) => v } final def getKey(index: Int): K = - contents(index)._1 + contents.getUnsafe(index)._1 final def getValue(index: Int): V = - contents(index)._2 + contents.getUnsafe(index)._2 final def getMapping(index: Int): (K, V) = - contents(index) + contents.getUnsafe(index) final def getNode(index: Int): Node[K, V] = throw new IndexOutOfBoundsException("No sub-nodes present in hash-collision leaf node.") - final def add[VV >: V](newKey: K, newKeyHash: Int, newValue: VV, depth: Int): Node[K, VV] = - if (contains(newKey, newKeyHash, depth)) - this - else + final def updated[VV >: V](newKey: K, newKeyHash: Int, newValue: VV, depth: Int): Node[K, VV] = + if (!contains(newKey, newKeyHash, depth)) new CollisionNode(newKeyHash, contents :+ (newKey -> newValue)) + else { + val newContents = contents.filterNot { case (k, _) => hashKey.eqv(newKey, k) } + new CollisionNode[K, VV](collisionHash, NonEmptyVector(newKey -> newValue, newContents)) + } - final override def remove(key: K, keyHash: Int, depth: Int): Node[K, V] = + final override def removed(key: K, keyHash: Int, depth: Int): Node[K, V] = if (!contains(key, keyHash, depth)) this else { val newContents = contents.filterNot { case (k, _) => hashKey.eqv(key, k) } - if (newContents.size > 1) - new CollisionNode(collisionHash, newContents) + if (newContents.lengthCompare(1) > 0) + new CollisionNode(collisionHash, NonEmptyVector.fromVectorUnsafe(newContents)) else { // This is a singleton node so the depth doesn't matter; // we only need to index into it to inline the value in our parent node @@ -485,14 +526,13 @@ object HashMap extends HashMapInstances { val bitPos = Node.bitPosFrom(mask) val newContentsArray = new Array[Any](Node.StrideLength * newContents.length) var i = 0 - while (i < newContents.length) { - val (k, v) = newContents(i) + newContents.foreach { case (k, v) => val keyIndex = Node.StrideLength * i newContentsArray(keyIndex) = k newContentsArray(keyIndex + 1) = v i += 1 } - new BitMapNode[K, V](bitPos, 0, newContentsArray, newContents.size) + new BitMapNode[K, V](bitPos, 0, newContentsArray, newContents.length) } } @@ -501,7 +541,7 @@ object HashMap extends HashMapInstances { that match { case node: CollisionNode[_, _] => (this.collisionHash === node.collisionHash) && - (this.contents.size === node.contents.size) && + (this.contents.length === node.contents.length) && this.contents.forall { case (kl, vl) => node.contents.exists { case (kr, vr) => hashKey.eqv(kl, kr) && eqValue.eqv(vl, vr) } } @@ -514,55 +554,70 @@ object HashMap extends HashMapInstances { final override def equals(that: Any): Boolean = that match { case node: CollisionNode[_, _] => (this.collisionHash == node.collisionHash) && - (this.contents.size == node.contents.size) && - this.contents.forall(node.contents.contains) + (this.contents.length == node.contents.length) && + this.contents.forall(kv => node.contents.exists(_ == kv)) case _ => false } final override def toString(): String = { - s"""CollisionNode(hash=${collisionHash}, values=${contents.mkString("[", ",", "]")})""" + s"""CollisionNode(hash=${collisionHash}, values=${contents.iterator.mkString("[", ",", "]")})""" } } /** - * A CHAMP bitmap node. Stores value element and node element positions in the `contents` array - * in the `valueMap` and `nodeMap` integer bitmaps. + * A CHAMP bitmap node. Stores key-value pair and node positions in the `contents` array in the `keyValueMap` and + * `nodeMap` integer bitmaps respectively. * - * @tparam A the type of the elements contained in this node. - * @param valueMap integer bitmap indicating the notional positions of value elements in the `contents` array. + * The index of an element is calculated from a 5-bit segment of the hash of the key. The segment to use is + * determined according to the depth in the structure, starting with the least significant bits at the root level. + * + * When there are collisions in the 5-bit segment of the hash at the current depth in the structure, a new subnode + * must be created in order to store the colliding elements. In this subnode, the next 5-bit segment is used to + * determine the order of elements. + * + * Key-value pairs are stored at consecutive indices in the array, indexed from the start of the array + * and ordered according to the relative indices calculated from the hash of the key. + * + * Sub-nodes are stored at the end of the array, indexed from the end of the array and ordered according + * to the relative indices calculated from the hash of their keys. As a result of this indexing method + * they are stored in reverse order. + * + * @tparam K the type of the keys contained in this node. + * @tparam V the type of the values contained in this node. + * @param keyValueMap integer bitmap indicating the notional positions of key-value elements in the `contents` array. * @param nodeMap integer bitmap indicating the notional positions of node elements in the `contents` array. * @param contents an array of `A` value elements and `Node[A]` sub-node elements. * @param size the number of value elements in this subtree. */ final private[HashMap] class BitMapNode[K, +V]( - val valueMap: Int, + val keyValueMap: Int, val nodeMap: Int, val contents: Array[Any], val size: Int )(implicit hashKey: Hash[K]) extends Node[K, V] { - final def hasValues: Boolean = - valueMap != 0 + final def hasKeyValues: Boolean = + keyValueMap != 0 final def hasNodes: Boolean = nodeMap != 0 - final def allElements: Int = - valueElements + nodeElements + final def allElementsCount: Int = + keyValueCount + nodeCount - final def valueElements: Int = - Integer.bitCount(valueMap) + final def keyValueCount: Int = + Integer.bitCount(keyValueMap) - final def nodeElements: Int = + final def nodeCount: Int = Integer.bitCount(nodeMap) final private def hasNodeAt(bitPos: Int): Boolean = (nodeMap & bitPos) != 0 - final private def hasValueAt(bitPos: Int): Boolean = - (valueMap & bitPos) != 0 + final private def hasKeyValueAt(bitPos: Int): Boolean = + (keyValueMap & bitPos) != 0 final def getKey(index: Int): K = contents(Node.StrideLength * index).asInstanceOf[K] @@ -570,22 +625,25 @@ object HashMap extends HashMapInstances { final def getValue(index: Int): V = contents(Node.StrideLength * index + 1).asInstanceOf[V] - final def getMapping(index: Int): (K, V) = - contents(Node.StrideLength * index).asInstanceOf[K] -> - contents(Node.StrideLength * index + 1).asInstanceOf[V] + final def getMapping(index: Int): (K, V) = { + val keyValueOffset = Node.StrideLength * index + contents(keyValueOffset).asInstanceOf[K] -> + contents(keyValueOffset + 1).asInstanceOf[V] + } final def getNode(index: Int): Node[K, V] = contents(contents.length - 1 - index).asInstanceOf[Node[K, V]] final def foreach[U](f: (K, V) => U): Unit = { var i = 0 - while (i < valueElements) { - f.tupled(getMapping(i)) + val fnTupled = f.tupled + while (i < keyValueCount) { + fnTupled(getMapping(i)) i += 1 } i = 0 - while (i < nodeElements) { + while (i < nodeCount) { getNode(i).foreach(f) i += 1 } @@ -595,8 +653,8 @@ object HashMap extends HashMapInstances { val mask = Node.maskFrom(keyHash, depth) val bitPos = Node.bitPosFrom(mask) - if (hasValueAt(bitPos)) { - val index = Node.indexFrom(valueMap, bitPos) + if (hasKeyValueAt(bitPos)) { + val index = Node.indexFrom(keyValueMap, bitPos) hashKey.eqv(key, getKey(index)) } else if (hasNodeAt(bitPos)) { val index = Node.indexFrom(nodeMap, bitPos) @@ -610,8 +668,8 @@ object HashMap extends HashMapInstances { val mask = Node.maskFrom(keyHash, depth) val bitPos = Node.bitPosFrom(mask) - if (hasValueAt(bitPos)) { - val index = Node.indexFrom(valueMap, bitPos) + if (hasKeyValueAt(bitPos)) { + val index = Node.indexFrom(keyValueMap, bitPos) if (hashKey.eqv(key, getKey(index))) { Some(getValue(index)) } else { @@ -635,16 +693,16 @@ object HashMap extends HashMapInstances { depth: Int ): Node[K, VV] = { if (depth >= Node.MaxDepth) { - new CollisionNode[K, VV](leftHash, Vector(left -> leftValue, right -> rightValue)) + new CollisionNode[K, VV](leftHash, NonEmptyVector.of(left -> leftValue, right -> rightValue)) } else { val leftMask = Node.maskFrom(leftHash, depth) val rightMask = Node.maskFrom(rightHash, depth) if (leftMask != rightMask) { - val valueMap = Node.bitPosFrom(leftMask) | Node.bitPosFrom(rightMask) + val keyValueMap = Node.bitPosFrom(leftMask) | Node.bitPosFrom(rightMask) if (leftMask < rightMask) { - new BitMapNode[K, VV](valueMap, 0, Array(left, leftValue, right, rightValue), 2) + new BitMapNode[K, VV](keyValueMap, 0, Array(left, leftValue, right, rightValue), 2) } else { - new BitMapNode[K, VV](valueMap, 0, Array(right, rightValue, left, leftValue), 2) + new BitMapNode[K, VV](keyValueMap, 0, Array(right, rightValue, left, leftValue), 2) } } else { val nodeMap = Node.bitPosFrom(leftMask) @@ -665,7 +723,7 @@ object HashMap extends HashMapInstances { depth: Int ): Node[K, VV] = { val newNode = mergeValues(left, leftHash, leftValue, right, rightHash, rightValue, depth) - val valueIndex = Node.StrideLength * Node.indexFrom(valueMap, bitPos) + val valueIndex = Node.StrideLength * Node.indexFrom(keyValueMap, bitPos) val nodeIndex = contents.length - Node.StrideLength - Node.indexFrom(nodeMap, bitPos) val newContents = new Array[Any](contents.length - 1) @@ -682,7 +740,7 @@ object HashMap extends HashMapInstances { contents.length - nodeIndex - Node.StrideLength ) - new BitMapNode[K, V](valueMap ^ bitPos, nodeMap | bitPos, newContents, size + 1) + new BitMapNode[K, V](keyValueMap ^ bitPos, nodeMap | bitPos, newContents, size + 1) } final private def replaceNode[VV >: V](index: Int, oldNode: Node[K, VV], newNode: Node[K, VV]): Node[K, VV] = { @@ -690,7 +748,7 @@ object HashMap extends HashMapInstances { val newContents = new Array[Any](contents.length) System.arraycopy(contents, 0, newContents, 0, contents.length) newContents(targetIndex) = newNode - new BitMapNode[K, V](valueMap, nodeMap, newContents, size + (newNode.size - oldNode.size)) + new BitMapNode[K, V](keyValueMap, nodeMap, newContents, size + (newNode.size - oldNode.size)) } final private def updateNode[VV >: V]( @@ -702,7 +760,7 @@ object HashMap extends HashMapInstances { ): Node[K, VV] = { val index = Node.indexFrom(nodeMap, bitPos) val subNode = getNode(index) - val newSubNode = subNode.add(newKey, newKeyHash, newValue, depth + 1) + val newSubNode = subNode.updated(newKey, newKeyHash, newValue, depth + 1) if (newSubNode eq subNode) this @@ -715,7 +773,7 @@ object HashMap extends HashMapInstances { val newContents = new Array[Any](contents.length) System.arraycopy(contents, 0, newContents, 0, contents.length) newContents(valueIndex) = newValue - new BitMapNode[K, V](valueMap, nodeMap, newContents, size) + new BitMapNode[K, V](keyValueMap, nodeMap, newContents, size) } final private def updateKeyValue[VV >: V]( @@ -725,7 +783,7 @@ object HashMap extends HashMapInstances { newValue: VV, depth: Int ): Node[K, VV] = { - val index = Node.indexFrom(valueMap, bitPos) + val index = Node.indexFrom(keyValueMap, bitPos) val (existingKey, existingValue) = getMapping(index) if (hashKey.eqv(existingKey, newKey)) { replaceValueAtIndex(index, newValue) @@ -743,20 +801,20 @@ object HashMap extends HashMapInstances { } final private def appendKeyValue[VV >: V](bitPos: Int, newKey: K, newValue: VV): Node[K, VV] = { - val index = Node.StrideLength * Node.indexFrom(valueMap, bitPos) + val index = Node.StrideLength * Node.indexFrom(keyValueMap, bitPos) val newContents = new Array[Any](contents.length + Node.StrideLength) System.arraycopy(contents, 0, newContents, 0, index) newContents(index) = newKey newContents(index + 1) = newValue System.arraycopy(contents, index, newContents, index + Node.StrideLength, contents.length - index) - new BitMapNode[K, V](valueMap | bitPos, nodeMap, newContents, size + 1) + new BitMapNode[K, V](keyValueMap | bitPos, nodeMap, newContents, size + 1) } - final def add[VV >: V](newKey: K, newKeyHash: Int, newValue: VV, depth: Int): Node[K, VV] = { + final def updated[VV >: V](newKey: K, newKeyHash: Int, newValue: VV, depth: Int): Node[K, VV] = { val mask = Node.maskFrom(newKeyHash, depth) val bitPos = Node.bitPosFrom(mask) - if (hasValueAt(bitPos)) { + if (hasKeyValueAt(bitPos)) { updateKeyValue(bitPos, newKey, newKeyHash, newValue, depth) } else if (hasNodeAt(bitPos)) { updateNode(bitPos, newKey, newKeyHash, newValue, depth) @@ -766,22 +824,24 @@ object HashMap extends HashMapInstances { } final private def removeKeyValue(bitPos: Int, removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] = { - val index = Node.indexFrom(valueMap, bitPos) + val index = Node.indexFrom(keyValueMap, bitPos) val existingKey = getKey(index) if (!hashKey.eqv(existingKey, removeKey)) { this - } else if (allElements == 1) { + } else if (allElementsCount == 1) { Node.empty[K, V] } else { val keyIndex = Node.StrideLength * index val newContents = new Array[Any](contents.length - Node.StrideLength) - // If this element will be propagated or inlined, calculate the new valueMap at depth - 1 + /* Single-element nodes are inlined. If this element should be propagated or + * inlined once an entry is deleted, calculate the new keyValueMap at depth - 1 + */ val newBitPos = - if (valueElements == 2 && nodeElements == 0 && depth > 0) + if (keyValueCount == 2 && nodeCount == 0 && depth > 0) Node.bitPosFrom(Node.maskFrom(removeKeyHash, depth - 1)) else - valueMap ^ bitPos + keyValueMap ^ bitPos System.arraycopy(contents, 0, newContents, 0, keyIndex) @@ -799,7 +859,7 @@ object HashMap extends HashMapInstances { final private def inlineSubNodeKeyValue[VV >: V](bitPos: Int, newSubNode: Node[K, VV]): Node[K, VV] = { val nodeIndex = contents.length - 1 - Node.indexFrom(nodeMap, bitPos) - val keyIndex = Node.StrideLength * Node.indexFrom(valueMap, bitPos) + val keyIndex = Node.StrideLength * Node.indexFrom(keyValueMap, bitPos) val newContents = new Array[Any](contents.length + 1) val (key, value) = newSubNode.getMapping(0) @@ -818,7 +878,7 @@ object HashMap extends HashMapInstances { contents.length - nodeIndex - 1 ) - new BitMapNode[K, V](valueMap | bitPos, nodeMap ^ bitPos, newContents, size - 1) + new BitMapNode[K, V](keyValueMap | bitPos, nodeMap ^ bitPos, newContents, size - 1) } final private def removeKeyValueFromSubNode( @@ -829,11 +889,11 @@ object HashMap extends HashMapInstances { ): Node[K, V] = { val index = Node.indexFrom(nodeMap, bitPos) val subNode = getNode(index) - val newSubNode = subNode.remove(removeKey, removeKeyHash, depth + 1) + val newSubNode = subNode.removed(removeKey, removeKeyHash, depth + 1) if (newSubNode eq subNode) this - else if (valueElements == 0 && nodeElements == 1) { + else if (keyValueCount == 0 && nodeCount == 1) { if (newSubNode.sizeHint == Node.SizeOne) { newSubNode } else { @@ -846,11 +906,11 @@ object HashMap extends HashMapInstances { } } - final override def remove(removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] = { + final override def removed(removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] = { val mask = Node.maskFrom(removeKeyHash, depth) val bitPos = Node.bitPosFrom(mask) - if (hasValueAt(bitPos)) { + if (hasKeyValueAt(bitPos)) { removeKeyValue(bitPos, removeKey, removeKeyHash, depth) } else if (hasNodeAt(bitPos)) { removeKeyValueFromSubNode(bitPos, removeKey, removeKeyHash, depth) @@ -863,18 +923,18 @@ object HashMap extends HashMapInstances { (this eq that) || { that match { case node: BitMapNode[_, _] => - (this.valueMap === node.valueMap) && + (this.keyValueMap === node.keyValueMap) && (this.nodeMap === node.nodeMap) && (this.size === node.size) && { var i = 0 - while (i < valueElements) { + while (i < keyValueCount) { val (kl, vl) = getMapping(i) val (kr, vr) = node.getMapping(i) if (hashKey.neqv(kl, kr) || eqValue.neqv(vl, vr)) return false i += 1 } i = 0 - while (i < nodeElements) { + while (i < nodeCount) { if (!(getNode(i).===[VV](node.getNode(i)))) return false i += 1 } @@ -889,7 +949,7 @@ object HashMap extends HashMapInstances { final override def equals(that: Any): Boolean = that match { case node: BitMapNode[_, _] => (this eq node) || { - (this.valueMap == node.valueMap) && + (this.keyValueMap == node.keyValueMap) && (this.nodeMap == node.nodeMap) && (this.size == node.size) && Arrays.equals( @@ -902,14 +962,16 @@ object HashMap extends HashMapInstances { } final override def toString(): String = { - val valueMapStr = - ("0" * Integer.numberOfLeadingZeros(if (valueMap != 0) valueMap else 1)) + Integer.toBinaryString(valueMap) + val keyValueMapStr = + ("0" * Integer.numberOfLeadingZeros(if (keyValueMap != 0) keyValueMap else 1)) + Integer.toBinaryString( + keyValueMap + ) val nodeMapStr = ("0" * Integer.numberOfLeadingZeros(if (nodeMap != 0) nodeMap else 1)) + Integer.toBinaryString(nodeMap) val contentsStr = contents.mkString("[", ", ", "]") - s"""BitMapNode(valueMap=$valueMapStr, nodeMap=$nodeMapStr, size=$size, contents=${contentsStr})""" + s"""BitMapNode(keyValueMap=$keyValueMapStr, nodeMap=$nodeMapStr, size=$size, contents=${contentsStr})""" } } @@ -989,7 +1051,7 @@ object HashMap extends HashMapInstances { def this(rootNode: Node[K, V]) = { this() if (rootNode.hasNodes) pushNode(rootNode) - if (rootNode.hasValues) pushValues(rootNode) + if (rootNode.hasKeyValues) pushValues(rootNode) } final private def pushNode(node: Node[K, V]): Unit = { @@ -1001,13 +1063,13 @@ object HashMap extends HashMapInstances { nodeStack(currentDepth) = node nodeIndicesAndLengths(cursorIndex) = 0 - nodeIndicesAndLengths(lengthIndex) = node.nodeElements + nodeIndicesAndLengths(lengthIndex) = node.nodeCount } final private def pushValues(node: Node[K, V]): Unit = { currentNode = node currentValuesIndex = 0 - currentValuesLength = node.valueElements + currentValuesLength = node.keyValueCount } final private def getMoreValues(): Boolean = { @@ -1028,7 +1090,7 @@ object HashMap extends HashMapInstances { pushNode(nextNode) } - if (nextNode.hasValues) { + if (nextNode.hasKeyValues) { pushValues(nextNode) foundMoreValues = true } @@ -1053,70 +1115,6 @@ object HashMap extends HashMapInstances { value } } - - private[HashMap] class ReverseIterator[K, V] extends scala.collection.AbstractIterator[(K, V)] { - private var currentNode: Node[K, V] = null - - private var currentValuesIndex: Int = -1 - - private var currentDepth: Int = -1 - - private val nodeStack: Array[Node[K, V]] = - new Array(Node.MaxDepth + 1) - - private val nodeIndices: Array[Int] = - new Array(Node.MaxDepth + 1) - - def this(rootNode: Node[K, V]) = { - this() - pushNode(rootNode) - getMoreValues() - } - - final private def pushNode(node: Node[K, V]): Unit = { - currentDepth += 1 - nodeStack(currentDepth) = node - nodeIndices(currentDepth) = node.nodeElements - 1 - } - - final private def pushValues(node: Node[K, V]): Unit = { - currentNode = node - currentValuesIndex = node.valueElements - 1 - } - - final private def getMoreValues(): Boolean = { - var foundMoreValues = false - - while (!foundMoreValues && currentDepth >= 0) { - val nodeIndex = nodeIndices(currentDepth) - nodeIndices(currentDepth) -= 1 - - if (nodeIndex >= 0) { - pushNode(nodeStack(currentDepth).getNode(nodeIndex)) - } else { - val currentNode = nodeStack(currentDepth) - currentDepth -= 1 - if (currentNode.hasValues) { - pushValues(currentNode) - foundMoreValues = true - } - } - } - - foundMoreValues - } - - final override def hasNext: Boolean = - (currentValuesIndex >= 0) || getMoreValues() - - final override def next(): (K, V) = { - if (!hasNext) throw new NoSuchElementException - val value = currentNode.getMapping(currentValuesIndex) - currentValuesIndex -= 1 - value - } - } - } sealed abstract private[data] class HashMapInstances extends HashMapInstances1 { @@ -1150,9 +1148,9 @@ sealed abstract private[data] class HashMapInstances extends HashMapInstances1 { Always(G.pure(HashMap.empty[K, V])) val gHashMap = Foldable - .iterateRight(Eval.always(hashMap.iterator), emptyHm) { case ((k, u), hm) => + .iterateRight(() => hashMap.iterator, emptyHm) { case ((k, u), hm) => G.map2Eval(f(u), hm) { (v, map) => - map.add(k, v) + map.updated(k, v) } } @@ -1185,11 +1183,17 @@ class HashMapMonoid[K: Hash, V](implicit V: Semigroup[V]) extends Monoid[HashMap def combine(xs: HashMap[K, V], ys: HashMap[K, V]): HashMap[K, V] = { if (xs.size <= ys.size) { xs.iterator.foldLeft(ys) { case (my, (k, x)) => - my.add(k, Semigroup.maybeCombine(x, my.get(k))) + my.get(k) match { + case Some(y) => my.updated(k, V.combine(x, y)) + case None => my.updated(k, x) + } } } else { ys.iterator.foldLeft(xs) { case (mx, (k, y)) => - mx.add(k, Semigroup.maybeCombine(mx.get(k), y)) + mx.get(k) match { + case Some(x) => mx.updated(k, V.combine(x, y)) + case None => mx.updated(k, y) + } } } } diff --git a/core/src/main/scala/cats/data/NonEmptyCollection.scala b/core/src/main/scala/cats/data/NonEmptyCollection.scala index d48b435b85..72c158a84e 100644 --- a/core/src/main/scala/cats/data/NonEmptyCollection.scala +++ b/core/src/main/scala/cats/data/NonEmptyCollection.scala @@ -40,6 +40,7 @@ private[cats] trait NonEmptyCollection[+A, U[+_], NE[+_]] extends Any { def filter(f: A => Boolean): U[A] def filterNot(p: A => Boolean): U[A] def collect[B](pf: PartialFunction[A, B]): U[B] + def collectFirst[B](pf: PartialFunction[A, B]): Option[B] def find(p: A => Boolean): Option[A] def exists(p: A => Boolean): Boolean def forall(p: A => Boolean): Boolean diff --git a/core/src/main/scala/cats/data/NonEmptyList.scala b/core/src/main/scala/cats/data/NonEmptyList.scala index 863a65f5a4..08e89e8906 100644 --- a/core/src/main/scala/cats/data/NonEmptyList.scala +++ b/core/src/main/scala/cats/data/NonEmptyList.scala @@ -216,6 +216,16 @@ final case class NonEmptyList[+A](head: A, tail: List[A]) extends NonEmptyCollec tail.collect(pf) } + /** + * Find the first element matching the partial function, if one exists + */ + def collectFirst[B](pf: PartialFunction[A, B]): Option[B] = + if (pf.isDefinedAt(head)) { + Some(pf.apply(head)) + } else { + tail.collectFirst(pf) + } + /** * Find the first element matching the predicate, if one exists */ diff --git a/core/src/main/scala/cats/data/NonEmptySeq.scala b/core/src/main/scala/cats/data/NonEmptySeq.scala index 442e430a45..fce3b21f0e 100644 --- a/core/src/main/scala/cats/data/NonEmptySeq.scala +++ b/core/src/main/scala/cats/data/NonEmptySeq.scala @@ -97,6 +97,8 @@ final class NonEmptySeq[+A] private (val toSeq: Seq[A]) extends AnyVal with NonE def collect[B](pf: PartialFunction[A, B]): Seq[B] = toSeq.collect(pf) + def collectFirst[B](pf: PartialFunction[A, B]): Option[B] = toSeq.collectFirst(pf) + /** * Alias for [[concat]] */ diff --git a/core/src/main/scala/cats/data/NonEmptyVector.scala b/core/src/main/scala/cats/data/NonEmptyVector.scala index f5f1527042..1a0a618cd0 100644 --- a/core/src/main/scala/cats/data/NonEmptyVector.scala +++ b/core/src/main/scala/cats/data/NonEmptyVector.scala @@ -99,6 +99,8 @@ final class NonEmptyVector[+A] private (val toVector: Vector[A]) def collect[B](pf: PartialFunction[A, B]): Vector[B] = toVector.collect(pf) + def collectFirst[B](pf: PartialFunction[A, B]): Option[B] = toVector.collectFirst(pf) + /** * Alias for [[concat]] */ diff --git a/kernel/src/main/scala-2.12/cats/kernel/compat/HashCompat.scala b/kernel/src/main/scala-2.12/cats/kernel/compat/HashCompat.scala index 63631d9d47..09c1c9fcb8 100644 --- a/kernel/src/main/scala-2.12/cats/kernel/compat/HashCompat.scala +++ b/kernel/src/main/scala-2.12/cats/kernel/compat/HashCompat.scala @@ -91,7 +91,7 @@ private[kernel] class HashCompat { val h = A.hash(x) a += h b ^= h - if (h != 0) c *= h + c *= h | 1 n += 1 } var h = setSeed diff --git a/kernel/src/main/scala/cats/kernel/instances/MapInstances.scala b/kernel/src/main/scala/cats/kernel/instances/MapInstances.scala index f55ec241b5..d6bd7e2b81 100644 --- a/kernel/src/main/scala/cats/kernel/instances/MapInstances.scala +++ b/kernel/src/main/scala/cats/kernel/instances/MapInstances.scala @@ -103,11 +103,17 @@ class MapMonoid[K, V](implicit V: Semigroup[V]) extends Monoid[Map[K, V]] { def combine(xs: Map[K, V], ys: Map[K, V]): Map[K, V] = if (xs.size <= ys.size) { xs.foldLeft(ys) { case (my, (k, x)) => - my.updated(k, Semigroup.maybeCombine(x, my.get(k))) + my.get(k) match { + case Some(y) => my.updated(k, V.combine(x, y)) + case None => my.updated(k, x) + } } } else { ys.foldLeft(xs) { case (mx, (k, y)) => - mx.updated(k, Semigroup.maybeCombine(mx.get(k), y)) + mx.get(k) match { + case Some(x) => mx.updated(k, V.combine(x, y)) + case None => mx.updated(k, y) + } } } diff --git a/laws/src/main/scala/cats/laws/discipline/arbitrary.scala b/laws/src/main/scala/cats/laws/discipline/arbitrary.scala index 53366c370b..20fbe2d459 100644 --- a/laws/src/main/scala/cats/laws/discipline/arbitrary.scala +++ b/laws/src/main/scala/cats/laws/discipline/arbitrary.scala @@ -420,8 +420,46 @@ object arbitrary extends ArbitraryInstances0 with ScalaVersionSpecific.Arbitrary K: Arbitrary[K], V: Arbitrary[V], hash: Hash[K] - ): Arbitrary[HashMap[K, V]] = - Arbitrary(getArbitrary[List[(K, V)]].map(HashMap.fromSeq(_)(hash))) + ): Arbitrary[HashMap[K, V]] = Arbitrary( + Gen.oneOf( + // empty + Gen.const(HashMap.empty), + // fromSeq + getArbitrary[Seq[(K, V)]].map(HashMap.fromSeq(_)(hash)), + // fromIterableOnce + getArbitrary[Seq[(K, V)]].map(seq => HashMap.fromIterableOnce(seq.view)), + // fromFoldable + getArbitrary[Seq[(K, V)]].map(HashMap.fromFoldable(_)), + // updated + Gen.delay(for { + hm <- getArbitrary[HashMap[K, V]] + (k, v) <- getArbitrary[(K, V)] + } yield hm.updated(k, v)), + // updated existing + Gen.delay(for { + hm <- getArbitrary[HashMap[K, V]] + if hm.nonEmpty + k <- Gen.oneOf(hm.keysIterator.toList) + v <- getArbitrary[V] + } yield hm.updated(k, v)), + // removed + Gen.delay(for { + hm <- getArbitrary[HashMap[K, V]] + k <- getArbitrary[K] + } yield hm.removed(k)), + // removed existing + Gen.delay(for { + hm <- getArbitrary[HashMap[K, V]] + if hm.nonEmpty + k <- Gen.oneOf(hm.keysIterator.toList) + } yield hm.removed(k)), + // concat + Gen.delay(for { + left <- getArbitrary[HashMap[K, V]] + right <- getArbitrary[HashMap[K, V]] + } yield left.concat(right)) + ) + ) implicit def catsLawsCogenForHashSet[K, V](implicit K: Cogen[K], V: Cogen[V]): Cogen[HashMap[K, V]] = Cogen.it[HashMap[K, V], (K, V)](_.iterator) diff --git a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala index e30c0c0111..2454964684 100644 --- a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala +++ b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala @@ -24,32 +24,18 @@ package cats.tests import cats.data.HashMap import cats.kernel.laws.discipline.CommutativeMonoidTests import cats.kernel.laws.discipline.HashTests -import cats.laws.discipline.arbitrary._ -import cats.laws.discipline.UnorderedTraverseTests import cats.laws.discipline.SerializableTests +import cats.laws.discipline.UnorderedTraverseTests +import cats.laws.discipline.arbitrary._ import cats.syntax.eq._ import org.scalacheck.Arbitrary.arbitrary import org.scalacheck.Gen import org.scalacheck.Prop.forAll + import scala.collection.mutable +import scala.util.Random class HashMapSuite extends CatsSuite { - // Examples from https://stackoverflow.com/questions/9406775/why-does-string-hashcode-in-java-have-many-conflicts - val collidingStrings = for { - left <- ('A' to 'Y').toList - first = List(left, 'a').mkString - second = List((left + 1).toChar, 'B').mkString - } yield (first, second) - - val collidingKv = Gen.oneOf(collidingStrings).flatMap { case (leftStr, rightStr) => - for { - leftInt <- arbitrary[Int] - rightInt <- arbitrary[Int] - } yield (leftStr -> leftInt, rightStr -> rightInt) - } - - val collidingKvs = Gen.listOf(collidingKv) - checkAll("HashMap[Int, String]", HashTests[HashMap[Int, String]].hash) checkAll("Hash[HashMap[Int, String]]", SerializableTests.serializable(HashMap.catsDataHashForHashMap[Int, String])) @@ -65,6 +51,34 @@ class HashMapSuite extends CatsSuite { SerializableTests.serializable(HashMap.catsDataCommutativeMonoidForHashMap[Int, Int]) ) + // Based on https://stackoverflow.com/questions/9406775/why-does-string-hashcode-in-java-have-many-conflicts + // We can produce a collision for any string by adding 1 to and subtracting 31 from two consecutive chars. + def collidingString(str: String) = { + if (str.length < 2) + str + else { + val randomOffset = Random.nextInt(str.length - 1) + val firstChar = str.substring(randomOffset).charAt(0) + val secondChar = str.substring(randomOffset).charAt(1) + + if (!Character.isDefined(firstChar + 1) || !Character.isDefined(secondChar - 31)) + str + else + str + .updated(randomOffset, (firstChar + 1).toChar) + .updated(randomOffset + 1, (secondChar - 31).toChar) + } + } + + def genStringKey(hm: HashMap[String, Int]): Gen[String] = + if (hm.isEmpty) + arbitrary[String] + else + Gen.oneOf( + Gen.oneOf(hm.keysIterator.toList).map(collidingString), + arbitrary[String] + ) + // Key-value pairs with the last binding for each distinct key def distinctBindings[K, V](kvs: List[(K, V)]) = kvs.reverse.distinctBy(_._1).reverse @@ -84,15 +98,13 @@ class HashMapSuite extends CatsSuite { } test("===") { - forAll { (kvs: List[(Int, String)]) => - val left = HashMap.fromSeq(kvs) - val right = HashMap.fromSeq(kvs) - assert(left === right) + forAll { (hashMap: HashMap[Int, String]) => + assert(hashMap === hashMap) } - forAll { (leftKvs: List[(Int, String)], rightKvs: List[(Int, String)]) => - val left = HashMap.fromSeq(leftKvs) - val right = HashMap.fromSeq(rightKvs) + forAll { (left: HashMap[Int, String], right: HashMap[Int, String]) => + val leftKvs = distinctBindings(left.iterator.toList) + val rightKvs = distinctBindings(right.iterator.toList) assert((distinctBindings(leftKvs) === distinctBindings(rightKvs)) === (left === right)) } } @@ -102,53 +114,68 @@ class HashMapSuite extends CatsSuite { assert(HashMap(1 -> "a", 2 -> "b", 3 -> "c").size === 3) assert(HashMap("Aa" -> 1, "BB" -> 2).size == 2) - forAll { (kvs: List[(Int, String)]) => - val hashMap = HashMap.fromSeq(kvs) - assert(hashMap.size === distinctBindings(kvs).size) + forAll { (hashMap: HashMap[Int, String]) => + val distinctKvs = distinctBindings(hashMap.iterator.toList) + assert(hashMap.size === distinctKvs.size) } } test("get") { - forAll { (kvs: List[(Int, String)]) => - val hashMap = HashMap.fromSeq(kvs) - distinctBindings(kvs).foreach { case (k, v) => - hashMap.get(k) === Some(v) + forAll { (hashMap: HashMap[Int, String]) => + distinctBindings(hashMap.iterator.toList) + .foreach { case (k, v) => + hashMap.get(k) === Some(v) + } + } + } + + test("updated") { + forAll { (initial: HashMap[String, Int], value: Int) => + forAll(genStringKey(initial)) { (key: String) => + val updated = initial.updated(key, value) + assert(updated.contains(key)) + assert(updated.get(key) === Some(value)) + if (initial.contains(key)) + assert(updated.size === initial.size) + else + assert(updated.size === (initial.size + 1)) + } + } + } + + test("removed") { + forAll { (initial: HashMap[String, Int]) => + forAll(genStringKey(initial)) { (key: String) => + val removed = initial.removed(key) + assert(!removed.contains(key)) + assert(removed.get(key) === None) + if (initial.contains(key)) + assert(removed.size === (initial.size - 1)) + else + assert(removed === initial) } } } test("concat") { - forAll { (leftKvs: List[(Int, String)], rightKvs: List[(Int, String)]) => + forAll { (left: HashMap[Int, String], right: HashMap[Int, String]) => + val leftKvs = left.iterator.toList + val rightKvs = right.iterator.toList val distinctKvs = distinctBindings(leftKvs ++ rightKvs) - val left = HashMap.fromSeq(leftKvs) - val right = HashMap.fromSeq(rightKvs) val both = left.concat(right) assert(both === HashMap.fromSeq(leftKvs ++ rightKvs)) + assert(both.size === distinctKvs.size) distinctKvs.foreach { case (k, v) => assert(both.contains(k)) assert(both.get(k) === Some(v)) + if (right.contains(k)) + assert(both.get(k) === right.get(k)) + else + assert(both.get(k) === left.get(k)) } } } - property("Empty HashMap never contains") { - forAll { (i: Int) => - assert(!HashMap.empty[Int, String].contains(i)) - } - } - - property("Empty HashMap add consistent with contains") { - forAll { (i: Int, s: String) => - assert(HashMap.empty[Int, String].add(i, s).contains(i)) - } - } - - property("Empty HashMap add and remove consistent with contains") { - forAll { (i: Int, s: String) => - assert(!HashMap.empty[Int, String].add(i, s).remove(i).contains(i)) - } - } - property("fromSeq consistent with contains") { forAll { (kvs: List[(Int, String)]) => val hashMap = HashMap.fromSeq(kvs) @@ -159,133 +186,67 @@ class HashMapSuite extends CatsSuite { } } - property("remove consistent with contains") { + property("fromIterableOnce consistent with contains") { forAll { (kvs: List[(Int, String)]) => - val distinctKvs = distinctBindings(kvs) - val hashMap = HashMap.fromSeq(kvs) + val hashMap = HashMap.fromIterableOnce(kvs.view) - distinctKvs.foldLeft(hashMap) { case (hm, (i, _)) => - if (!hm.contains(i)) { - println(hm) - println(i) - println(hm.contains(i)) - } - assert(hm.contains(i)) - assert(!hm.remove(i).contains(i)) - hm.remove(i) + kvs.foreach { case (k, _) => + assert(hashMap.contains(k)) } - - () } } - property("add and remove with collisions consistent with contains") { - forAll(collidingKvs) { (collisions: List[((String, Int), (String, Int))]) => - - val distinctCollisions = collisions.distinctBy(_._1._1) - - val hashMap = distinctCollisions.foldLeft(HashMap.empty[String, Int]) { case (hm, ((lk, lv), (rk, rv))) => - hm.add(lk, lv).add(rk, rv) - } - - distinctCollisions.foldLeft(hashMap) { case (hm, ((lk, _), (rk, _))) => - assert(hm.contains(lk)) - assert(hm.contains(rk)) - - val removeL = hm.remove(lk) - assert(removeL.contains(rk)) - assert(!removeL.contains(lk)) - - val removeR = hm.remove(rk) - assert(removeR.contains(lk)) - assert(!removeR.contains(rk)) - - val removeLR = removeL.remove(rk) - assert(!removeLR.contains(lk)) - assert(!removeLR.contains(rk)) + property("fromFoldable consistent with contains") { + forAll { (kvs: List[(Int, String)]) => + val hashMap = HashMap.fromFoldable(kvs) - removeLR + kvs.foreach { case (k, _) => + assert(hashMap.contains(k)) } - - () } } property("iterator consistent with contains") { - forAll { (kvs: List[(Int, String)]) => - val hashMap = HashMap.fromSeq(kvs) - val distinctKvs = distinctBindings(kvs) - + forAll { (hashMap: HashMap[Int, String]) => val iterated = mutable.ListBuffer[(Int, String)]() hashMap.iterator.foreach { iterated += _ } - - assert(distinctKvs.forall(iterated.contains)) - assert(iterated.forall(distinctKvs.contains)) assert(iterated.toList.distinctBy(_._1) === iterated.toList) assert(iterated.forall { case (k, _) => hashMap.contains(k) }) } } - property("iterator consistent with reverseIterator") { - forAll { (kvs: List[(Int, String)]) => - val hashMap = HashMap.fromSeq(kvs) - - val iterated = mutable.ListBuffer[(Int, String)]() - val reverseIterated = mutable.ListBuffer[(Int, String)]() - hashMap.iterator.foreach { iterated += _ } - hashMap.reverseIterator.foreach { reverseIterated += _ } - - assert(iterated.toList === reverseIterated.toList.reverse) - } - } - property("foreach consistent with contains") { - forAll { (kvs: List[(Int, String)]) => - val hashMap = HashMap.fromSeq(kvs) - val distinctKvs = distinctBindings(kvs) - + forAll { (hashMap: HashMap[Int, String]) => val foreached = mutable.ListBuffer[(Int, String)]() hashMap.foreach { (i: Int, s: String) => foreached += ((i, s)) } - - assert(distinctKvs.forall(foreached.contains)) - assert(foreached.forall(distinctKvs.contains)) assert(foreached.toList.distinct === foreached.toList) assert(foreached.forall { case (k, _) => hashMap.contains(k) }) } } property("foreach and iterator consistent") { - forAll { (kvs: List[(Int, String)]) => - val hashMap = HashMap.fromSeq(kvs) - + forAll { (hashMap: HashMap[Int, String]) => val iterated = mutable.ListBuffer[(Int, String)]() val foreached = mutable.ListBuffer[(Int, String)]() hashMap.iterator.foreach { iterated += _ } hashMap.foreach { (i: Int, s: String) => foreached += ((i, s)) } - assert(foreached.forall(iterated.contains)) assert(iterated.forall(foreached.contains)) } } property("size consistent with iterator") { - forAll { (kvs: List[(Int, String)]) => - val hashMap = HashMap.fromSeq(kvs) - + forAll { (hashMap: HashMap[Int, String]) => var size = 0 hashMap.iterator.foreach { _ => size += 1 } - assert(hashMap.size === size) } } property("size consistent with foreach") { - forAll { (kvs: List[(Int, String)]) => - val hashMap = HashMap.fromSeq(kvs) - + forAll { (hashMap: HashMap[Int, String]) => var size = 0 hashMap.foreach { case _ => size += 1 } - assert(hashMap.size === size) } } From 5c15c530a69d2c718ffdcb9850f88f7e1789445d Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 00:19:31 +0100 Subject: [PATCH 14/28] Add a test for collectFirst to NonEmptyCollectionSuite --- laws/src/main/scala/cats/laws/discipline/arbitrary.scala | 2 +- .../src/test/scala/cats/tests/NonEmptyCollectionSuite.scala | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/laws/src/main/scala/cats/laws/discipline/arbitrary.scala b/laws/src/main/scala/cats/laws/discipline/arbitrary.scala index 20fbe2d459..105f2960ff 100644 --- a/laws/src/main/scala/cats/laws/discipline/arbitrary.scala +++ b/laws/src/main/scala/cats/laws/discipline/arbitrary.scala @@ -461,7 +461,7 @@ object arbitrary extends ArbitraryInstances0 with ScalaVersionSpecific.Arbitrary ) ) - implicit def catsLawsCogenForHashSet[K, V](implicit K: Cogen[K], V: Cogen[V]): Cogen[HashMap[K, V]] = + implicit def catsLawsCogenForHashMap[K, V](implicit K: Cogen[K], V: Cogen[V]): Cogen[HashMap[K, V]] = Cogen.it[HashMap[K, V], (K, V)](_.iterator) } diff --git a/tests/shared/src/test/scala/cats/tests/NonEmptyCollectionSuite.scala b/tests/shared/src/test/scala/cats/tests/NonEmptyCollectionSuite.scala index aab0d423c5..b7a3bdb3f4 100644 --- a/tests/shared/src/test/scala/cats/tests/NonEmptyCollectionSuite.scala +++ b/tests/shared/src/test/scala/cats/tests/NonEmptyCollectionSuite.scala @@ -103,6 +103,12 @@ abstract class NonEmptyCollectionSuite[U[+_], NE[+_], NEC[x] <: NonEmptyCollecti } } + test("collectFirst is consistent with iterator.toList.collectFirst") { + forAll { (is: NE[Int], pf: PartialFunction[Int, String]) => + assert(is.collectFirst(pf) === is.iterator.toList.collectFirst(pf)) + } + } + test("find is consistent with iterator.toList.find") { forAll { (is: NE[Int], pred: Int => Boolean) => assert(is.find(pred) === (is.iterator.toList.find(pred))) From 0169ce321e858839add97b5d2c4f2f77e1976a0f Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 01:03:37 +0100 Subject: [PATCH 15/28] Fix key-value bitmap calculation when subnodes are propagated as the new root node --- core/src/main/scala/cats/data/HashMap.scala | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index d9c5b96e74..597d0f2543 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -834,12 +834,15 @@ object HashMap extends HashMapInstances { val keyIndex = Node.StrideLength * index val newContents = new Array[Any](contents.length - Node.StrideLength) - /* Single-element nodes are inlined. If this element should be propagated or - * inlined once an entry is deleted, calculate the new keyValueMap at depth - 1 + /* Single-element nodes are always inlined unless they reach the root level. + * + * If the node is inlined the keyValueMap is not used, so we calculate the new + * keyValueMap at root level just in case this node is propagated as the new + * root node. */ - val newBitPos = + val newKeyValueMap = if (keyValueCount == 2 && nodeCount == 0 && depth > 0) - Node.bitPosFrom(Node.maskFrom(removeKeyHash, depth - 1)) + Node.bitPosFrom(Node.maskFrom(removeKeyHash, depth = 0)) else keyValueMap ^ bitPos @@ -853,7 +856,7 @@ object HashMap extends HashMapInstances { contents.length - keyIndex - Node.StrideLength ) - new BitMapNode[K, V](newBitPos, nodeMap, newContents, size - 1) + new BitMapNode[K, V](newKeyValueMap, nodeMap, newContents, size - 1) } } From b292134f8cae8b98d4fd64a14fc459e1848deea7 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 13:08:55 +0100 Subject: [PATCH 16/28] Add tests for consistency between String representation and equality --- .../test/scala/cats/tests/HashMapSuite.scala | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala index 2454964684..53e9b164df 100644 --- a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala +++ b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala @@ -250,4 +250,22 @@ class HashMapSuite extends CatsSuite { assert(hashMap.size === size) } } + + property("show consistent with ===") { + forAll { (left: HashMap[Int, String], right: HashMap[Int, String]) => + if (left.show === right.show) + assert(left === right) + else + assert(left =!= right) + } + } + + property("toString consistent with equals") { + forAll { (left: HashMap[Int, String], right: HashMap[Int, String]) => + if (left.toString == right.toString) + assertEquals(left, right) + else + assertNotEquals(left, right) + } + } } From 27a69d34150dea144e1000ad6951003058408fd0 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 13:18:48 +0100 Subject: [PATCH 17/28] Reduce duplication in 2.12 concat, remove an unnecessary branch --- .../main/scala-2.12/cats/compat/HashMapCompat.scala | 12 ++---------- core/src/main/scala/cats/data/HashMap.scala | 6 +----- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala index 038fcc8546..3e7767d122 100644 --- a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala +++ b/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala @@ -48,14 +48,6 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => * @param traversable the collection of key-value pairs to be added. * @return a new map that contains all key-value pairs of this map and `traversable`. */ - final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = { - val newRootNode = hm.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, 0) - } - - if (newRootNode eq self.rootNode) - this - else - new HashMap(newRootNode) - } + final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = + concat(hm.iterator) } diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 597d0f2543..1f16db11c7 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -158,11 +158,7 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No final def updated[VV >: V](key: K, value: VV): HashMap[K, VV] = { val keyHash = improve(hashKey.hash(key)) val newRootNode = rootNode.updated(key, keyHash, value, 0) - - if (newRootNode eq rootNode) - this - else - new HashMap(newRootNode) + new HashMap(newRootNode) } /** From 6203e1a193e0f63379d94e79064e1953311037d9 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 19:18:46 +0100 Subject: [PATCH 18/28] Weaken show / equality laws due to collisions --- tests/shared/src/test/scala/cats/tests/HashMapSuite.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala index 53e9b164df..1ed855cca6 100644 --- a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala +++ b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala @@ -255,8 +255,6 @@ class HashMapSuite extends CatsSuite { forAll { (left: HashMap[Int, String], right: HashMap[Int, String]) => if (left.show === right.show) assert(left === right) - else - assert(left =!= right) } } @@ -264,8 +262,6 @@ class HashMapSuite extends CatsSuite { forAll { (left: HashMap[Int, String], right: HashMap[Int, String]) => if (left.toString == right.toString) assertEquals(left, right) - else - assertNotEquals(left, right) } } } From 0f663332f848ae4720513bbf57a6c4ea481ae2b0 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 22:00:49 +0100 Subject: [PATCH 19/28] Add a toMap operation that returns a Map wrapper for HashMap --- .../cats/{compat => data}/HashMapCompat.scala | 0 .../cats/data/HashMapCompatCompanion.scala | 28 +++++++++++++++++ .../cats/data/HashMapCompatCompanion.scala | 31 +++++++++++++++++++ core/src/main/scala/cats/data/HashMap.scala | 18 +++++++++-- .../test/scala/cats/tests/HashMapSuite.scala | 8 +++++ 5 files changed, 82 insertions(+), 3 deletions(-) rename core/src/main/scala-2.12/cats/{compat => data}/HashMapCompat.scala (100%) create mode 100644 core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala create mode 100644 core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala diff --git a/core/src/main/scala-2.12/cats/compat/HashMapCompat.scala b/core/src/main/scala-2.12/cats/data/HashMapCompat.scala similarity index 100% rename from core/src/main/scala-2.12/cats/compat/HashMapCompat.scala rename to core/src/main/scala-2.12/cats/data/HashMapCompat.scala diff --git a/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala b/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala new file mode 100644 index 0000000000..fee2b6ba47 --- /dev/null +++ b/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala @@ -0,0 +1,28 @@ +package cats.data + +import scala.collection.immutable.AbstractMap +import scala.util.hashing.MurmurHash3 + +private[data] trait HashMapCompatCompanion { + private[data] class WrappedHashMap[K, V](private[WrappedHashMap] val hashMap: HashMap[K, V]) extends AbstractMap[K, V] { + final def iterator: collection.Iterator[(K, V)] = hashMap.iterator + final def get(key: K): Option[V] = hashMap.get(key) + final def +[V1 >: V](kv: (K, V1)): Map[K, V1] = new WrappedHashMap(hashMap.updated(kv._1, kv._2)) + final def -(key: K): Map[K, V] = new WrappedHashMap(hashMap.removed(key)) + final override def size: Int = hashMap.size + final override def contains(key: K): Boolean = hashMap.contains(key) + final override def foreach[U](f: ((K, V)) => U): Unit = hashMap.foreach(Function.untupled(f)) + final override def getOrElse[V1 >: V](key: K, default: => V1): V1 = hashMap.getOrElse(key, default) + final override def keysIterator: Iterator[K] = hashMap.keysIterator + final override def valuesIterator: Iterator[V] = hashMap.valuesIterator + final override def isEmpty: Boolean = hashMap.isEmpty + final override def nonEmpty: Boolean = hashMap.nonEmpty + final override def hashCode: Int = MurmurHash3.mapHash(this) + final override def equals(that: Any): Boolean = that match { + case map: WrappedHashMap[_, _] => + (this eq map) || (this.hashMap == map.hashMap) + case other => + super.equals(other) + } + } +} diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala new file mode 100644 index 0000000000..4cead5073e --- /dev/null +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala @@ -0,0 +1,31 @@ +package cats.data + +import scala.collection.immutable.AbstractMap +import scala.util.hashing.MurmurHash3 + +private[data] trait HashMapCompatCompanion { + private[data] class WrappedHashMap[K, V](private[WrappedHashMap] val hashMap: HashMap[K, V]) extends AbstractMap[K, V] { + final def iterator: collection.Iterator[(K, V)] = hashMap.iterator + final def get(key: K): Option[V] = hashMap.get(key) + final def updated[V1 >: V](key: K, value: V1): Map[K, V1] = new WrappedHashMap(hashMap.updated(key, value)) + final def removed(key: K): Map[K, V] = new WrappedHashMap(hashMap.removed(key)) + final override def size: Int = hashMap.size + final override def knownSize: Int = hashMap.size + final override def contains(key: K): Boolean = hashMap.contains(key) + final override def foreach[U](f: ((K, V)) => U): Unit = hashMap.foreach(Function.untupled(f)) + final override def getOrElse[V1 >: V](key: K, default: => V1): V1 = hashMap.getOrElse(key, default) + final override def keysIterator: Iterator[K] = hashMap.keysIterator + final override def valuesIterator: Iterator[V] = hashMap.valuesIterator + final override def isEmpty: Boolean = hashMap.isEmpty + final override def nonEmpty: Boolean = hashMap.nonEmpty + final override def concat[V2 >: V](suffix: IterableOnce[(K, V2)]): Map[K, V2] = + new WrappedHashMap(hashMap.concat(suffix)) + final override def hashCode: Int = MurmurHash3.mapHash(this) + final override def equals(that: Any): Boolean = that match { + case map: WrappedHashMap[_, _] => + (this eq map) || (this.hashMap == map.hashMap) + case other => + super.equals(other) + } + } +} diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 1f16db11c7..3cef2eea5f 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -60,6 +60,7 @@ import cats.syntax.eq._ import java.util.Arrays import HashMap.improve +import HashMap.WrappedHashMap /** * An immutable hash map using [[cats.kernel.Hash]] for hashing. @@ -90,6 +91,14 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No final def keysIterator: Iterator[K] = iterator.map { case (k, _) => k } + /** + * An iterator for the values of this map that can be used only once. + * + * @return an iterator that iterates through the keys of this map. + */ + final def valuesIterator: Iterator[V] = + iterator.map { case (_, v) => v } + /** * The size of this map. * @@ -177,6 +186,9 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No new HashMap(newRootNode) } + final def toMap: collection.immutable.Map[K, V] = + new WrappedHashMap(this) + /** * Typesafe equality operator. * @@ -233,7 +245,7 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No iterator.map { case (k, v) => s"$k -> $v" }.mkString("HashMap(", ", ", ")") } -object HashMap extends HashMapInstances { +object HashMap extends HashMapInstances with HashMapCompatCompanion { final private[data] def improve(hash: Int): Int = scala.util.hashing.byteswap32(hash) @@ -275,7 +287,7 @@ object HashMap extends HashMapInstances { * * @param iterable the iterable source of elements to add to the [[cats.data.HashMap]]. * @param hashKey the [[cats.kernel.Hash]] instance used for hashing values. - * @return a new [[cats.data.HashMap]] which contains all elements of `seq`. + * @return a new [[cats.data.HashMap]] which contains all elements of `iterable`. */ final def fromIterableOnce[K, V](iterable: IterableOnce[(K, V)])(implicit hashKey: Hash[K]): HashMap[K, V] = { iterable match { @@ -295,7 +307,7 @@ object HashMap extends HashMapInstances { * @param fkv the [[cats.Foldable]] structure of elements to add to the [[cats.data.HashMap]]. * @param F the [[cats.Foldable]] instance used for folding the structure. * @param hashKey the [[cats.kernel.Hash]] instance used for hashing values. - * @return a new [[cats.data.HashMap]] which contains all elements of `seq`. + * @return a new [[cats.data.HashMap]] which contains all elements of `fkv`. */ final def fromFoldable[F[_], K, V](fkv: F[(K, V)])(implicit F: Foldable[F], hashKey: Hash[K]): HashMap[K, V] = { val rootNode = F.foldLeft(fkv, Node.empty[K, V]) { case (node, (k, v)) => diff --git a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala index 1ed855cca6..ac5178d593 100644 --- a/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala +++ b/tests/shared/src/test/scala/cats/tests/HashMapSuite.scala @@ -264,4 +264,12 @@ class HashMapSuite extends CatsSuite { assertEquals(left, right) } } + + property("toMap consistent with fromIterableOnce") { + forAll { (scalaMap: Map[Int, String]) => + val hashMap = HashMap.fromIterableOnce(scalaMap) + val wrappedHashMap = hashMap.toMap + assertEquals(scalaMap, wrappedHashMap) + } + } } From a4732f28dc2b47dec35815bdddd111e623fdcdf9 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 22:06:24 +0100 Subject: [PATCH 20/28] Add copyright headers for WrappedHashMap --- .../cats/data/HashMapCompatCompanion.scala | 24 ++++++++++++++++++- .../cats/data/HashMapCompatCompanion.scala | 24 ++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala b/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala index fee2b6ba47..82ff812efe 100644 --- a/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala +++ b/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala @@ -1,10 +1,32 @@ +/* + * Copyright (c) 2015 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package cats.data import scala.collection.immutable.AbstractMap import scala.util.hashing.MurmurHash3 private[data] trait HashMapCompatCompanion { - private[data] class WrappedHashMap[K, V](private[WrappedHashMap] val hashMap: HashMap[K, V]) extends AbstractMap[K, V] { + private[data] class WrappedHashMap[K, V](private[WrappedHashMap] val hashMap: HashMap[K, V]) + extends AbstractMap[K, V] { final def iterator: collection.Iterator[(K, V)] = hashMap.iterator final def get(key: K): Option[V] = hashMap.get(key) final def +[V1 >: V](kv: (K, V1)): Map[K, V1] = new WrappedHashMap(hashMap.updated(kv._1, kv._2)) diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala index 4cead5073e..c5f77965bf 100644 --- a/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala @@ -1,10 +1,32 @@ +/* + * Copyright (c) 2015 Typelevel + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package cats.data import scala.collection.immutable.AbstractMap import scala.util.hashing.MurmurHash3 private[data] trait HashMapCompatCompanion { - private[data] class WrappedHashMap[K, V](private[WrappedHashMap] val hashMap: HashMap[K, V]) extends AbstractMap[K, V] { + private[data] class WrappedHashMap[K, V](private[WrappedHashMap] val hashMap: HashMap[K, V]) + extends AbstractMap[K, V] { final def iterator: collection.Iterator[(K, V)] = hashMap.iterator final def get(key: K): Option[V] = hashMap.get(key) final def updated[V1 >: V](key: K, value: V1): Map[K, V1] = new WrappedHashMap(hashMap.updated(key, value)) From ea5c7de0ba82e52724d93b34ba6295e9dcfe032f Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 22:14:49 +0100 Subject: [PATCH 21/28] Avoid allocating tuple in HashMapBench --- bench/src/main/scala-2.12/cats/bench/HashMapBench.scala | 2 +- bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala | 2 +- core/src/main/scala/cats/data/HashMap.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala index bdeb9125a1..ee0df0207d 100644 --- a/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala +++ b/bench/src/main/scala-2.12/cats/bench/HashMapBench.scala @@ -76,7 +76,7 @@ class HashMapBench { var ss = scalaMap var i = 0 while (i < 1000) { - ss += (-i.toLong -> i) + ss = ss.updated(-i.toLong, i) i += 1 } bh.consume(ss) diff --git a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala index 250033f811..c123860a86 100644 --- a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala +++ b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala @@ -76,7 +76,7 @@ class HashMapBench { var ss = scalaMap var i = 0 while (i < 1000) { - ss += (-i.toLong -> i) + ss = ss.updated(-i.toLong, i) i += 1 } bh.consume(ss) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 3cef2eea5f..50e4f5be54 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -94,7 +94,7 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No /** * An iterator for the values of this map that can be used only once. * - * @return an iterator that iterates through the keys of this map. + * @return an iterator that iterates through the values of this map. */ final def valuesIterator: Iterator[V] = iterator.map { case (_, v) => v } From 7cc3ade50c832af266ad24790c8fa282fba1c5e3 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 17 May 2022 22:23:41 +0100 Subject: [PATCH 22/28] Address review comments --- .../main/scala-2.12/cats/data/HashMapCompatCompanion.scala | 4 ++-- .../main/scala-2.13+/cats/data/HashMapCompatCompanion.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala b/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala index 82ff812efe..01ca3e19c8 100644 --- a/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala +++ b/core/src/main/scala-2.12/cats/data/HashMapCompatCompanion.scala @@ -39,10 +39,10 @@ private[data] trait HashMapCompatCompanion { final override def valuesIterator: Iterator[V] = hashMap.valuesIterator final override def isEmpty: Boolean = hashMap.isEmpty final override def nonEmpty: Boolean = hashMap.nonEmpty - final override def hashCode: Int = MurmurHash3.mapHash(this) + final override def hashCode: Int = hashMap.hashCode final override def equals(that: Any): Boolean = that match { case map: WrappedHashMap[_, _] => - (this eq map) || (this.hashMap == map.hashMap) + this.hashMap == map.hashMap case other => super.equals(other) } diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala index c5f77965bf..722e510262 100644 --- a/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompatCompanion.scala @@ -42,10 +42,10 @@ private[data] trait HashMapCompatCompanion { final override def nonEmpty: Boolean = hashMap.nonEmpty final override def concat[V2 >: V](suffix: IterableOnce[(K, V2)]): Map[K, V2] = new WrappedHashMap(hashMap.concat(suffix)) - final override def hashCode: Int = MurmurHash3.mapHash(this) + final override def hashCode: Int = hashMap.hashCode final override def equals(that: Any): Boolean = that match { case map: WrappedHashMap[_, _] => - (this eq map) || (this.hashMap == map.hashMap) + this.hashMap == map.hashMap case other => super.equals(other) } From d25015f93c62754e74521f032864e4cab2c7ad8b Mon Sep 17 00:00:00 2001 From: David Gregory Date: Mon, 23 May 2022 11:40:56 +0100 Subject: [PATCH 23/28] Address further review comments: * Reduce allocations by implementing Node#getMapping * Improve performance of CollisionNode by making better use of Vector operations * Improve performance of HashMap#concat by iterating over the smaller HashMap --- .../scala-2.12/cats/data/HashMapCompat.scala | 20 +- .../scala-2.13+/cats/data/HashMapCompat.scala | 11 +- core/src/main/scala/cats/data/HashMap.scala | 172 +++++++++++------- 3 files changed, 130 insertions(+), 73 deletions(-) diff --git a/core/src/main/scala-2.12/cats/data/HashMapCompat.scala b/core/src/main/scala-2.12/cats/data/HashMapCompat.scala index 3e7767d122..9acef91791 100644 --- a/core/src/main/scala-2.12/cats/data/HashMapCompat.scala +++ b/core/src/main/scala-2.12/cats/data/HashMapCompat.scala @@ -33,7 +33,7 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](traversable: TraversableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = traversable.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, 0) + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) } if (newRootNode eq self.rootNode) @@ -48,6 +48,20 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => * @param traversable the collection of key-value pairs to be added. * @return a new map that contains all key-value pairs of this map and `traversable`. */ - final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = - concat(hm.iterator) + final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = { + val newRootNode = if (self.size <= hm.size) { + self.iterator.foldLeft(hm.rootNode) { case (node, (k, v)) => + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = false, 0) + } + } else { + hm.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) + } + } + + if (newRootNode eq self.rootNode) + this + else + new HashMap(newRootNode) + } } diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala index 71a2fcbe9d..854fe4beb2 100644 --- a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala @@ -34,8 +34,15 @@ private[data] trait HashMapCompat[K, +V] extends IterableOnce[(K, V)] { self: Ha * @return a new map that contains all key-value pairs of this map and `iterable`. */ final def concat[VV >: V](iterable: IterableOnce[(K, VV)]): HashMap[K, VV] = { - val newRootNode = iterable.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, 0) + val newRootNode = iterable match { + case hm: HashMap[K, V] @unchecked if self.size <= hm.size => + self.iterator.foldLeft(hm.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = false, 0) + } + case _ => + iterable.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) + } } if (newRootNode eq self.rootNode) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 50e4f5be54..5c59c504b5 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -166,7 +166,7 @@ final class HashMap[K, +V] private[data] (private[data] val rootNode: HashMap.No */ final def updated[VV >: V](key: K, value: VV): HashMap[K, VV] = { val keyHash = improve(hashKey.hash(key)) - val newRootNode = rootNode.updated(key, keyHash, value, 0) + val newRootNode = rootNode.updated(key, keyHash, value, replaceExisting = true, depth = 0) new HashMap(newRootNode) } @@ -277,7 +277,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { */ final def fromSeq[K, V](seq: Seq[(K, V)])(implicit hashKey: Hash[K]): HashMap[K, V] = { val rootNode = seq.foldLeft(Node.empty[K, V]) { case (node, (k, v)) => - node.updated(k, improve(hashKey.hash(k)), v, 0) + node.updated(k, improve(hashKey.hash(k)), v, replaceExisting = true, depth = 0) } new HashMap(rootNode) } @@ -295,7 +295,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { fromSeq(seq) case notSeq => val rootNode = notSeq.iterator.foldLeft(Node.empty[K, V]) { case (node, (k, v)) => - node.updated(k, improve(hashKey.hash(k)), v, 0) + node.updated(k, improve(hashKey.hash(k)), v, replaceExisting = true, depth = 0) } new HashMap(rootNode) } @@ -311,7 +311,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { */ final def fromFoldable[F[_], K, V](fkv: F[(K, V)])(implicit F: Foldable[F], hashKey: Hash[K]): HashMap[K, V] = { val rootNode = F.foldLeft(fkv, Node.empty[K, V]) { case (node, (k, v)) => - node.updated(k, improve(hashKey.hash(k)), v, 0) + node.updated(k, improve(hashKey.hash(k)), v, replaceExisting = true, depth = 0) } new HashMap(rootNode) } @@ -350,12 +350,6 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { */ def getValue(index: Int): V - /** - * @param index the index of the value among the value elements of this trie node. - * @return the key-value element at the provided `index`. - */ - def getMapping(index: Int): (K, V) - /** * @param index the index of the node among the node elements of this trie node. * @return the node element at the provided `index`. @@ -405,10 +399,11 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { * @param newKey the key to add. * @param newKeyHash the hash of the key to add. * @param value the value to add. + * @param replaceExisting whether to replace the existing value if a matching key already exists. * @param depth the 0-indexed depth in the trie structure. * @return a new [[HashMap.Node]] containing the element to add. */ - def updated[VV >: V](newKey: K, newKeyHash: Int, value: VV, depth: Int): Node[K, VV] + def updated[VV >: V](newKey: K, newKeyHash: Int, value: VV, replaceExisting: Boolean, depth: Int): Node[K, VV] /** * The current trie node updated to remove the provided key. @@ -506,43 +501,69 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { final def getValue(index: Int): V = contents.getUnsafe(index)._2 - final def getMapping(index: Int): (K, V) = - contents.getUnsafe(index) - final def getNode(index: Int): Node[K, V] = throw new IndexOutOfBoundsException("No sub-nodes present in hash-collision leaf node.") - final def updated[VV >: V](newKey: K, newKeyHash: Int, newValue: VV, depth: Int): Node[K, VV] = - if (!contains(newKey, newKeyHash, depth)) + final def updated[VV >: V]( + newKey: K, + newKeyHash: Int, + newValue: VV, + replaceExisting: Boolean, + depth: Int + ): Node[K, VV] = { + val keyIndex = contents.toVector.indexWhere { case (k, _) => hashKey.eqv(newKey, k) } + if (keyIndex < 0) new CollisionNode(newKeyHash, contents :+ (newKey -> newValue)) - else { - val newContents = contents.filterNot { case (k, _) => hashKey.eqv(newKey, k) } - new CollisionNode[K, VV](collisionHash, NonEmptyVector(newKey -> newValue, newContents)) + else if (!replaceExisting) { + this + } else { + val newContents = contents.updatedUnsafe(keyIndex, (newKey, newValue)) + new CollisionNode[K, VV](collisionHash, newContents) } + } - final override def removed(key: K, keyHash: Int, depth: Int): Node[K, V] = - if (!contains(key, keyHash, depth)) + final override def removed(key: K, keyHash: Int, depth: Int): Node[K, V] = { + val keyIndex = contents.toVector.indexWhere { case (k, _) => hashKey.eqv(key, k) } + if (keyIndex < 0) + // The key was not found this - else { - val newContents = contents.filterNot { case (k, _) => hashKey.eqv(key, k) } - if (newContents.lengthCompare(1) > 0) - new CollisionNode(collisionHash, NonEmptyVector.fromVectorUnsafe(newContents)) - else { - // This is a singleton node so the depth doesn't matter; - // we only need to index into it to inline the value in our parent node - val mask = Node.maskFrom(collisionHash, 0) - val bitPos = Node.bitPosFrom(mask) - val newContentsArray = new Array[Any](Node.StrideLength * newContents.length) - var i = 0 - newContents.foreach { case (k, v) => - val keyIndex = Node.StrideLength * i - newContentsArray(keyIndex) = k - newContentsArray(keyIndex + 1) = v - i += 1 + else if (contents.toVector.lengthCompare(2) == 0) { + // There will no longer be any collisions once the key is removed + val keepIndex = ~keyIndex + // This is a singleton node so the depth doesn't matter; + // we only need to index into it to inline the value in our parent node + val mask = Node.maskFrom(collisionHash, depth = 0) + val bitPos = Node.bitPosFrom(mask) + val newContentsArray = new Array[Any](Node.StrideLength) + val (key, value) = contents.getUnsafe(keepIndex) + newContentsArray(0) = key + newContentsArray(1) = value + new BitMapNode[K, V](bitPos, 0, newContentsArray, 1) + } else { + if (keyIndex == 0) { + // We're removing the first item + new CollisionNode(collisionHash, NonEmptyVector.fromVectorUnsafe(contents.tail)) + } else { + val newSize = contents.toVector.size - 1 + if (keyIndex == newSize) { + // We're removing the last item + new CollisionNode(collisionHash, NonEmptyVector.fromVectorUnsafe(contents.init)) + } else { + // We're removing an item somewhere in the middle + val builder = Vector.newBuilder[(K, V)] + builder.sizeHint(newSize) + var i = 0 + val iterator = contents.iterator + while (iterator.hasNext) { + val kv = iterator.next() + if (i != keyIndex) builder += kv + i += 1 + } + new CollisionNode(collisionHash, NonEmptyVector.fromVectorUnsafe(builder.result())) } - new BitMapNode[K, V](bitPos, 0, newContentsArray, newContents.length) } } + } final def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean = { (this eq that) || { @@ -633,20 +654,13 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { final def getValue(index: Int): V = contents(Node.StrideLength * index + 1).asInstanceOf[V] - final def getMapping(index: Int): (K, V) = { - val keyValueOffset = Node.StrideLength * index - contents(keyValueOffset).asInstanceOf[K] -> - contents(keyValueOffset + 1).asInstanceOf[V] - } - final def getNode(index: Int): Node[K, V] = contents(contents.length - 1 - index).asInstanceOf[Node[K, V]] final def foreach[U](f: (K, V) => U): Unit = { var i = 0 - val fnTupled = f.tupled while (i < keyValueCount) { - fnTupled(getMapping(i)) + f(getKey(i), getValue(i)) i += 1 } @@ -764,11 +778,12 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { newKey: K, newKeyHash: Int, newValue: VV, + replaceExisting: Boolean, depth: Int ): Node[K, VV] = { val index = Node.indexFrom(nodeMap, bitPos) val subNode = getNode(index) - val newSubNode = subNode.updated(newKey, newKeyHash, newValue, depth + 1) + val newSubNode = subNode.updated(newKey, newKeyHash, newValue, replaceExisting, depth + 1) if (newSubNode eq subNode) this @@ -789,12 +804,18 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { newKey: K, newKeyHash: Int, newValue: VV, + replaceExisting: Boolean, depth: Int ): Node[K, VV] = { val index = Node.indexFrom(keyValueMap, bitPos) - val (existingKey, existingValue) = getMapping(index) - if (hashKey.eqv(existingKey, newKey)) { - replaceValueAtIndex(index, newValue) + val existingKey = getKey(index) + val existingValue = getValue(index) + val hasMatchingKey = hashKey.eqv(existingKey, newKey) + if (hasMatchingKey) { + if (replaceExisting) + replaceValueAtIndex(index, newValue) + else + this } else mergeValuesIntoNode( bitPos, @@ -818,14 +839,20 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { new BitMapNode[K, V](keyValueMap | bitPos, nodeMap, newContents, size + 1) } - final def updated[VV >: V](newKey: K, newKeyHash: Int, newValue: VV, depth: Int): Node[K, VV] = { + final def updated[VV >: V]( + newKey: K, + newKeyHash: Int, + newValue: VV, + replaceExisting: Boolean, + depth: Int + ): Node[K, VV] = { val mask = Node.maskFrom(newKeyHash, depth) val bitPos = Node.bitPosFrom(mask) if (hasKeyValueAt(bitPos)) { - updateKeyValue(bitPos, newKey, newKeyHash, newValue, depth) + updateKeyValue(bitPos, newKey, newKeyHash, newValue, replaceExisting, depth) } else if (hasNodeAt(bitPos)) { - updateNode(bitPos, newKey, newKeyHash, newValue, depth) + updateNode(bitPos, newKey, newKeyHash, newValue, replaceExisting, depth) } else { appendKeyValue(bitPos, newKey, newValue) } @@ -872,7 +899,8 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { val nodeIndex = contents.length - 1 - Node.indexFrom(nodeMap, bitPos) val keyIndex = Node.StrideLength * Node.indexFrom(keyValueMap, bitPos) val newContents = new Array[Any](contents.length + 1) - val (key, value) = newSubNode.getMapping(0) + val key = newSubNode.getKey(0) + val value = newSubNode.getValue(0) System.arraycopy(contents, 0, newContents, 0, keyIndex) @@ -939,9 +967,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { (this.size === node.size) && { var i = 0 while (i < keyValueCount) { - val (kl, vl) = getMapping(i) - val (kr, vr) = node.getMapping(i) - if (hashKey.neqv(kl, kr) || eqValue.neqv(vl, vr)) return false + if (hashKey.neqv(getKey(i), node.getKey(i)) || eqValue.neqv(getValue(i), node.getValue(i))) return false i += 1 } i = 0 @@ -1121,9 +1147,10 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { final override def next(): (K, V) = { if (!hasNext) throw new NoSuchElementException - val value = currentNode.getMapping(currentValuesIndex) + val key = currentNode.getKey(currentValuesIndex) + val value = currentNode.getValue(currentValuesIndex) currentValuesIndex += 1 - value + (key, value) } } } @@ -1192,20 +1219,29 @@ class HashMapMonoid[K: Hash, V](implicit V: Semigroup[V]) extends Monoid[HashMap def empty: HashMap[K, V] = HashMap.empty[K, V] def combine(xs: HashMap[K, V], ys: HashMap[K, V]): HashMap[K, V] = { - if (xs.size <= ys.size) { - xs.iterator.foldLeft(ys) { case (my, (k, x)) => - my.get(k) match { - case Some(y) => my.updated(k, V.combine(x, y)) - case None => my.updated(k, x) + val newRootNode = if (xs.size <= ys.size) { + xs.iterator.foldLeft(ys.rootNode) { case (node, (k, x)) => + ys.get(k) match { + case Some(y) => + node.updated(k, improve(xs.hashKey.hash(k)), V.combine(x, y), replaceExisting = true, depth = 0) + case None => node.updated(k, improve(xs.hashKey.hash(k)), x, replaceExisting = true, depth = 0) } } } else { - ys.iterator.foldLeft(xs) { case (mx, (k, y)) => - mx.get(k) match { - case Some(x) => mx.updated(k, V.combine(x, y)) - case None => mx.updated(k, y) + ys.iterator.foldLeft(xs.rootNode) { case (node, (k, y)) => + xs.get(k) match { + case Some(x) => + node.updated(k, improve(ys.hashKey.hash(k)), V.combine(x, y), replaceExisting = true, depth = 0) + case None => node.updated(k, improve(ys.hashKey.hash(k)), y, replaceExisting = true, depth = 0) } } } + + if (newRootNode eq xs.rootNode) + xs + else if (newRootNode eq ys.rootNode) + ys + else + new HashMap(newRootNode) } } From d57215661495632b10f779d029024a791873fa70 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Mon, 23 May 2022 11:50:15 +0100 Subject: [PATCH 24/28] Change formatting of HashMapMonoid#combine --- core/src/main/scala/cats/data/HashMap.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 5c59c504b5..43e913501d 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -1224,7 +1224,8 @@ class HashMapMonoid[K: Hash, V](implicit V: Semigroup[V]) extends Monoid[HashMap ys.get(k) match { case Some(y) => node.updated(k, improve(xs.hashKey.hash(k)), V.combine(x, y), replaceExisting = true, depth = 0) - case None => node.updated(k, improve(xs.hashKey.hash(k)), x, replaceExisting = true, depth = 0) + case None => + node.updated(k, improve(xs.hashKey.hash(k)), x, replaceExisting = true, depth = 0) } } } else { @@ -1232,7 +1233,8 @@ class HashMapMonoid[K: Hash, V](implicit V: Semigroup[V]) extends Monoid[HashMap xs.get(k) match { case Some(x) => node.updated(k, improve(ys.hashKey.hash(k)), V.combine(x, y), replaceExisting = true, depth = 0) - case None => node.updated(k, improve(ys.hashKey.hash(k)), y, replaceExisting = true, depth = 0) + case None => + node.updated(k, improve(ys.hashKey.hash(k)), y, replaceExisting = true, depth = 0) } } } From ea31a5d182610d3c4cc4a2eb392e8c2f38293b4c Mon Sep 17 00:00:00 2001 From: David Gregory Date: Mon, 23 May 2022 11:51:35 +0100 Subject: [PATCH 25/28] Fix inconsistency in benchmark names --- bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala index c123860a86..4926ba96de 100644 --- a/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala +++ b/bench/src/main/scala-2.13+/cats/bench/HashMapBench.scala @@ -96,7 +96,7 @@ class HashMapBench { @Benchmark @OperationsPerInvocation(1000) - def scalaMapRemove(bh: Blackhole): Unit = { + def scalaMapRemoved(bh: Blackhole): Unit = { var ss = scalaMap var i = 0L while (i < 1000L) { From e7343e841a34917e74332cacc3e0e313e1387f33 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 24 May 2022 12:54:18 +0100 Subject: [PATCH 26/28] Implement a concat operation on BitMapNode to merge the CHAMP structures --- .../scala-2.12/cats/data/HashMapCompat.scala | 10 +- .../scala-2.13+/cats/data/HashMapCompat.scala | 10 +- core/src/main/scala/cats/data/HashMap.scala | 156 +++++++++++++++++- 3 files changed, 156 insertions(+), 20 deletions(-) diff --git a/core/src/main/scala-2.12/cats/data/HashMapCompat.scala b/core/src/main/scala-2.12/cats/data/HashMapCompat.scala index 9acef91791..2696230865 100644 --- a/core/src/main/scala-2.12/cats/data/HashMapCompat.scala +++ b/core/src/main/scala-2.12/cats/data/HashMapCompat.scala @@ -33,7 +33,7 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](traversable: TraversableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = traversable.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, depth = 0) } if (newRootNode eq self.rootNode) @@ -50,13 +50,9 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = { val newRootNode = if (self.size <= hm.size) { - self.iterator.foldLeft(hm.rootNode) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = false, 0) - } + hm.rootNode.concat(self.rootNode, replaceExisting = false, depth = 0) } else { - hm.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) - } + self.rootNode.concat(hm.rootNode, replaceExisting = true, depth = 0) } if (newRootNode eq self.rootNode) diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala index 854fe4beb2..ce372861d4 100644 --- a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala @@ -35,13 +35,15 @@ private[data] trait HashMapCompat[K, +V] extends IterableOnce[(K, V)] { self: Ha */ final def concat[VV >: V](iterable: IterableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = iterable match { - case hm: HashMap[K, V] @unchecked if self.size <= hm.size => - self.iterator.foldLeft(hm.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = false, 0) + case hm: HashMap[K, V] @unchecked => + if (self.size <= hm.size) { + hm.rootNode.concat(self.rootNode, replaceExisting = false, depth = 0) + } else { + self.rootNode.concat(hm.rootNode, replaceExisting = true, depth = 0) } case _ => iterable.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, depth = 0) } } diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 43e913501d..7c7a456753 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -394,7 +394,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { def get(key: K, keyHash: Int, depth: Int): Option[V] /** - * The current trie node updated to add the provided key-value pair. + * Return the current trie node updated to include the provided key-value pair. * * @param newKey the key to add. * @param newKeyHash the hash of the key to add. @@ -405,6 +405,16 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { */ def updated[VV >: V](newKey: K, newKeyHash: Int, value: VV, replaceExisting: Boolean, depth: Int): Node[K, VV] + /** + * Return the current trie node updated to include all of the key-value pairs of `that`. + * + * @param that the trie node to concatenate with this one. + * @param replaceExisting whether to replace existing values with those from `that` if a matching key already exists. + * @param depth the 0-indexed depth in the trie structure. + * @return a new [[HashMap.Node]] containing all elements of this trie node and `that`. + */ + def concat[VV >: V](that: Node[K, VV], replaceExisting: Boolean, depth: Int): Node[K, VV] + /** * The current trie node updated to remove the provided key. * @@ -565,6 +575,31 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { } } + final def concat[VV >: V](that: Node[K, VV], replaceExisting: Boolean, depth: Int): Node[K, VV] = + that match { + case that: CollisionNode[K, VV] @unchecked if this.collisionHash == that.collisionHash => + val builder = Vector.newBuilder[(K, VV)] + builder.sizeHint(this.size + that.size) + if (replaceExisting) { + builder ++= that.contents.toVector + this.contents.toVector.foreach { case kv @ (key, _) => + if (!that.contents.exists { case (k, _) => hashKey.eqv(key, k) }) + builder += kv + } + } else { + builder ++= this.contents.toVector + that.contents.toVector.foreach { case kv @ (key, _) => + if (!contents.exists { case (k, _) => hashKey.eqv(key, k) }) + builder += kv + } + } + new CollisionNode[K, VV](collisionHash, NonEmptyVector.fromVectorUnsafe(builder.result())) + case _: CollisionNode[_, _] => + throw new IllegalStateException("Attempting to merge collision nodes with mismatched collision hashes") + case _: BitMapNode[_, _] => + throw new IllegalStateException("Attempting to merge a collision node with a bitmap node") + } + final def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean = { (this eq that) || { that match { @@ -743,7 +778,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { rightHash: Int, rightValue: VV, depth: Int - ): Node[K, VV] = { + ): BitMapNode[K, VV] = { val newNode = mergeValues(left, leftHash, leftValue, right, rightHash, rightValue, depth) val valueIndex = Node.StrideLength * Node.indexFrom(keyValueMap, bitPos) val nodeIndex = contents.length - Node.StrideLength - Node.indexFrom(nodeMap, bitPos) @@ -765,7 +800,11 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { new BitMapNode[K, V](keyValueMap ^ bitPos, nodeMap | bitPos, newContents, size + 1) } - final private def replaceNode[VV >: V](index: Int, oldNode: Node[K, VV], newNode: Node[K, VV]): Node[K, VV] = { + final private def replaceNode[VV >: V]( + index: Int, + oldNode: Node[K, VV], + newNode: Node[K, VV] + ): BitMapNode[K, VV] = { val targetIndex = contents.length - 1 - index val newContents = new Array[Any](contents.length) System.arraycopy(contents, 0, newContents, 0, contents.length) @@ -773,6 +812,15 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { new BitMapNode[K, V](keyValueMap, nodeMap, newContents, size + (newNode.size - oldNode.size)) } + final private def appendNode[VV >: V](bitPos: Int, newNode: Node[K, VV]): BitMapNode[K, VV] = { + val newContents = new Array[Any](contents.length + 1) + val nodeIndex = newContents.length - 1 - Node.indexFrom(nodeMap, bitPos) + System.arraycopy(contents, 0, newContents, 0, nodeIndex) + newContents(nodeIndex) = newNode + System.arraycopy(contents, nodeIndex, newContents, nodeIndex + 1, newContents.length - 1 - nodeIndex) + new BitMapNode[K, V](keyValueMap, nodeMap | bitPos, newContents, size + newNode.size) + } + final private def updateNode[VV >: V]( bitPos: Int, newKey: K, @@ -780,7 +828,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { newValue: VV, replaceExisting: Boolean, depth: Int - ): Node[K, VV] = { + ): BitMapNode[K, VV] = { val index = Node.indexFrom(nodeMap, bitPos) val subNode = getNode(index) val newSubNode = subNode.updated(newKey, newKeyHash, newValue, replaceExisting, depth + 1) @@ -791,7 +839,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { replaceNode(index, subNode, newSubNode) } - final private def replaceValueAtIndex[VV >: V](index: Int, newValue: VV): Node[K, VV] = { + final private def replaceValueAtIndex[VV >: V](index: Int, newValue: VV): BitMapNode[K, VV] = { val valueIndex = Node.StrideLength * index + 1 val newContents = new Array[Any](contents.length) System.arraycopy(contents, 0, newContents, 0, contents.length) @@ -806,7 +854,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { newValue: VV, replaceExisting: Boolean, depth: Int - ): Node[K, VV] = { + ): BitMapNode[K, VV] = { val index = Node.indexFrom(keyValueMap, bitPos) val existingKey = getKey(index) val existingValue = getValue(index) @@ -829,7 +877,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { ) } - final private def appendKeyValue[VV >: V](bitPos: Int, newKey: K, newValue: VV): Node[K, VV] = { + final private def appendKeyValue[VV >: V](bitPos: Int, newKey: K, newValue: VV): BitMapNode[K, VV] = { val index = Node.StrideLength * Node.indexFrom(keyValueMap, bitPos) val newContents = new Array[Any](contents.length + Node.StrideLength) System.arraycopy(contents, 0, newContents, 0, index) @@ -858,13 +906,13 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { } } - final private def removeKeyValue(bitPos: Int, removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] = { + final private def removeKeyValue(bitPos: Int, removeKey: K, removeKeyHash: Int, depth: Int): BitMapNode[K, V] = { val index = Node.indexFrom(keyValueMap, bitPos) val existingKey = getKey(index) if (!hashKey.eqv(existingKey, removeKey)) { this } else if (allElementsCount == 1) { - Node.empty[K, V] + new BitMapNode[K, V](0, 0, Array.empty[Any], 0) } else { val keyIndex = Node.StrideLength * index val newContents = new Array[Any](contents.length - Node.StrideLength) @@ -958,6 +1006,96 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { } } + final def concat[VV >: V](that: Node[K, VV], replaceExisting: Boolean, depth: Int): Node[K, VV] = that match { + case that: BitMapNode[K, VV] @unchecked => + var newNode: BitMapNode[K, VV] = this + + var index = 0 + var thisNodeMap = newNode.nodeMap + var thatNodeMap = that.nodeMap + val maxNewNodeIndex = Integer.numberOfTrailingZeros(Integer.highestOneBit(thatNodeMap)) + while (index <= maxNewNodeIndex) { + val thisHasNode = (thisNodeMap & 1) == 1 + val thatHasNode = (thatNodeMap & 1) == 1 + if (thisHasNode && thatHasNode) { + // Merge `this` and `that` nodes + val bitPos = Node.bitPosFrom(index) + val thisIndex = Node.indexFrom(newNode.nodeMap, bitPos) + val thisNode = newNode.getNode(thisIndex) + val thatNode = that.getNode(Node.indexFrom(that.nodeMap, bitPos)) + val mergedNode = thisNode.concat(thatNode, replaceExisting, depth + 1) + newNode = newNode.replaceNode(thisIndex, thisNode, mergedNode) + } else if (thatHasNode) { + // Copy node from `that` + val bitPos = Node.bitPosFrom(index) + val thatNodeIndex = Node.indexFrom(that.nodeMap, bitPos) + val thatNode = that.getNode(thatNodeIndex) + newNode = newNode.appendNode(bitPos, thatNode) + } + + thisNodeMap >>= 1 + thatNodeMap >>= 1 + index += 1 + } + + index = 0 + var thisKeyValueMap = this.keyValueMap + var thatKeyValueMap = that.keyValueMap + val bothKeyValueMap = thisKeyValueMap | thatKeyValueMap + val maxKeyValueIndex = Integer.numberOfTrailingZeros(Integer.highestOneBit(bothKeyValueMap)) + while (index <= maxKeyValueIndex) { + val thisHasKeyValue = (thisKeyValueMap & 1) == 1 + val thatHasKeyValue = (thatKeyValueMap & 1) == 1 + if (thisHasKeyValue && thatHasKeyValue) { + // Merge `this` and `that` key value pair + val bitPos = Node.bitPosFrom(index) + val thatIndex = Node.indexFrom(that.keyValueMap, bitPos) + val thatKey = that.getKey(thatIndex) + val thatValue = that.getValue(thatIndex) + newNode = newNode.updateKeyValue( + bitPos, + thatKey, + improve(hashKey.hash(thatKey)), + thatValue, + replaceExisting, + depth + ) + } else if (thisHasKeyValue) { + val bitPos = Node.bitPosFrom(index) + if (newNode.hasNodeAt(bitPos)) { + // Move `this` key-value pair into `that`'s node for this hash + val valueIndex = Node.indexFrom(this.keyValueMap, bitPos) + val thisKey = this.getKey(valueIndex) + val thisKeyHash = improve(hashKey.hash(thisKey)) + val thisValue = this.getValue(valueIndex) + newNode = newNode.removeKeyValue(bitPos, thisKey, thisKeyHash, depth) + newNode = newNode.updateNode(bitPos, thisKey, thisKeyHash, thisValue, !replaceExisting, depth) + } + } else if (thatHasKeyValue) { + // Move `that` key value pair into `this` + val bitPos = Node.bitPosFrom(index) + val thatIndex = Node.indexFrom(that.keyValueMap, bitPos) + val thatKey = that.getKey(thatIndex) + val thatKeyHash = improve(hashKey.hash(thatKey)) + val thatValue = that.getValue(thatIndex) + if (newNode.hasNodeAt(bitPos)) { + newNode = newNode.updateNode(bitPos, thatKey, thatKeyHash, thatValue, replaceExisting, depth) + } else { + newNode = newNode.appendKeyValue(bitPos, thatKey, thatValue) + } + } + + thisKeyValueMap >>= 1 + thatKeyValueMap >>= 1 + index += 1 + } + + newNode + + case _: CollisionNode[_, _] => + throw new IllegalStateException("Attempting to merge a bitmap node with a collision node") + } + final override def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean = { (this eq that) || { that match { From a64bcf701b009e95fcb63dbff593f09ae49b68c9 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 21 Jun 2022 13:26:43 +0100 Subject: [PATCH 27/28] Revert "Implement a concat operation on BitMapNode to merge the CHAMP structures" This reverts commit e7343e841a34917e74332cacc3e0e313e1387f33. --- .../scala-2.12/cats/data/HashMapCompat.scala | 10 +- .../scala-2.13+/cats/data/HashMapCompat.scala | 10 +- core/src/main/scala/cats/data/HashMap.scala | 156 +----------------- 3 files changed, 20 insertions(+), 156 deletions(-) diff --git a/core/src/main/scala-2.12/cats/data/HashMapCompat.scala b/core/src/main/scala-2.12/cats/data/HashMapCompat.scala index 2696230865..9acef91791 100644 --- a/core/src/main/scala-2.12/cats/data/HashMapCompat.scala +++ b/core/src/main/scala-2.12/cats/data/HashMapCompat.scala @@ -33,7 +33,7 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](traversable: TraversableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = traversable.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, depth = 0) + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) } if (newRootNode eq self.rootNode) @@ -50,9 +50,13 @@ private[data] trait HashMapCompat[K, +V] { self: HashMap[K, V] => */ final def concat[VV >: V](hm: HashMap[K, VV]): HashMap[K, VV] = { val newRootNode = if (self.size <= hm.size) { - hm.rootNode.concat(self.rootNode, replaceExisting = false, depth = 0) + self.iterator.foldLeft(hm.rootNode) { case (node, (k, v)) => + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = false, 0) + } } else { - self.rootNode.concat(hm.rootNode, replaceExisting = true, depth = 0) + hm.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) + } } if (newRootNode eq self.rootNode) diff --git a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala index ce372861d4..854fe4beb2 100644 --- a/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala +++ b/core/src/main/scala-2.13+/cats/data/HashMapCompat.scala @@ -35,15 +35,13 @@ private[data] trait HashMapCompat[K, +V] extends IterableOnce[(K, V)] { self: Ha */ final def concat[VV >: V](iterable: IterableOnce[(K, VV)]): HashMap[K, VV] = { val newRootNode = iterable match { - case hm: HashMap[K, V] @unchecked => - if (self.size <= hm.size) { - hm.rootNode.concat(self.rootNode, replaceExisting = false, depth = 0) - } else { - self.rootNode.concat(hm.rootNode, replaceExisting = true, depth = 0) + case hm: HashMap[K, V] @unchecked if self.size <= hm.size => + self.iterator.foldLeft(hm.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = false, 0) } case _ => iterable.iterator.foldLeft(self.rootNode: HashMap.Node[K, VV]) { case (node, (k, v)) => - node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, depth = 0) + node.updated(k, improve(self.hashKey.hash(k)), v, replaceExisting = true, 0) } } diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 7c7a456753..43e913501d 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -394,7 +394,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { def get(key: K, keyHash: Int, depth: Int): Option[V] /** - * Return the current trie node updated to include the provided key-value pair. + * The current trie node updated to add the provided key-value pair. * * @param newKey the key to add. * @param newKeyHash the hash of the key to add. @@ -405,16 +405,6 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { */ def updated[VV >: V](newKey: K, newKeyHash: Int, value: VV, replaceExisting: Boolean, depth: Int): Node[K, VV] - /** - * Return the current trie node updated to include all of the key-value pairs of `that`. - * - * @param that the trie node to concatenate with this one. - * @param replaceExisting whether to replace existing values with those from `that` if a matching key already exists. - * @param depth the 0-indexed depth in the trie structure. - * @return a new [[HashMap.Node]] containing all elements of this trie node and `that`. - */ - def concat[VV >: V](that: Node[K, VV], replaceExisting: Boolean, depth: Int): Node[K, VV] - /** * The current trie node updated to remove the provided key. * @@ -575,31 +565,6 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { } } - final def concat[VV >: V](that: Node[K, VV], replaceExisting: Boolean, depth: Int): Node[K, VV] = - that match { - case that: CollisionNode[K, VV] @unchecked if this.collisionHash == that.collisionHash => - val builder = Vector.newBuilder[(K, VV)] - builder.sizeHint(this.size + that.size) - if (replaceExisting) { - builder ++= that.contents.toVector - this.contents.toVector.foreach { case kv @ (key, _) => - if (!that.contents.exists { case (k, _) => hashKey.eqv(key, k) }) - builder += kv - } - } else { - builder ++= this.contents.toVector - that.contents.toVector.foreach { case kv @ (key, _) => - if (!contents.exists { case (k, _) => hashKey.eqv(key, k) }) - builder += kv - } - } - new CollisionNode[K, VV](collisionHash, NonEmptyVector.fromVectorUnsafe(builder.result())) - case _: CollisionNode[_, _] => - throw new IllegalStateException("Attempting to merge collision nodes with mismatched collision hashes") - case _: BitMapNode[_, _] => - throw new IllegalStateException("Attempting to merge a collision node with a bitmap node") - } - final def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean = { (this eq that) || { that match { @@ -778,7 +743,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { rightHash: Int, rightValue: VV, depth: Int - ): BitMapNode[K, VV] = { + ): Node[K, VV] = { val newNode = mergeValues(left, leftHash, leftValue, right, rightHash, rightValue, depth) val valueIndex = Node.StrideLength * Node.indexFrom(keyValueMap, bitPos) val nodeIndex = contents.length - Node.StrideLength - Node.indexFrom(nodeMap, bitPos) @@ -800,11 +765,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { new BitMapNode[K, V](keyValueMap ^ bitPos, nodeMap | bitPos, newContents, size + 1) } - final private def replaceNode[VV >: V]( - index: Int, - oldNode: Node[K, VV], - newNode: Node[K, VV] - ): BitMapNode[K, VV] = { + final private def replaceNode[VV >: V](index: Int, oldNode: Node[K, VV], newNode: Node[K, VV]): Node[K, VV] = { val targetIndex = contents.length - 1 - index val newContents = new Array[Any](contents.length) System.arraycopy(contents, 0, newContents, 0, contents.length) @@ -812,15 +773,6 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { new BitMapNode[K, V](keyValueMap, nodeMap, newContents, size + (newNode.size - oldNode.size)) } - final private def appendNode[VV >: V](bitPos: Int, newNode: Node[K, VV]): BitMapNode[K, VV] = { - val newContents = new Array[Any](contents.length + 1) - val nodeIndex = newContents.length - 1 - Node.indexFrom(nodeMap, bitPos) - System.arraycopy(contents, 0, newContents, 0, nodeIndex) - newContents(nodeIndex) = newNode - System.arraycopy(contents, nodeIndex, newContents, nodeIndex + 1, newContents.length - 1 - nodeIndex) - new BitMapNode[K, V](keyValueMap, nodeMap | bitPos, newContents, size + newNode.size) - } - final private def updateNode[VV >: V]( bitPos: Int, newKey: K, @@ -828,7 +780,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { newValue: VV, replaceExisting: Boolean, depth: Int - ): BitMapNode[K, VV] = { + ): Node[K, VV] = { val index = Node.indexFrom(nodeMap, bitPos) val subNode = getNode(index) val newSubNode = subNode.updated(newKey, newKeyHash, newValue, replaceExisting, depth + 1) @@ -839,7 +791,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { replaceNode(index, subNode, newSubNode) } - final private def replaceValueAtIndex[VV >: V](index: Int, newValue: VV): BitMapNode[K, VV] = { + final private def replaceValueAtIndex[VV >: V](index: Int, newValue: VV): Node[K, VV] = { val valueIndex = Node.StrideLength * index + 1 val newContents = new Array[Any](contents.length) System.arraycopy(contents, 0, newContents, 0, contents.length) @@ -854,7 +806,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { newValue: VV, replaceExisting: Boolean, depth: Int - ): BitMapNode[K, VV] = { + ): Node[K, VV] = { val index = Node.indexFrom(keyValueMap, bitPos) val existingKey = getKey(index) val existingValue = getValue(index) @@ -877,7 +829,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { ) } - final private def appendKeyValue[VV >: V](bitPos: Int, newKey: K, newValue: VV): BitMapNode[K, VV] = { + final private def appendKeyValue[VV >: V](bitPos: Int, newKey: K, newValue: VV): Node[K, VV] = { val index = Node.StrideLength * Node.indexFrom(keyValueMap, bitPos) val newContents = new Array[Any](contents.length + Node.StrideLength) System.arraycopy(contents, 0, newContents, 0, index) @@ -906,13 +858,13 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { } } - final private def removeKeyValue(bitPos: Int, removeKey: K, removeKeyHash: Int, depth: Int): BitMapNode[K, V] = { + final private def removeKeyValue(bitPos: Int, removeKey: K, removeKeyHash: Int, depth: Int): Node[K, V] = { val index = Node.indexFrom(keyValueMap, bitPos) val existingKey = getKey(index) if (!hashKey.eqv(existingKey, removeKey)) { this } else if (allElementsCount == 1) { - new BitMapNode[K, V](0, 0, Array.empty[Any], 0) + Node.empty[K, V] } else { val keyIndex = Node.StrideLength * index val newContents = new Array[Any](contents.length - Node.StrideLength) @@ -1006,96 +958,6 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { } } - final def concat[VV >: V](that: Node[K, VV], replaceExisting: Boolean, depth: Int): Node[K, VV] = that match { - case that: BitMapNode[K, VV] @unchecked => - var newNode: BitMapNode[K, VV] = this - - var index = 0 - var thisNodeMap = newNode.nodeMap - var thatNodeMap = that.nodeMap - val maxNewNodeIndex = Integer.numberOfTrailingZeros(Integer.highestOneBit(thatNodeMap)) - while (index <= maxNewNodeIndex) { - val thisHasNode = (thisNodeMap & 1) == 1 - val thatHasNode = (thatNodeMap & 1) == 1 - if (thisHasNode && thatHasNode) { - // Merge `this` and `that` nodes - val bitPos = Node.bitPosFrom(index) - val thisIndex = Node.indexFrom(newNode.nodeMap, bitPos) - val thisNode = newNode.getNode(thisIndex) - val thatNode = that.getNode(Node.indexFrom(that.nodeMap, bitPos)) - val mergedNode = thisNode.concat(thatNode, replaceExisting, depth + 1) - newNode = newNode.replaceNode(thisIndex, thisNode, mergedNode) - } else if (thatHasNode) { - // Copy node from `that` - val bitPos = Node.bitPosFrom(index) - val thatNodeIndex = Node.indexFrom(that.nodeMap, bitPos) - val thatNode = that.getNode(thatNodeIndex) - newNode = newNode.appendNode(bitPos, thatNode) - } - - thisNodeMap >>= 1 - thatNodeMap >>= 1 - index += 1 - } - - index = 0 - var thisKeyValueMap = this.keyValueMap - var thatKeyValueMap = that.keyValueMap - val bothKeyValueMap = thisKeyValueMap | thatKeyValueMap - val maxKeyValueIndex = Integer.numberOfTrailingZeros(Integer.highestOneBit(bothKeyValueMap)) - while (index <= maxKeyValueIndex) { - val thisHasKeyValue = (thisKeyValueMap & 1) == 1 - val thatHasKeyValue = (thatKeyValueMap & 1) == 1 - if (thisHasKeyValue && thatHasKeyValue) { - // Merge `this` and `that` key value pair - val bitPos = Node.bitPosFrom(index) - val thatIndex = Node.indexFrom(that.keyValueMap, bitPos) - val thatKey = that.getKey(thatIndex) - val thatValue = that.getValue(thatIndex) - newNode = newNode.updateKeyValue( - bitPos, - thatKey, - improve(hashKey.hash(thatKey)), - thatValue, - replaceExisting, - depth - ) - } else if (thisHasKeyValue) { - val bitPos = Node.bitPosFrom(index) - if (newNode.hasNodeAt(bitPos)) { - // Move `this` key-value pair into `that`'s node for this hash - val valueIndex = Node.indexFrom(this.keyValueMap, bitPos) - val thisKey = this.getKey(valueIndex) - val thisKeyHash = improve(hashKey.hash(thisKey)) - val thisValue = this.getValue(valueIndex) - newNode = newNode.removeKeyValue(bitPos, thisKey, thisKeyHash, depth) - newNode = newNode.updateNode(bitPos, thisKey, thisKeyHash, thisValue, !replaceExisting, depth) - } - } else if (thatHasKeyValue) { - // Move `that` key value pair into `this` - val bitPos = Node.bitPosFrom(index) - val thatIndex = Node.indexFrom(that.keyValueMap, bitPos) - val thatKey = that.getKey(thatIndex) - val thatKeyHash = improve(hashKey.hash(thatKey)) - val thatValue = that.getValue(thatIndex) - if (newNode.hasNodeAt(bitPos)) { - newNode = newNode.updateNode(bitPos, thatKey, thatKeyHash, thatValue, replaceExisting, depth) - } else { - newNode = newNode.appendKeyValue(bitPos, thatKey, thatValue) - } - } - - thisKeyValueMap >>= 1 - thatKeyValueMap >>= 1 - index += 1 - } - - newNode - - case _: CollisionNode[_, _] => - throw new IllegalStateException("Attempting to merge a bitmap node with a collision node") - } - final override def ===[VV >: V](that: Node[K, VV])(implicit eqValue: Eq[VV]): Boolean = { (this eq that) || { that match { From 9cc8be65a2720d354710a66a6991133ece425587 Mon Sep 17 00:00:00 2001 From: David Gregory Date: Tue, 21 Jun 2022 14:26:53 +0100 Subject: [PATCH 28/28] Update formatting for new scalafmt version --- core/src/main/scala/cats/data/HashMap.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/cats/data/HashMap.scala b/core/src/main/scala/cats/data/HashMap.scala index 43e913501d..803ed90c82 100644 --- a/core/src/main/scala/cats/data/HashMap.scala +++ b/core/src/main/scala/cats/data/HashMap.scala @@ -972,7 +972,7 @@ object HashMap extends HashMapInstances with HashMapCompatCompanion { } i = 0 while (i < nodeCount) { - if (!(getNode(i).===[VV](node.getNode(i)))) return false + if (!getNode(i).===[VV](node.getNode(i))) return false i += 1 } true