diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java
index 0bce6569d268f..3249918b44776 100644
--- a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java
+++ b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java
@@ -23,8 +23,10 @@
 import org.apache.arrow.vector.holders.NullableVarCharHolder;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
 import org.openjdk.jmh.annotations.Mode;
 import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
@@ -52,8 +54,11 @@ public class VariableWidthVectorBenchmarks {
 
   private VarCharVector vector;
 
+  @Param({"1", "2", "10", "40"})
+  private int step;
+
   /** Setup benchmarks. */
-  @Setup
+  @Setup(Level.Iteration)
   public void prepare() {
     allocator = new RootAllocator(ALLOCATOR_CAPACITY);
     vector = new VarCharVector("vector", allocator);
@@ -63,7 +68,7 @@ public void prepare() {
   }
 
   /** Tear down benchmarks. */
-  @TearDown
+  @TearDown(Level.Iteration)
   public void tearDown() {
     arrowBuff.close();
     vector.close();
@@ -87,7 +92,7 @@ public int getValueCapacity() {
   @OutputTimeUnit(TimeUnit.MILLISECONDS)
   public int setSafeFromArray() {
     for (int i = 0; i < 500; ++i) {
-      vector.setSafe(i * 40, bytes);
+      vector.setSafe(i * step, bytes);
     }
     return vector.getBufferSize();
   }
diff --git a/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java
new file mode 100644
index 0000000000000..9a04f868e0340
--- /dev/null
+++ b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthViewVectorBenchmarks.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.vector;
+
+import java.nio.charset.StandardCharsets;
+import java.util.concurrent.TimeUnit;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableViewVarCharHolder;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+/** Benchmarks for {@link BaseVariableWidthViewVector}. */
+@State(Scope.Benchmark)
+public class VariableWidthViewVectorBenchmarks {
+  // checkstyle:off: MissingJavadocMethod
+
+  private static final int VECTOR_CAPACITY = 16 * 1024;
+
+  private static final int VECTOR_LENGTH = 1024;
+
+  // Payload written by every benchmark: the sibling VarChar benchmark's class name.
+  // NOTE(review): presumably reused so both benchmarks write the same value -- confirm.
+  private static final byte[] bytes =
+      VariableWidthVectorBenchmarks.class.getName().getBytes(StandardCharsets.UTF_8);
+
+  private ArrowBuf arrowBuff;
+
+  private BufferAllocator allocator;
+
+  private ViewVarCharVector vector;
+
+  /** Distance between consecutive indices written by {@link #setSafeFromArray()}. */
+  @Param({"1", "2", "10", "40"})
+  private int step;
+
+  /** Setup benchmarks. */
+  @Setup(Level.Iteration)
+  public void prepare() {
+    allocator = new RootAllocator();
+    vector = new ViewVarCharVector("vector", allocator);
+    vector.allocateNew(VECTOR_CAPACITY, VECTOR_LENGTH);
+    arrowBuff = allocator.buffer(VECTOR_LENGTH);
+    arrowBuff.setBytes(0, bytes, 0, bytes.length);
+  }
+
+  /** Tear down benchmarks. */
+  @TearDown(Level.Iteration)
+  public void tearDown() {
+    arrowBuff.close();
+    vector.close();
+    allocator.close();
+  }
+
+  /**
+   * Test {@link BaseVariableWidthViewVector#getValueCapacity()}.
+   *
+   * @return useless. To avoid DCE by JIT.
+   */
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.NANOSECONDS)
+  public int getValueCapacity() {
+    return vector.getValueCapacity();
+  }
+
+  /**
+   * Test {@code setSafe(int, byte[])} on 500 indices spaced {@code step} apart.
+   *
+   * @return useless. To avoid DCE by JIT.
+   */
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MILLISECONDS)
+  public int setSafeFromArray() {
+    for (int i = 0; i < 500; ++i) {
+      vector.setSafe(i * step, bytes);
+    }
+    return vector.getBufferSize();
+  }
+
+  /**
+   * Test {@code setSafe} with a {@link NullableViewVarCharHolder}. Each of the 50 rounds writes
+   * nine holders with {@code isSet = 0} followed by one holder with {@code isSet = 1}.
+   *
+   * @return useless. To avoid DCE by JIT.
+   */
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MILLISECONDS)
+  public int setSafeFromNullableVarcharHolder() {
+    NullableViewVarCharHolder nvch = new NullableViewVarCharHolder();
+    nvch.buffer = arrowBuff;
+    nvch.start = 0;
+    nvch.end = bytes.length;
+    for (int i = 0; i < 50; ++i) {
+      nvch.isSet = 0;
+      for (int j = 0; j < 9; ++j) {
+        int idx = 10 * i + j;
+        vector.setSafe(idx, nvch);
+      }
+      nvch.isSet = 1;
+      vector.setSafe(10 * (i + 1), nvch);
+    }
+    return vector.getBufferSize();
+  }
+
+  /** Runs the benchmarks matching this class's simple name with one fork. */
+  public static void main(String[] args) throws RunnerException {
+    Options opt =
+        new OptionsBuilder()
+            .include(VariableWidthViewVectorBenchmarks.class.getSimpleName())
+            .forks(1)
+            .build();
+
+    new Runner(opt).run();
+  }
+  // checkstyle:on: MissingJavadocMethod
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java
index 15d21827839e2..1ad2144c5499a 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java
@@ -1367,11 +1367,13 @@ protected ArrowBuf allocateOrGetLastDataBuffer(int length) {
   protected final void setBytes(int index, byte[] value, int start, int length) {
     int writePosition = index * ELEMENT_SIZE;
 
-    // to clear the memory segment of view being written to
-    // this is helpful in case of overwriting the value
-    viewBuffer.setZero(writePosition, ELEMENT_SIZE);
-
     if (length <= INLINE_SIZE) {
+      // Zero the view slot only when it already holds data (either long is non-zero), i.e.
+      // when overwriting a value; a slot that is still all zero needs no clearing.
+      if (viewBuffer.getLong(writePosition) != 0 || viewBuffer.getLong(writePosition + 8) != 0) {
+        viewBuffer.setZero(writePosition, ELEMENT_SIZE);
+      }
+
       // allocate inline buffer
       // set length
       viewBuffer.setInt(writePosition, length);
@@ -1411,11 +1413,13 @@ protected final void setBytes(int index, byte[] value, int start, int length) {
   protected final void setBytes(int index, ArrowBuf valueBuf, int start, int length) {
     int writePosition = index * ELEMENT_SIZE;
 
-    // to clear the memory segment of view being written to
-    // this is helpful in case of overwriting the value
-    viewBuffer.setZero(writePosition, ELEMENT_SIZE);
-
     if (length <= INLINE_SIZE) {
+      // Zero the view slot only when it already holds data (either long is non-zero), i.e.
+      // when overwriting a value; a slot that is still all zero needs no clearing.
+      if (viewBuffer.getLong(writePosition) != 0 || viewBuffer.getLong(writePosition + 8) != 0) {
+        viewBuffer.setZero(writePosition, ELEMENT_SIZE);
+      }
+
       // allocate inline buffer
       // set length
       viewBuffer.setInt(writePosition, length);