Skip to content

Commit

Permalink
Fix comment
Browse files Browse the repository at this point in the history
  • Loading branch information
PHILO-HE committed Nov 29, 2024
1 parent 3420bf9 commit d5d2655
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 41 deletions.
6 changes: 3 additions & 3 deletions velox/docs/functions/spark/string.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ String Functions
Returns the concatenation result for ``string`` and all elements in ``array<string>``, separated
by ``separator``. The type of ``separator`` is VARCHAR. It can take variable number of remaining
arguments, and it allows mixed use of ``string`` and ``array<string>``. Skips NULL argument or
array element during the concatenation. If ``separator`` is NULL, returns NULL, regardless of the
following inputs. For non-NULL ``separator``, if no remaining input or all remaining inputs are
NULL, returns an empty string. ::
NULL array element during the concatenation. If ``separator`` is NULL, returns NULL, regardless
of the following inputs. For non-NULL ``separator``, if no remaining input or all remaining inputs
are NULL, returns an empty string. ::

SELECT concat_ws('~', 'a', 'b', 'c'); -- 'a~b~c'
SELECT concat_ws('~', ['a', 'b', 'c'], ['d']); -- 'a~b~c~d'
Expand Down
68 changes: 30 additions & 38 deletions velox/functions/sparksql/ConcatWs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,10 @@ class ConcatWs : public exec::VectorFunction {
const SelectivityVector& rows,
exec::EvalCtx& context,
std::vector<exec::LocalDecodedVector>& decodedArrays,
const std::vector<DecodedVector>& decodedElements,
const std::vector<std::optional<std::string>>& constantStrings,
const std::vector<exec::LocalDecodedVector>& decodedStringArgs,
const exec::LocalDecodedVector& decodedSeparator) const {
const auto arrayArgNum = decodedArrays.size();
std::vector<const ArrayVector*> arrayVectors;
arrayVectors.reserve(arrayArgNum);
std::vector<DecodedVector> elementsDecodedVectors;
elementsDecodedVectors.reserve(arrayArgNum);
for (auto i = 0; i < arrayArgNum; ++i) {
auto arrayVector = decodedArrays[i].get()->base()->as<ArrayVector>();
arrayVectors.push_back(arrayVector);
SelectivityVector nestedRows(arrayVector->elements()->size());
elementsDecodedVectors.emplace_back(*arrayVector->elements(), nestedRows);
}

size_t totalResultBytes = 0;
rows.applyToSelected([&](auto row) {
// NULL separator produces NULL result.
Expand All @@ -57,21 +46,21 @@ class ConcatWs : public exec::VectorFunction {
}
int32_t allElements = 0;
// Calculate size for array columns data.
for (auto i = 0; i < arrayArgNum; i++) {
for (auto i = 0; i < decodedArrays.size(); i++) {
if (decodedArrays[i]->isNullAt(row)) {
continue;
}
auto indices = decodedArrays[i].get()->indices();
auto size = arrayVectors[i]->sizeAt(indices[row]);
auto offset = arrayVectors[i]->offsetAt(indices[row]);
auto arrayVector = decodedArrays[i].get()->base()->as<ArrayVector>();
auto size = arrayVector->sizeAt(indices[row]);
auto offset = arrayVector->offsetAt(indices[row]);

for (auto j = 0; j < size; ++j) {
if (!elementsDecodedVectors[i].isNullAt(offset + j)) {
auto element =
elementsDecodedVectors[i].valueAt<StringView>(offset + j);
if (!decodedElements[i].isNullAt(offset + j)) {
// No matter empty string or not.
++allElements;
totalResultBytes += element.size();
totalResultBytes +=
decodedElements[i].valueAt<StringView>(offset + j).size();
}
}
}
Expand Down Expand Up @@ -116,12 +105,17 @@ class ConcatWs : public exec::VectorFunction {
const std::vector<VectorPtr>& args,
const exec::EvalCtx& context,
std::vector<exec::LocalDecodedVector>& decodedArrays,
std::vector<DecodedVector>& decodedElements,
std::vector<column_index_t>& argMapping,
std::vector<std::optional<std::string>>& constantStrings,
std::vector<exec::LocalDecodedVector>& decodedStringArgs) const {
for (auto i = 1; i < args.size(); ++i) {
if (args[i] && args[i]->typeKind() == TypeKind::ARRAY) {
decodedArrays.emplace_back(context, *args[i], rows);
auto arrayVector =
decodedArrays.back().get()->base()->as<ArrayVector>();
SelectivityVector nestedRows(arrayVector->elements()->size());
decodedElements.emplace_back(*arrayVector->elements(), nestedRows);
continue;
}
argMapping.push_back(i);
Expand Down Expand Up @@ -185,11 +179,14 @@ class ConcatWs : public exec::VectorFunction {
std::vector<exec::LocalDecodedVector> decodedStringArgs;
decodedStringArgs.reserve(numArgs);

std::vector<DecodedVector> decodedElements;
decodedElements.reserve(numArgs - 1);
initVectors(
rows,
args,
context,
decodedArrays,
decodedElements,
argMapping,
constantStrings,
decodedStringArgs);
Expand All @@ -202,18 +199,11 @@ class ConcatWs : public exec::VectorFunction {
rows,
context,
decodedArrays,
decodedElements,
constantStrings,
decodedStringArgs,
decodedSeparator);

std::vector<const ArrayVector*> arrayVectors;
std::vector<DecodedVector> elementsDecodedVectors;
for (auto& decodedArray : decodedArrays) {
auto arrayVector = decodedArray.get()->base()->as<ArrayVector>();
arrayVectors.push_back(arrayVector);
SelectivityVector nestedRows(arrayVector->elements()->size());
elementsDecodedVectors.emplace_back(*arrayVector->elements(), nestedRows);
}
// Allocate a string buffer.
auto rawBuffer =
flatResult.getRawStringBufferWithSpace(totalResultBytes, true);
Expand All @@ -223,7 +213,7 @@ class ConcatWs : public exec::VectorFunction {
result->setNull(row, true);
return;
}
const char* start = rawBuffer;
uint32_t bufferOffset = 0;
auto isFirst = true;
// For array arg.
int32_t i = 0;
Expand All @@ -240,11 +230,11 @@ class ConcatWs : public exec::VectorFunction {
isFirst = false;
} else {
// Add separator before the current value.
memcpy(rawBuffer, separator.data(), separator.size());
rawBuffer += separator.size();
memcpy(rawBuffer + bufferOffset, separator.data(), separator.size());
bufferOffset += separator.size();
}
memcpy(rawBuffer, value, valueSize);
rawBuffer += valueSize;
memcpy(rawBuffer + bufferOffset, value, valueSize);
bufferOffset += valueSize;
};

for (auto itArgs = args.begin() + 1; itArgs != args.end(); ++itArgs) {
Expand All @@ -254,13 +244,13 @@ class ConcatWs : public exec::VectorFunction {
continue;
}
auto indices = decodedArrays[i].get()->indices();
auto size = arrayVectors[i]->sizeAt(indices[row]);
auto offset = arrayVectors[i]->offsetAt(indices[row]);
auto arrayVector = decodedArrays[i].get()->base()->as<ArrayVector>();
auto size = arrayVector->sizeAt(indices[row]);
auto offset = arrayVector->offsetAt(indices[row]);

for (int k = 0; k < size; ++k) {
if (!elementsDecodedVectors[i].isNullAt(offset + k)) {
auto element =
elementsDecodedVectors[i].valueAt<StringView>(offset + k);
if (!decodedElements[i].isNullAt(offset + k)) {
auto element = decodedElements[i].valueAt<StringView>(offset + k);
copyToBuffer(element.data(), element.size());
}
}
Expand Down Expand Up @@ -289,7 +279,9 @@ class ConcatWs : public exec::VectorFunction {
}
++j;
}
flatResult.setNoCopy(row, StringView(start, rawBuffer - start));
VELOX_USER_CHECK_LE(bufferOffset, INT32_MAX);
flatResult.setNoCopy(row, StringView(rawBuffer, bufferOffset));
rawBuffer += bufferOffset;
});
}

Expand Down

0 comments on commit d5d2655

Please sign in to comment.