Skip to content

Commit

Permalink
Merge pull request #27970 from vespa-engine/arnej/better-check-in-ten…
Browse files Browse the repository at this point in the history
…sor-from

more robust checking of value type
  • Loading branch information
baldersheim authored Aug 7, 2023
2 parents ce5a603 + 2da5042 commit 217271f
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ TensorFactoryBlueprint::TensorFactoryBlueprint(const vespalib::string &baseName)
: Blueprint(baseName),
_sourceType(),
_sourceParam(),
_dimension("0") // default dimension is set to the source param if not specified.
_dimension("0"), // default dimension is set to the source param if not specified.
_valueType(vespalib::eval::ValueType::error_type())
{
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <vespa/searchlib/fef/blueprint.h>
#include <vespa/vespalib/stllike/string.h>
#include <vespa/eval/eval/value_type.h>

namespace search::features {

Expand All @@ -19,6 +20,7 @@ class TensorFactoryBlueprint : public fef::Blueprint
vespalib::string _sourceType;
vespalib::string _sourceParam;
vespalib::string _dimension;
vespalib::eval::ValueType _valueType;

bool extractSource(const vespalib::string &source);
TensorFactoryBlueprint(const vespalib::string &baseName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ class TensorFromAttributeExecutor : public fef::FeatureExecutor

public:
TensorFromAttributeExecutor(const search::attribute::IAttributeVector *attribute,
const vespalib::string &dimension)
const vespalib::eval::ValueType &valueType)
: _attribute(attribute),
_type(vespalib::eval::ValueType::make_type(CellType::DOUBLE, {{dimension}})),
_type(valueType),
_attrBuffer(),
_addr_ref(),
_tensor()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,65 +41,74 @@ TensorFromLabelsBlueprint::setup(const search::fef::IIndexEnvironment &env,
// _params[0] = source ('attribute(name)' OR 'query(param)');
// _params[1] = dimension (optional);
bool validSource = extractSource(params[0].getValue());
if (! validSource) {
return fail("invalid source: '%s'", params[0].getValue().c_str());
}
if (params.size() == 2) {
_dimension = params[1].getValue();
} else {
_dimension = _sourceParam;
}
auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}});
_valueType = ValueType::from_spec(vt.to_spec());
if (_valueType.is_error()) {
return fail("invalid dimension name: '%s'", _dimension.c_str());
}
describeOutput("tensor",
"The tensor created from the given source (attribute field or query parameter)",
FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}})));
return validSource;
FeatureType::object(_valueType));
return true;
}

namespace {

// Creates the executor used by the tensor_from_labels feature when the source is an attribute.
//
// Looks up the attribute named 'attrName' in the attribute context of the query environment.
// If the attribute is missing, is of floating point type, or is a weighted set, an issue is
// reported and an executor producing an empty tensor is returned. Otherwise a
// TensorFromAttributeExecutor is created in 'stash', with the content type chosen by whether
// the attribute is of integer type.
//
// NOTE(review): this listing looks like a rendered diff without +/- markers. Where two
// near-identical lines follow each other, the one using 'dimension' appears to be the removed
// version and the one using 'valueType' the added version - confirm against the repository.
FeatureExecutor &
createAttributeExecutor(const search::fef::IQueryEnvironment &env,
const vespalib::string &attrName,
const vespalib::string &dimension, vespalib::Stash &stash)
const ValueType &valueType,
vespalib::Stash &stash)
{
const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName);
// Attribute lookup failed: report the problem and fall back to an empty tensor.
if (attribute == NULL) {
Issue::report("tensor_from_labels feature: The attribute vector '%s' was not found."
" Returning empty tensor.", attrName.c_str());
return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
return ConstantTensorExecutor::createEmpty(valueType, stash);
}
// Floating point attributes are rejected; only string or integer basic types are accepted.
if (attribute->isFloatingPointType()) {
Issue::report("tensor_from_labels feature: The attribute vector '%s' must have basic type string or integer."
" Returning empty tensor.", attrName.c_str());
return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
return ConstantTensorExecutor::createEmpty(valueType, stash);
}
// Weighted set attributes are rejected here; the issue text points to tensorFromWeightedSet.
if (attribute->getCollectionType() == search::attribute::CollectionType::WSET) {
Issue::report("tensor_from_labels feature: The attribute vector '%s' is a weighted set - use tensorFromWeightedSet instead."
" Returning empty tensor.", attrName.c_str());
return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
return ConstantTensorExecutor::createEmpty(valueType, stash);
}
// Note that for array attribute vectors the default weight is 1.0 for all values.
// This means we can get the attribute content as weighted content and build
// the tensor the same way as with weighted set attributes in tensorFromWeightedSet.
if (attribute->isIntegerType()) {
// Using WeightedStringContent ensures that the integer values are converted
// to strings while extracting them from the attribute.
return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, dimension);
return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, valueType);
}
// When the underlying attribute is of type string we can reference these values
// using WeightedConstCharContent.
return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, dimension);
return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, valueType);
}

FeatureExecutor &
createQueryExecutor(const search::fef::IQueryEnvironment &env,
const vespalib::string &queryKey,
const vespalib::string &dimension, vespalib::Stash &stash)
const ValueType &valueType,
vespalib::Stash &stash)
{
ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}});
search::fef::Property prop = env.getProperties().lookup(queryKey);
if (prop.found() && !prop.get().empty()) {
std::vector<vespalib::string> vector;
ArrayParser::parse(prop.get(), vector);
auto factory = FastValueBuilderFactory::get();
auto builder = factory.create_value_builder<double>(type, 1, 1, vector.size());
auto builder = factory.create_value_builder<double>(valueType, 1, 1, vector.size());
std::vector<vespalib::stringref> addr_ref;
for (const auto &elem : vector) {
addr_ref.clear();
Expand All @@ -109,7 +118,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env,
}
return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash);
}
return ConstantTensorExecutor::createEmpty(type, stash);
return ConstantTensorExecutor::createEmpty(valueType, stash);
}

}
Expand All @@ -118,11 +127,11 @@ FeatureExecutor &
TensorFromLabelsBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
{
if (_sourceType == ATTRIBUTE_SOURCE) {
return createAttributeExecutor(env, _sourceParam, _dimension, stash);
return createAttributeExecutor(env, _sourceParam, _valueType, stash);
} else if (_sourceType == QUERY_SOURCE) {
return createQueryExecutor(env, _sourceParam, _dimension, stash);
return createQueryExecutor(env, _sourceParam, _valueType, stash);
}
return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash);
return ConstantTensorExecutor::createEmpty(_valueType, stash);
}

} // namespace features
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,61 +54,69 @@ TensorFromWeightedSetBlueprint::setup(const search::fef::IIndexEnvironment &env,
// _params[0] = source ('attribute(name)' OR 'query(param)');
// _params[1] = dimension (optional);
bool validSource = extractSource(params[0].getValue());
if (! validSource) {
return fail("invalid source: '%s'", params[0].getValue().c_str());
}
if (params.size() == 2) {
_dimension = params[1].getValue();
} else {
_dimension = _sourceParam;
}
auto vt = ValueType::make_type(CellType::DOUBLE, {{_dimension}});
_valueType = ValueType::from_spec(vt.to_spec());
if (_valueType.is_error()) {
return fail("invalid dimension name: '%s'", _dimension.c_str());
}
describeOutput("tensor",
"The tensor created from the given weighted set source (attribute field or query parameter)",
FeatureType::object(ValueType::make_type(CellType::DOUBLE, {{_dimension}})));
return validSource;
FeatureType::object(_valueType));
return true;
}

namespace {

// Creates the executor used by the tensor_from_weighted_set feature when the source is an attribute.
//
// Looks up the attribute named 'attrName' in the attribute context of the query environment.
// If the attribute is missing, is not a weighted set, or is of floating point type, an issue
// is reported and an executor producing an empty tensor is returned. Otherwise a
// TensorFromAttributeExecutor is created in 'stash', with the content type chosen by whether
// the attribute is of integer type.
//
// NOTE(review): this listing looks like a rendered diff without +/- markers. Where two
// near-identical lines follow each other, the one using 'dimension' appears to be the removed
// version and the one using 'valueType' the added version - confirm against the repository.
FeatureExecutor &
createAttributeExecutor(const search::fef::IQueryEnvironment &env,
const vespalib::string &attrName,
const vespalib::string &dimension,
const ValueType &valueType,
vespalib::Stash &stash)
{
const IAttributeVector *attribute = env.getAttributeContext().getAttribute(attrName);
// Attribute lookup failed: report the problem and fall back to an empty tensor.
if (attribute == NULL) {
Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' was not found."
" Returning empty tensor.", attrName.c_str());
return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
return ConstantTensorExecutor::createEmpty(valueType, stash);
}
// Only weighted set attributes that are not floating point are accepted.
if (attribute->getCollectionType() != search::attribute::CollectionType::WSET ||
attribute->isFloatingPointType())
{
Issue::report("tensor_from_weighted_set feature: The attribute vector '%s' is NOT of type weighted set of string or integer."
" Returning empty tensor.", attrName.c_str());
return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{dimension}}), stash);
return ConstantTensorExecutor::createEmpty(valueType, stash);
}
if (attribute->isIntegerType()) {
// Using WeightedStringContent ensures that the integer values are converted
// to strings while extracting them from the attribute.
return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, dimension);
return stash.create<TensorFromAttributeExecutor<WeightedStringContent>>(attribute, valueType);
}
// When the underlying attribute is of type string we can reference these values
// using WeightedConstCharContent.
return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, dimension);
return stash.create<TensorFromAttributeExecutor<WeightedConstCharContent>>(attribute, valueType);
}

FeatureExecutor &
createQueryExecutor(const search::fef::IQueryEnvironment &env,
const vespalib::string &queryKey,
const vespalib::string &dimension, vespalib::Stash &stash)
const ValueType &valueType,
vespalib::Stash &stash)
{
ValueType type = ValueType::make_type(CellType::DOUBLE, {{dimension}});
search::fef::Property prop = env.getProperties().lookup(queryKey);
if (prop.found() && !prop.get().empty()) {
WeightedStringVector vector;
WeightedSetParser::parse(prop.get(), vector);
auto factory = FastValueBuilderFactory::get();
size_t sz = vector._data.size();
auto builder = factory.create_value_builder<double>(type, 1, 1, sz);
auto builder = factory.create_value_builder<double>(valueType, 1, 1, sz);
std::vector<vespalib::stringref> addr_ref;
for (const auto &elem : vector._data) {
addr_ref.clear();
Expand All @@ -118,7 +126,7 @@ createQueryExecutor(const search::fef::IQueryEnvironment &env,
}
return ConstantTensorExecutor::create(builder->build(std::move(builder)), stash);
}
return ConstantTensorExecutor::createEmpty(type, stash);
return ConstantTensorExecutor::createEmpty(valueType, stash);
}

}
Expand All @@ -127,11 +135,11 @@ FeatureExecutor &
TensorFromWeightedSetBlueprint::createExecutor(const search::fef::IQueryEnvironment &env, vespalib::Stash &stash) const
{
if (_sourceType == ATTRIBUTE_SOURCE) {
return createAttributeExecutor(env, _sourceParam, _dimension, stash);
return createAttributeExecutor(env, _sourceParam, _valueType, stash);
} else if (_sourceType == QUERY_SOURCE) {
return createQueryExecutor(env, _sourceParam, _dimension, stash);
return createQueryExecutor(env, _sourceParam, _valueType, stash);
}
return ConstantTensorExecutor::createEmpty(ValueType::make_type(CellType::DOUBLE, {{_dimension}}), stash);
return ConstantTensorExecutor::createEmpty(_valueType, stash);
}

} // namespace features
Expand Down

0 comments on commit 217271f

Please sign in to comment.