Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EISW-117113][NPU] Apply the common model passes found in the compiler inside the plugin instead #28925

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions src/core/src/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -387,19 +387,15 @@ const ov::op::AutoBroadcastSpec& ov::Node::get_autob() const {

namespace ov {
ostream& operator<<(ostream& out, const Node& node) {
return node.write_description(out, 1);
return node.write_description(out, 0);
}
ostream& operator<<(ostream& out, const Node* node) {
return node->write_description(out, 1);
return node->write_description(out, 0);
}
} // namespace ov

std::ostream& ov::Node::write_description(std::ostream& out, uint32_t depth) const {
auto version = get_type_info().version_id;
if (version)
out << version << "::" << get_type_info().name << " " << get_friendly_name();
else
out << get_type_info().name << " " << get_friendly_name();
out << get_type_info().name << " " << get_friendly_name();

if (depth > 0) {
out << " (";
Expand Down
4 changes: 1 addition & 3 deletions src/core/src/pass/graph_rewrite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,14 +282,12 @@ void ov::pass::MatcherPass::register_matcher(const std::shared_ptr<ov::pass::pat
set_property(property, true);
m_matcher = m;
m_handler = [m, callback](const std::shared_ptr<Node>& node) -> bool {
OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " trying to match ", node);
if (m->match(node->output(0))) {
OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " matched ", node);
std::cout << m->get_name() << " " << node << std::endl;
OV_PASS_CALLBACK(m);

try {
const bool status = callback(*m.get());
OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " callback ", (status ? "succeded" : "failed"));
// explicitly clear Matcher state because it holds pointers to matched nodes
m->clear_state();
return status;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ class ICompilerAdapter {
virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
virtual uint32_t get_version() const = 0;

/**
* @brief Applies the common OV passes previously found inside the compiler.
*
* @param model The model on which the passes will be applied.
* @return A clone of the original model on which the passes have been applied.
*/
std::shared_ptr<ov::Model> apply_common_passes(const std::shared_ptr<const ov::Model>& model) const;

virtual ~ICompilerAdapter() = default;
};

Expand Down
188 changes: 188 additions & 0 deletions src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "intel_npu/common/icompiler_adapter.hpp"

#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/batch_to_space_fusion.hpp"
#include "transformations/common_optimizations/conv_mul_fusion.hpp"
#include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
#include "transformations/common_optimizations/depth_to_space_fusion.hpp"
#include "transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp"
#include "transformations/common_optimizations/fq_mul_fusion.hpp"
#include "transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp"
#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp"
#include "transformations/common_optimizations/moc_transformations.hpp"
#include "transformations/common_optimizations/mul_conv_fusion.hpp"
#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/mvn_fusion.hpp"
#include "transformations/common_optimizations/pad_fusion.hpp"
#include "transformations/common_optimizations/pull_through_reduce.hpp"
#include "transformations/common_optimizations/reduce_reshape_fusion.hpp"
#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp"
#include "transformations/common_optimizations/rms_fusion.hpp"
#include "transformations/common_optimizations/shuffle_channels_fusion.hpp"
#include "transformations/common_optimizations/space_to_batch_fusion.hpp"
#include "transformations/common_optimizations/strides_optimization.hpp"
#include "transformations/common_optimizations/transpose_to_reshape.hpp"
#include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
#include "transformations/control_flow/unroll_if.hpp"
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp"
#include "transformations/init_node_info.hpp"
#include "transformations/low_precision/mark_dequantization_subgraph.hpp"
#include "transformations/op_conversions/batch_norm_decomposition.hpp"
#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp"
#include "transformations/op_conversions/convert_avgpool_downgrade.hpp"
#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp"
#include "transformations/op_conversions/convert_convertlike.hpp"
#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp"
#include "transformations/op_conversions/convert_gather_upgrade.hpp"
#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp"
#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp"
#include "transformations/op_conversions/convert_maxpool_downgrade.hpp"
#include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp"
#include "transformations/op_conversions/convert_pad12_downgrade.hpp"
#include "transformations/op_conversions/convert_pad_to_group_conv.hpp"
#include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp"
#include "transformations/op_conversions/convert_reduce_to_pooling.hpp"
#include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp"
#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp"
#include "transformations/op_conversions/convert_shapeof3.hpp"
#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp"
#include "transformations/op_conversions/convert_softmax_upgrade.hpp"
#include "transformations/op_conversions/convert_topk11_downgrade.hpp"
#include "transformations/op_conversions/detection_output_downgrade.hpp"
#include "transformations/op_conversions/einsum_decomposition.hpp"
#include "transformations/op_conversions/gelu7_downgrade.hpp"
#include "transformations/op_conversions/group_normalization_decomposition.hpp"
#include "transformations/op_conversions/log_softmax_decomposition.hpp"
#include "transformations/op_conversions/normalize_l2_decomposition.hpp"
#include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp"
#include "transformations/op_conversions/softmax_decomposition.hpp"
#include "transformations/rt_info/fused_names_attribute.hpp"
#include "transformations/utils/utils.hpp"

namespace intel_npu {

/**
 * @brief Clones the given model and runs the plugin-side pipeline of common OpenVINO passes on the clone.
 *
 * @details The input model is never modified: all passes are registered on a single ov::pass::Manager
 * and executed on a clone. Once the passes have run, the clone is tagged with the
 * "common_passes_applied" runtime-info entry set to true.
 *
 * @param model The model on which the passes will be applied. It is dereferenced unconditionally,
 * so it must not be null.
 * @return A clone of the original model on which the passes have been applied.
 */
std::shared_ptr<ov::Model> ICompilerAdapter::apply_common_passes(const std::shared_ptr<const ov::Model>& model) const {
    // Deliberately not declared const: a const local cannot be implicitly moved by the return statement.
    std::shared_ptr<ov::Model> clonedModel = model->clone();

    ov::pass::Manager manager;
    manager.register_pass<ov::pass::InitNodeInfo>();

    // Element types that are handed to the dequantization-marking passes below.
    ov::element::TypeVector decompression_precisions{
        ov::element::u4,
        ov::element::i4,
        ov::element::nf4,
        ov::element::u8,
        ov::element::i8,
    };
    manager.register_pass<ov::pass::MarkDequantization>(decompression_precisions, /*fold_subtract_const=*/true);
    manager.register_pass<ov::pass::KeepConstsPrecision>(decompression_precisions, /*fold_subtract_const=*/true);
    manager.register_pass<ov::pass::ConvertQuantizeDequantize>();
    manager.register_pass<ov::pass::ConstantFolding>();
    manager.register_pass<ov::pass::ConvertScatterElementsUpdate12ToScatterElementsUpdate3>();
    manager.register_pass<ov::pass::ConvertInterpolate1ToInterpolate4>();
    manager.register_pass<ov::pass::ConvertInterpolate11ToInterpolate4>();
    manager.register_pass<ov::pass::ConvertTopK11ToTopK3>();
    manager.register_pass<ov::pass::ConvertPad12ToPad1>();
    manager.register_pass<ov::pass::ConstantFolding>();

    // MOCTransformations contain StridedSliceOptimization transformation,
    // so we must call SliceToStridedSlice before MOCTransformations call
    manager.register_pass<ov::pass::SliceToStridedSlice>(true);
    // Disable low_precision_enabled as all plugins handle low-precision sub-graph manually
    // before CommonOptimization pipeline execution
    manager.register_pass<ov::pass::MOCTransformations>(true, false);

    // Passes switched off through the manager-wide pass configuration.
    auto pass_config = manager.get_pass_config();
    pass_config->disable<ov::pass::PadFusionConvolution>();
    pass_config->disable<ov::pass::PadFusionGroupConvolution>();
    pass_config->disable<ov::pass::MVNFusionWithConstantsInside>();
    pass_config->disable<ov::pass::PullThroughReduce>();
    pass_config->disable<ov::pass::AddFakeQuantizeFusion>();
    pass_config->disable<ov::pass::FakeQuantizeMulFusion>();
    pass_config->disable<ov::pass::MulFakeQuantizeFusion>();

    // NMS conversion passes
    manager.register_pass<ov::pass::ConvertNMS1ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS3ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS4ToNMS9>();
    manager.register_pass<ov::pass::ConvertNMS5ToNMS9>();

    auto static_shape = manager.register_pass<ov::pass::GraphRewrite>();
    static_shape->add_matcher<ov::pass::ConvertNMS9ToNMSIEInternal>();
    static_shape->set_name("ov::pass::CommonStaticShape");

    auto common_fusions = manager.register_pass<ov::pass::GraphRewrite>();
    common_fusions->add_matcher<ov::pass::DepthToSpaceFusion>();
    common_fusions->add_matcher<ov::pass::ShuffleChannelsFusion>(false);
    common_fusions->add_matcher<ov::pass::SpaceToBatchFusion>();
    common_fusions->add_matcher<ov::pass::BatchToSpaceFusion>();
    common_fusions->add_matcher<ov::pass::TransposeToReshape>();
    common_fusions->add_matcher<ov::pass::RMSFusion>();
    common_fusions->set_name("ov::pass::CommonFusions");

    auto decomp = manager.register_pass<ov::pass::GraphRewrite>();
    decomp->add_matcher<ov::pass::Gelu7Downgrade>();
    decomp->add_matcher<ov::pass::BidirectionalGRUSequenceDecomposition>();
    decomp->add_matcher<ov::pass::BidirectionalRNNSequenceDecomposition>();
    decomp->add_matcher<ov::pass::ConvertBroadcastToTiles>();
    decomp->add_matcher<ov::pass::ConvertConvertLike>();
    decomp->add_matcher<ov::pass::BatchNormDecomposition>();
    decomp->add_matcher<ov::pass::EinsumDecomposition>();
    decomp->add_matcher<ov::pass::DropoutWithRandomUniformReplacer>();
    decomp->add_matcher<ov::pass::ScaledDotProductAttentionDecomposition>();
    decomp->add_matcher<ov::pass::GroupNormalizationDecomposition>();
    decomp->set_name("ov::pass::CommonDecompositions");

    // CF is required after all decompositions
    manager.register_pass<ov::pass::ConstantFolding>();

    // LinOpSequenceFusion must be executed after all decompositions
    manager.register_pass<ov::pass::LinOpSequenceFusion>();
    manager.register_pass<ov::pass::UnrollIf>();

    auto conv_fusions = manager.register_pass<ov::pass::GraphRewrite>();
    conv_fusions->add_matcher<ov::pass::ConvolutionMultiplyFusion>();
    conv_fusions->add_matcher<ov::pass::GroupConvolutionMultiplyFusion>();
    conv_fusions->add_matcher<ov::pass::ConvolutionBackpropDataMultiplyFusion>();
    conv_fusions->add_matcher<ov::pass::GroupConvolutionBackpropDataMultiplyFusion>();
    conv_fusions->add_matcher<ov::pass::MultiplyConvolutionFusion>();
    conv_fusions->add_matcher<ov::pass::MultiplyGroupConvolutionFusion>();
    conv_fusions->add_matcher<ov::pass::MultiplyConvolutionBackpropDataFusion>();
    conv_fusions->add_matcher<ov::pass::MultiplyGroupConvolutionBackpropDataFusion>();
    conv_fusions->set_name("ov::pass::ConvFusions");

    manager.register_pass<ov::pass::ConstantFolding>();
    manager.register_pass<ov::pass::ConvertGather1ToGather7>();
    manager.register_pass<ov::pass::ConvertGather7ToGather8>();
    manager.register_pass<ov::pass::ConvertDeformableConv8To1>();
    manager.register_pass<ov::pass::ConvertMaxPool14ToMaxPool8>();
    manager.register_pass<ov::pass::ConvertMaxPool8ToMaxPool1>();
    manager.register_pass<ov::pass::ConvertAvgPool14ToAvgPool1>();
    manager.register_pass<ov::pass::ConvertSoftMax1ToSoftMax8>();
    manager.register_pass<ov::pass::ConvertDetectionOutput8ToDetectionOutput1>();
    manager.register_pass<ov::pass::ConvertShapeOf3>();

    // StridesOptimization should be at the very end
    // because we cannot insert any MaxPools since they may prevent
    // other optimizations
    manager.register_pass<ov::pass::StridesOptimization>();
    // ConvertSoftMax1ToSoftMax8 is already registered a few lines above, so it is not registered a second
    // time here. NOTE(review): this assumes none of the passes registered in between (detection-output
    // and ShapeOf conversions, StridesOptimization) introduce v1 Softmax nodes - confirm.

    // NOTE(review): temporary tracing written straight to stdout; consider routing it through the plugin
    // logger (or removing it) before this leaves draft state.
    std::cout << "Starting NPU plugin passes" << std::endl;
    std::cout << "Columns: Pass NodeType NodeFriendlyName" << std::endl;
    manager.run_passes(clonedModel);
    std::cout << "Ending NPU plugin passes" << std::endl;

    // Notifies the compiler to skip applying the passes on its end
    clonedModel->set_rt_info(true, "common_passes_applied");

    return clonedModel;
}

} // namespace intel_npu
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter {
std::string serializeIOInfo(const std::shared_ptr<const ov::Model>& model, const bool useIndices) const;

SerializedIR serializeIR(const std::shared_ptr<const ov::Model>& model,
ze_graph_compiler_version_info_t compilerVersion,
const uint32_t supportedOpsetVersion) const;
ze_graph_compiler_version_info_t compilerVersion) const;

std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t compilerVersion) const;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,11 @@
#include "intel_npu/utils/logger/logger.hpp"
#include "openvino/pass/manager.hpp"

/**
* @brief Contains all transformations required on an OpenVINO model when an external compiler is used,
* providing forward compatibility (OV model with opset N+M, external compiler with opset N)
*/
namespace intel_npu::driver_compiler_utils {

class IRSerializer {
public:
IRSerializer(const std::shared_ptr<const ov::Model>& origModel, const uint32_t supportedOpset = 11);
IRSerializer(const std::shared_ptr<const ov::Model>& origModel);

size_t getXmlSize() const {
return _xmlSize;
Expand Down Expand Up @@ -47,7 +43,6 @@ class IRSerializer {

Logger _logger;
std::shared_ptr<ov::Model> _model = nullptr;
uint32_t _supportedOpset = 11;
size_t _xmlSize = 0;
size_t _weightsSize = 0;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,30 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<con
const auto maxOpsetVersion = _compilerProperties.maxOVOpsetVersionSupported;
_logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion);

std::cout << compilerVersion.major << " " << compilerVersion.minor << std::endl;

std::shared_ptr<const ov::Model> modelAfterPasses = model;
if ((compilerVersion.major > 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2)) {
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
modelAfterPasses = apply_common_passes(model);
std::cout
<< "Running common passes "
<< std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - begin).count()
<< "[microseconds]" << std::endl;

_logger.debug("Common OV passes have been applied inside the plugin");
} else {
_logger.debug("No common OV passes have been applied inside the plugin");
}

_logger.debug("serialize IR");
auto serializedIR = serializeIR(model, compilerVersion, maxOpsetVersion);
auto serializedIR = serializeIR(modelAfterPasses, compilerVersion);

std::string buildFlags;
const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9));

_logger.debug("build flags");
buildFlags += serializeIOInfo(model, useIndices);
buildFlags += serializeIOInfo(modelAfterPasses, useIndices);
buildFlags += " ";
buildFlags += serializeConfig(config, compilerVersion);

Expand All @@ -190,7 +206,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<con

OV_ITT_TASK_NEXT(COMPILE_BLOB, "getNetworkMeta");
auto networkMeta = _zeGraphExt->getNetworkMeta(graphHandle);
networkMeta.name = model->get_friendly_name();
networkMeta.name = modelAfterPasses->get_friendly_name();

return std::make_shared<DriverGraph>(_zeGraphExt,
_zeroInitStruct,
Expand Down Expand Up @@ -228,8 +244,17 @@ ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov:
const auto maxOpsetVersion = _compilerProperties.maxOVOpsetVersionSupported;
_logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion);

std::shared_ptr<const ov::Model> modelAfterPasses = model;
if ((compilerVersion.major > 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2)) {
modelAfterPasses = apply_common_passes(model);

_logger.debug("Common OV passes have been applied inside the plugin");
} else {
_logger.debug("No common OV passes have been applied inside the plugin");
}

_logger.debug("serialize IR");
auto serializedIR = serializeIR(model, compilerVersion, maxOpsetVersion);
auto serializedIR = serializeIR(modelAfterPasses, compilerVersion);

std::string buildFlags;
buildFlags += serializeConfig(config, compilerVersion);
Expand Down Expand Up @@ -261,9 +286,8 @@ uint32_t DriverCompilerAdapter::get_version() const {
* @details Format of the memory:
*/
SerializedIR DriverCompilerAdapter::serializeIR(const std::shared_ptr<const ov::Model>& model,
ze_graph_compiler_version_info_t compilerVersion,
const uint32_t supportedOpsetVersion) const {
driver_compiler_utils::IRSerializer irSerializer(model, supportedOpsetVersion);
ze_graph_compiler_version_info_t compilerVersion) const {
driver_compiler_utils::IRSerializer irSerializer(model);

// Contract between adapter and compiler in driver
const uint32_t maxNumberOfElements = 10;
Expand Down
Loading
Loading