From 550d712c06ad3243da5b1ca8b895eb08a41c8d2d Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Mon, 10 Feb 2025 21:39:36 +0000 Subject: [PATCH 1/6] Adding the passes inside the plugin --- .../include/driver_compiler_adapter.hpp | 3 +- .../include/ir_serializer.hpp | 11 +- .../src/driver_compiler_adapter.cpp | 212 +++++++++++++++++- .../compiler_adapter/src/ir_serializer.cpp | 18 +- 4 files changed, 231 insertions(+), 13 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index daf401100e6239..4300095c23af61 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -48,7 +48,8 @@ class DriverCompilerAdapter final : public ICompilerAdapter { SerializedIR serializeIR(const std::shared_ptr& model, ze_graph_compiler_version_info_t compilerVersion, - const uint32_t supportedOpsetVersion) const; + const uint32_t supportedOpsetVersio, + const bool applyCommonPasses) const; std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t compilerVersion) const; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp index fbb098a1238228..a23ee9d3ebe862 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp @@ -19,7 +19,9 @@ namespace intel_npu::driver_compiler_utils { class IRSerializer { public: - IRSerializer(const std::shared_ptr& origModel, const uint32_t supportedOpset = 11); + IRSerializer(const std::shared_ptr& origModel, + const uint32_t supportedOpset = 11, + const bool commonPassesApplied = false); size_t getXmlSize() const { return _xmlSize; @@ -50,6 +52,13 @@ class IRSerializer { uint32_t 
_supportedOpset = 11; size_t _xmlSize = 0; size_t _weightsSize = 0; + + /** + * @brief Indicates whether or not the OV common passes have already been applied on the model. + * @details This attribute will be stored inside model's runtime information in order to be sent to the compiler. + * The compiler will thus know if applying the same passes on its end is required. + */ + bool _commonPassesApplied; }; } // namespace intel_npu::driver_compiler_utils diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 624ba448fed44f..b48e61cfc17240 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -19,6 +19,67 @@ #include "intel_npu/utils/zero/zero_utils.hpp" #include "ir_serializer.hpp" #include "openvino/core/model.hpp" +#include "openvino/pass/constant_folding.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/batch_to_space_fusion.hpp" +#include "transformations/common_optimizations/conv_mul_fusion.hpp" +#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" +#include "transformations/common_optimizations/depth_to_space_fusion.hpp" +#include "transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp" +#include "transformations/common_optimizations/fq_mul_fusion.hpp" +#include "transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp" +#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" +#include "transformations/common_optimizations/moc_transformations.hpp" +#include "transformations/common_optimizations/mul_conv_fusion.hpp" +#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp" +#include 
"transformations/common_optimizations/mvn_fusion.hpp" +#include "transformations/common_optimizations/pad_fusion.hpp" +#include "transformations/common_optimizations/pull_through_reduce.hpp" +#include "transformations/common_optimizations/reduce_reshape_fusion.hpp" +#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/rms_fusion.hpp" +#include "transformations/common_optimizations/shuffle_channels_fusion.hpp" +#include "transformations/common_optimizations/space_to_batch_fusion.hpp" +#include "transformations/common_optimizations/strides_optimization.hpp" +#include "transformations/common_optimizations/transpose_to_reshape.hpp" +#include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp" +#include "transformations/control_flow/unroll_if.hpp" +#include "transformations/control_flow/unroll_tensor_iterator.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" +#include "transformations/init_node_info.hpp" +#include "transformations/low_precision/mark_dequantization_subgraph.hpp" +#include "transformations/op_conversions/batch_norm_decomposition.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_avgpool_downgrade.hpp" +#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" +#include "transformations/op_conversions/convert_convertlike.hpp" +#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" +#include "transformations/op_conversions/convert_gather_upgrade.hpp" +#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp" +#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" +#include "transformations/op_conversions/convert_maxpool_downgrade.hpp" +#include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp" +#include 
"transformations/op_conversions/convert_pad12_downgrade.hpp" +#include "transformations/op_conversions/convert_pad_to_group_conv.hpp" +#include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp" +#include "transformations/op_conversions/convert_reduce_to_pooling.hpp" +#include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp" +#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" +#include "transformations/op_conversions/convert_shapeof3.hpp" +#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" +#include "transformations/op_conversions/convert_softmax_upgrade.hpp" +#include "transformations/op_conversions/convert_topk11_downgrade.hpp" +#include "transformations/op_conversions/detection_output_downgrade.hpp" +#include "transformations/op_conversions/einsum_decomposition.hpp" +#include "transformations/op_conversions/gelu7_downgrade.hpp" +#include "transformations/op_conversions/group_normalization_decomposition.hpp" +#include "transformations/op_conversions/log_softmax_decomposition.hpp" +#include "transformations/op_conversions/normalize_l2_decomposition.hpp" +#include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp" +#include "transformations/op_conversions/softmax_decomposition.hpp" +#include "transformations/rt_info/fused_names_attribute.hpp" +#include "transformations/utils/utils.hpp" namespace { @@ -41,6 +102,122 @@ const std::vector NC_TO_CN_LAYOUT_DIMENSIONS_ORDER = {1, 0}; const std::vector NCHW_TO_NHWC_LAYOUT_DIMENSIONS_ORDER = {0, 2, 3, 1}; const std::vector NCDHW_TO_NDHWC_LAYOUT_DIMENSIONS_ORDER = {0, 2, 3, 4, 1}; +/** + * @brief Applies the common OV passes previously found in the compiler. + * + * @param model The target model. + * @return A clone of the original model on which the passes have been applied. 
+ */ +std::shared_ptr applyCommonPasses(const std::shared_ptr& model) { + const std::shared_ptr clonedModel = model->clone(); + + ov::pass::Manager manager; + manager.register_pass(); + ov::element::TypeVector decompression_precisions{ + ov::element::u4, + ov::element::i4, + ov::element::nf4, + ov::element::u8, + ov::element::i8, + }; + manager.register_pass(decompression_precisions, /*fold_subtract_const=*/true); + manager.register_pass(decompression_precisions, /*fold_subtract_const=*/true); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + // MOCTransformations contain StridedSliceOptimization transformation, + // so we must call SliceToStridedSlice before MOCTransformations call + manager.register_pass(true); + // Disable low_precision_enabled as all plugins handle low-precision sub-graph manually + // before CommonOptimization pipeline execution + manager.register_pass(true, false); + + auto pass_config = manager.get_pass_config(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + + // NMS conversion passes + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + auto static_shape = manager.register_pass(); + static_shape->add_matcher(); + static_shape->set_name("ov::pass::CommonStaticShape"); + + auto common_fusions = manager.register_pass(); + common_fusions->add_matcher(); + common_fusions->add_matcher(false); + common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->set_name("ov::pass::CommonFusions"); + + auto decomp = manager.register_pass(); + decomp->add_matcher(); + decomp->add_matcher(); + 
decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->set_name("ov::pass::CommonDecompositions"); + + // CF is required after all decompositions + manager.register_pass(); + + // LinOpSequenceFusion must be executed after all decompositions + manager.register_pass(); + manager.register_pass(); + + auto conv_fusions = manager.register_pass(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->set_name("ov::pass::ConvFusions"); + + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + // StridesOptimization should be at the very end + // because we cannot insert any MaxPools since they may prevent + // other optimizations + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(clonedModel); + return clonedModel; +} + /** * @brief A standard copy function concerning memory segments. Additional checks on the given arguments are performed * before copying. 
@@ -164,14 +341,25 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr modelAfterPasses = model; + const bool applyCommonPassesFlag = + (compilerVersion.major > 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2); + if (applyCommonPassesFlag) { + modelAfterPasses = applyCommonPasses(model); + + _logger.debug("Common OV passes have been applied inside the plugin"); + } else { + _logger.debug("No common OV passes have been applied inside the plugin"); + } + _logger.debug("serialize IR"); - auto serializedIR = serializeIR(model, compilerVersion, maxOpsetVersion); + auto serializedIR = serializeIR(modelAfterPasses, compilerVersion, maxOpsetVersion, applyCommonPassesFlag); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); _logger.debug("build flags"); - buildFlags += serializeIOInfo(model, useIndices); + buildFlags += serializeIOInfo(modelAfterPasses, useIndices); buildFlags += " "; buildFlags += serializeConfig(config, compilerVersion); @@ -190,7 +378,7 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptrgetNetworkMeta(graphHandle); - networkMeta.name = model->get_friendly_name(); + networkMeta.name = modelAfterPasses->get_friendly_name(); return std::make_shared(_zeGraphExt, _zeroInitStruct, @@ -228,8 +416,19 @@ ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr modelAfterPasses = model; + const bool applyCommonPassesFlag = + (compilerVersion.major > 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2); + if (applyCommonPassesFlag) { + modelAfterPasses = applyCommonPasses(model); + + _logger.info("Common OV passes have been applied inside the plugin"); + } else { + _logger.info("No common OV passes have been applied inside the plugin"); + } + _logger.debug("serialize IR"); - auto serializedIR = serializeIR(model, compilerVersion, maxOpsetVersion); + auto serializedIR = 
serializeIR(modelAfterPasses, compilerVersion, maxOpsetVersion, applyCommonPassesFlag); std::string buildFlags; buildFlags += serializeConfig(config, compilerVersion); @@ -262,8 +461,9 @@ uint32_t DriverCompilerAdapter::get_version() const { */ SerializedIR DriverCompilerAdapter::serializeIR(const std::shared_ptr& model, ze_graph_compiler_version_info_t compilerVersion, - const uint32_t supportedOpsetVersion) const { - driver_compiler_utils::IRSerializer irSerializer(model, supportedOpsetVersion); + const uint32_t supportedOpsetVersion, + const bool commonPassesApplied) const { + driver_compiler_utils::IRSerializer irSerializer(model, supportedOpsetVersion, commonPassesApplied); // Contract between adapter and compiler in driver const uint32_t maxNumberOfElements = 10; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp index 7760056e127bf8..49227315e325e7 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp @@ -14,9 +14,12 @@ namespace intel_npu::driver_compiler_utils { -IRSerializer::IRSerializer(const std::shared_ptr& origModel, const uint32_t supportedOpset) +IRSerializer::IRSerializer(const std::shared_ptr& origModel, + const uint32_t supportedOpset, + const bool commonPassesApplied) : _logger("IRSerializer", Logger::global().level()), - _supportedOpset(supportedOpset) { + _supportedOpset(supportedOpset), + _commonPassesApplied(commonPassesApplied) { // There is no const variant of run_passes so use const_cast here // as model serialization does not mutate the model _model = std::const_pointer_cast(origModel); @@ -52,7 +55,10 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh // Flag used for indicating an NPU plugin version which switched the I/O identification convention from names to // indices. 
The flag is required in order to inform the driver-compiler adapter to expect indices when attempting to // deserialize the I/O metadata. - const auto useIndicesForIOMetadata = "use_indices_for_io_metadata"; + const auto useIndicesForIOMetadataKey = "use_indices_for_io_metadata"; + + // See the attribute's description + const auto commonPassesAppliedKey = "common_passes_applied"; // We modify the original model object here therefore a mutex is required static std::mutex rtInfoMutex; @@ -61,13 +67,15 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh std::lock_guard lock(rtInfoMutex); _model->set_rt_info(true, newAPIKey); - _model->set_rt_info(true, useIndicesForIOMetadata); + _model->set_rt_info(true, useIndicesForIOMetadataKey); + _model->set_rt_info(_commonPassesApplied, commonPassesAppliedKey); manager.run_passes(_model); auto& rtInfo = _model->get_rt_info(); rtInfo.erase(newAPIKey); - rtInfo.erase(useIndicesForIOMetadata); + rtInfo.erase(useIndicesForIOMetadataKey); + rtInfo.erase(commonPassesAppliedKey); } _logger.debug("serializeModelToStream end"); } From 9c639bfac28c40a6905cc19057fb98be3728f8b1 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Tue, 11 Feb 2025 12:39:43 +0000 Subject: [PATCH 2/6] Adding a time measurement for the passes --- .../src/compiler_adapter/src/driver_compiler_adapter.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index b48e61cfc17240..a999069a2ec4df 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -345,7 +345,12 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2); if (applyCommonPassesFlag) { + std::chrono::steady_clock::time_point 
begin = std::chrono::steady_clock::now(); modelAfterPasses = applyCommonPasses(model); + std::cout + << "Running common passes " + << std::chrono::duration_cast(std::chrono::steady_clock::now() - begin).count() + << "[microseconds]" << std::endl; _logger.debug("Common OV passes have been applied inside the plugin"); } else { From 13c710157d380681934d257a5506985af4843fd3 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 12 Feb 2025 19:39:37 +0000 Subject: [PATCH 3/6] Accommodating the CiP case as well --- .../intel_npu/common/icompiler_adapter.hpp | 8 + .../src/common/src/icompiler_adapter.cpp | 185 ++++++++++++++++ .../include/driver_compiler_adapter.hpp | 3 +- .../include/ir_serializer.hpp | 11 +- .../src/driver_compiler_adapter.cpp | 204 ++---------------- .../compiler_adapter/src/ir_serializer.cpp | 12 +- .../src/plugin_compiler_adapter.cpp | 10 +- 7 files changed, 217 insertions(+), 216 deletions(-) create mode 100644 src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp index a86d942627c6b5..7ccfa09a0fa2f7 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -16,6 +16,14 @@ class ICompilerAdapter { virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; virtual uint32_t get_version() const = 0; + /** + * @brief Applies the common OV passes previously found inside the compiler. + * + * @param model The model on which the passes will be applied. + * @return A clone of the original model on which the passes have been applied.
+ */ + std::shared_ptr apply_common_passes(const std::shared_ptr& model) const; + virtual ~ICompilerAdapter() = default; }; diff --git a/src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp b/src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp new file mode 100644 index 00000000000000..4694494201224b --- /dev/null +++ b/src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp @@ -0,0 +1,185 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_npu/common/icompiler_adapter.hpp" + +#include "openvino/pass/constant_folding.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/batch_to_space_fusion.hpp" +#include "transformations/common_optimizations/conv_mul_fusion.hpp" +#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" +#include "transformations/common_optimizations/depth_to_space_fusion.hpp" +#include "transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp" +#include "transformations/common_optimizations/fq_mul_fusion.hpp" +#include "transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp" +#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" +#include "transformations/common_optimizations/moc_transformations.hpp" +#include "transformations/common_optimizations/mul_conv_fusion.hpp" +#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/mvn_fusion.hpp" +#include "transformations/common_optimizations/pad_fusion.hpp" +#include "transformations/common_optimizations/pull_through_reduce.hpp" +#include "transformations/common_optimizations/reduce_reshape_fusion.hpp" +#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/rms_fusion.hpp" +#include 
"transformations/common_optimizations/shuffle_channels_fusion.hpp" +#include "transformations/common_optimizations/space_to_batch_fusion.hpp" +#include "transformations/common_optimizations/strides_optimization.hpp" +#include "transformations/common_optimizations/transpose_to_reshape.hpp" +#include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp" +#include "transformations/control_flow/unroll_if.hpp" +#include "transformations/control_flow/unroll_tensor_iterator.hpp" +#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" +#include "transformations/init_node_info.hpp" +#include "transformations/low_precision/mark_dequantization_subgraph.hpp" +#include "transformations/op_conversions/batch_norm_decomposition.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_avgpool_downgrade.hpp" +#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" +#include "transformations/op_conversions/convert_convertlike.hpp" +#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" +#include "transformations/op_conversions/convert_gather_upgrade.hpp" +#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp" +#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" +#include "transformations/op_conversions/convert_maxpool_downgrade.hpp" +#include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp" +#include "transformations/op_conversions/convert_pad12_downgrade.hpp" +#include "transformations/op_conversions/convert_pad_to_group_conv.hpp" +#include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp" +#include "transformations/op_conversions/convert_reduce_to_pooling.hpp" +#include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp" +#include 
"transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" +#include "transformations/op_conversions/convert_shapeof3.hpp" +#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" +#include "transformations/op_conversions/convert_softmax_upgrade.hpp" +#include "transformations/op_conversions/convert_topk11_downgrade.hpp" +#include "transformations/op_conversions/detection_output_downgrade.hpp" +#include "transformations/op_conversions/einsum_decomposition.hpp" +#include "transformations/op_conversions/gelu7_downgrade.hpp" +#include "transformations/op_conversions/group_normalization_decomposition.hpp" +#include "transformations/op_conversions/log_softmax_decomposition.hpp" +#include "transformations/op_conversions/normalize_l2_decomposition.hpp" +#include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp" +#include "transformations/op_conversions/softmax_decomposition.hpp" +#include "transformations/rt_info/fused_names_attribute.hpp" +#include "transformations/utils/utils.hpp" + +namespace intel_npu { + +std::shared_ptr ICompilerAdapter::apply_common_passes(const std::shared_ptr& model) const { + const std::shared_ptr clonedModel = model->clone(); + + ov::pass::Manager manager; + manager.register_pass(); + ov::element::TypeVector decompression_precisions{ + ov::element::u4, + ov::element::i4, + ov::element::nf4, + ov::element::u8, + ov::element::i8, + }; + manager.register_pass(decompression_precisions, /*fold_subtract_const=*/true); + manager.register_pass(decompression_precisions, /*fold_subtract_const=*/true); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + // MOCTransformations contain StridedSliceOptimization transformation, + // so we must call SliceToStridedSlice before MOCTransformations call + manager.register_pass(true); + 
// Disable low_precision_enabled as all plugins handle low-precision sub-graph manually + // before CommonOptimization pipeline execution + manager.register_pass(true, false); + + auto pass_config = manager.get_pass_config(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + + // NMS conversion passes + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + auto static_shape = manager.register_pass(); + static_shape->add_matcher(); + static_shape->set_name("ov::pass::CommonStaticShape"); + + auto common_fusions = manager.register_pass(); + common_fusions->add_matcher(); + common_fusions->add_matcher(false); + common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->add_matcher(); + common_fusions->set_name("ov::pass::CommonFusions"); + + auto decomp = manager.register_pass(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->add_matcher(); + decomp->set_name("ov::pass::CommonDecompositions"); + + // CF is required after all decompositions + manager.register_pass(); + + // LinOpSequenceFusion must be executed after all decompositions + manager.register_pass(); + manager.register_pass(); + + auto conv_fusions = manager.register_pass(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->add_matcher(); + conv_fusions->set_name("ov::pass::ConvFusions"); + + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + 
manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + + // StridesOptimization should be at the very end + // because we cannot insert any MaxPools since they may prevent + // other optimizations + manager.register_pass(); + manager.register_pass(); + + manager.run_passes(clonedModel); + + // Notifies the compiler to skip applying the passes on its end + clonedModel->set_rt_info(true, "common_passes_applied"); + + return clonedModel; +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 4300095c23af61..75998fbf7bedf6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -48,8 +48,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { SerializedIR serializeIR(const std::shared_ptr& model, ze_graph_compiler_version_info_t compilerVersion, - const uint32_t supportedOpsetVersio, - const bool applyCommonPasses) const; + const uint32_t supportedOpsetVersio) const; std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t compilerVersion) const; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp index a23ee9d3ebe862..fbb098a1238228 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp @@ -19,9 +19,7 @@ namespace intel_npu::driver_compiler_utils { class IRSerializer { public: - IRSerializer(const std::shared_ptr& origModel, - const uint32_t supportedOpset = 11, - const bool commonPassesApplied = false); + IRSerializer(const std::shared_ptr& origModel, 
const uint32_t supportedOpset = 11); size_t getXmlSize() const { return _xmlSize; @@ -52,13 +50,6 @@ class IRSerializer { uint32_t _supportedOpset = 11; size_t _xmlSize = 0; size_t _weightsSize = 0; - - /** - * @brief Indicates whether or not the OV common passes have already been applied on the model. - * @details This attribute will be stored inside model's runtime information in order to be sent to the compiler. - * The compiler will thus know if applying the same passes on its end is required. - */ - bool _commonPassesApplied; }; } // namespace intel_npu::driver_compiler_utils diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index a999069a2ec4df..0efbb76911c564 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -19,67 +19,6 @@ #include "intel_npu/utils/zero/zero_utils.hpp" #include "ir_serializer.hpp" #include "openvino/core/model.hpp" -#include "openvino/pass/constant_folding.hpp" -#include "openvino/pass/manager.hpp" -#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" -#include "transformations/common_optimizations/batch_to_space_fusion.hpp" -#include "transformations/common_optimizations/conv_mul_fusion.hpp" -#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" -#include "transformations/common_optimizations/depth_to_space_fusion.hpp" -#include "transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp" -#include "transformations/common_optimizations/fq_mul_fusion.hpp" -#include "transformations/common_optimizations/fuse_rotary_positional_embeddings.hpp" -#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" -#include "transformations/common_optimizations/moc_transformations.hpp" -#include 
"transformations/common_optimizations/mul_conv_fusion.hpp" -#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp" -#include "transformations/common_optimizations/mvn_fusion.hpp" -#include "transformations/common_optimizations/pad_fusion.hpp" -#include "transformations/common_optimizations/pull_through_reduce.hpp" -#include "transformations/common_optimizations/reduce_reshape_fusion.hpp" -#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp" -#include "transformations/common_optimizations/rms_fusion.hpp" -#include "transformations/common_optimizations/shuffle_channels_fusion.hpp" -#include "transformations/common_optimizations/space_to_batch_fusion.hpp" -#include "transformations/common_optimizations/strides_optimization.hpp" -#include "transformations/common_optimizations/transpose_to_reshape.hpp" -#include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp" -#include "transformations/control_flow/unroll_if.hpp" -#include "transformations/control_flow/unroll_tensor_iterator.hpp" -#include "transformations/fp16_compression/mark_decompression_convert_constant_folding.hpp" -#include "transformations/init_node_info.hpp" -#include "transformations/low_precision/mark_dequantization_subgraph.hpp" -#include "transformations/op_conversions/batch_norm_decomposition.hpp" -#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" -#include "transformations/op_conversions/convert_avgpool_downgrade.hpp" -#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" -#include "transformations/op_conversions/convert_convertlike.hpp" -#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" -#include "transformations/op_conversions/convert_gather_upgrade.hpp" -#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp" -#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" -#include 
"transformations/op_conversions/convert_maxpool_downgrade.hpp" -#include "transformations/op_conversions/convert_nms9_to_nms_ie_internal.hpp" -#include "transformations/op_conversions/convert_pad12_downgrade.hpp" -#include "transformations/op_conversions/convert_pad_to_group_conv.hpp" -#include "transformations/op_conversions/convert_previous_nms_to_nms_9.hpp" -#include "transformations/op_conversions/convert_reduce_to_pooling.hpp" -#include "transformations/op_conversions/convert_scatter_elements_update12_downgrade.hpp" -#include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" -#include "transformations/op_conversions/convert_shapeof3.hpp" -#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" -#include "transformations/op_conversions/convert_softmax_upgrade.hpp" -#include "transformations/op_conversions/convert_topk11_downgrade.hpp" -#include "transformations/op_conversions/detection_output_downgrade.hpp" -#include "transformations/op_conversions/einsum_decomposition.hpp" -#include "transformations/op_conversions/gelu7_downgrade.hpp" -#include "transformations/op_conversions/group_normalization_decomposition.hpp" -#include "transformations/op_conversions/log_softmax_decomposition.hpp" -#include "transformations/op_conversions/normalize_l2_decomposition.hpp" -#include "transformations/op_conversions/scaled_dot_product_attention_decomposition.hpp" -#include "transformations/op_conversions/softmax_decomposition.hpp" -#include "transformations/rt_info/fused_names_attribute.hpp" -#include "transformations/utils/utils.hpp" namespace { @@ -102,122 +41,6 @@ const std::vector NC_TO_CN_LAYOUT_DIMENSIONS_ORDER = {1, 0}; const std::vector NCHW_TO_NHWC_LAYOUT_DIMENSIONS_ORDER = {0, 2, 3, 1}; const std::vector NCDHW_TO_NDHWC_LAYOUT_DIMENSIONS_ORDER = {0, 2, 3, 4, 1}; -/** - * @brief Applies the common OV passes previously found in the compiler. - * - * @param model The target model. 
- * @return A clone of the original model on which the passes have been applied. - */ -std::shared_ptr applyCommonPasses(const std::shared_ptr& model) { - const std::shared_ptr clonedModel = model->clone(); - - ov::pass::Manager manager; - manager.register_pass(); - ov::element::TypeVector decompression_precisions{ - ov::element::u4, - ov::element::i4, - ov::element::nf4, - ov::element::u8, - ov::element::i8, - }; - manager.register_pass(decompression_precisions, /*fold_subtract_const=*/true); - manager.register_pass(decompression_precisions, /*fold_subtract_const=*/true); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - - // MOCTransformations contain StridedSliceOptimization transformation, - // so we must call SliceToStridedSlice before MOCTransformations call - manager.register_pass(true); - // Disable low_precision_enabled as all plugins handle low-precision sub-graph manually - // before CommonOptimization pipeline execution - manager.register_pass(true, false); - - auto pass_config = manager.get_pass_config(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - pass_config->disable(); - - // NMS conversion passes - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - - auto static_shape = manager.register_pass(); - static_shape->add_matcher(); - static_shape->set_name("ov::pass::CommonStaticShape"); - - auto common_fusions = manager.register_pass(); - common_fusions->add_matcher(); - common_fusions->add_matcher(false); - common_fusions->add_matcher(); - common_fusions->add_matcher(); - common_fusions->add_matcher(); - common_fusions->add_matcher(); - common_fusions->set_name("ov::pass::CommonFusions"); - - auto decomp = 
manager.register_pass(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->add_matcher(); - decomp->set_name("ov::pass::CommonDecompositions"); - - // CF is required after all decompositions - manager.register_pass(); - - // LinOpSequenceFusion must be executed after all decompositions - manager.register_pass(); - manager.register_pass(); - - auto conv_fusions = manager.register_pass(); - conv_fusions->add_matcher(); - conv_fusions->add_matcher(); - conv_fusions->add_matcher(); - conv_fusions->add_matcher(); - conv_fusions->add_matcher(); - conv_fusions->add_matcher(); - conv_fusions->add_matcher(); - conv_fusions->add_matcher(); - conv_fusions->set_name("ov::pass::ConvFusions"); - - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - manager.register_pass(); - - // StridesOptimization should be at the very end - // because we cannot insert any MaxPools since they may prevent - // other optimizations - manager.register_pass(); - manager.register_pass(); - - manager.run_passes(clonedModel); - return clonedModel; -} - /** * @brief A standard copy function concerning memory segments. Additional checks on the given arguments are performed * before copying. 
@@ -341,12 +164,12 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr modelAfterPasses = model; - const bool applyCommonPassesFlag = - (compilerVersion.major > 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2); - if (applyCommonPassesFlag) { + if ((compilerVersion.major > 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2)) { std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now(); - modelAfterPasses = applyCommonPasses(model); + modelAfterPasses = apply_common_passes(model); std::cout << "Running common passes " << std::chrono::duration_cast(std::chrono::steady_clock::now() - begin).count() @@ -358,7 +181,7 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr modelAfterPasses = model; - const bool applyCommonPassesFlag = - (compilerVersion.major > 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2); - if (applyCommonPassesFlag) { - modelAfterPasses = applyCommonPasses(model); + if ((compilerVersion.major > 7) || (compilerVersion.major == 7 && compilerVersion.minor >= 2)) { + modelAfterPasses = apply_common_passes(model); - _logger.info("Common OV passes have been applied inside the plugin"); + _logger.debug("Common OV passes have been applied inside the plugin"); } else { - _logger.info("No common OV passes have been applied inside the plugin"); + _logger.debug("No common OV passes have been applied inside the plugin"); } _logger.debug("serialize IR"); - auto serializedIR = serializeIR(modelAfterPasses, compilerVersion, maxOpsetVersion, applyCommonPassesFlag); + auto serializedIR = serializeIR(modelAfterPasses, compilerVersion, maxOpsetVersion); std::string buildFlags; buildFlags += serializeConfig(config, compilerVersion); @@ -466,9 +287,8 @@ uint32_t DriverCompilerAdapter::get_version() const { */ SerializedIR DriverCompilerAdapter::serializeIR(const std::shared_ptr& model, ze_graph_compiler_version_info_t compilerVersion, - const uint32_t 
supportedOpsetVersion, - const bool commonPassesApplied) const { - driver_compiler_utils::IRSerializer irSerializer(model, supportedOpsetVersion, commonPassesApplied); + const uint32_t supportedOpsetVersion) const { + driver_compiler_utils::IRSerializer irSerializer(model, supportedOpsetVersion); // Contract between adapter and compiler in driver const uint32_t maxNumberOfElements = 10; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp index 49227315e325e7..87c97da89ddd91 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp @@ -14,12 +14,9 @@ namespace intel_npu::driver_compiler_utils { -IRSerializer::IRSerializer(const std::shared_ptr& origModel, - const uint32_t supportedOpset, - const bool commonPassesApplied) +IRSerializer::IRSerializer(const std::shared_ptr& origModel, const uint32_t supportedOpset) : _logger("IRSerializer", Logger::global().level()), - _supportedOpset(supportedOpset), - _commonPassesApplied(commonPassesApplied) { + _supportedOpset(supportedOpset) { // There is no const variant of run_passes so use const_cast here // as model serialization does not mutate the model _model = std::const_pointer_cast(origModel); @@ -57,9 +54,6 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh // deserialize the I/O metadata. 
const auto useIndicesForIOMetadataKey = "use_indices_for_io_metadata"; - // See the attribute's description - const auto commonPassesAppliedKey = "common_passes_applied"; - // We modify the original model object here therefore a mutex is required static std::mutex rtInfoMutex; @@ -68,14 +62,12 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh _model->set_rt_info(true, newAPIKey); _model->set_rt_info(true, useIndicesForIOMetadataKey); - _model->set_rt_info(_commonPassesApplied, commonPassesAppliedKey); manager.run_passes(_model); auto& rtInfo = _model->get_rt_info(); rtInfo.erase(newAPIKey); rtInfo.erase(useIndicesForIOMetadataKey); - rtInfo.erase(commonPassesAppliedKey); } _logger.debug("serializeModelToStream end"); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 809e1c88e05a71..a686d649602dcc 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -78,8 +78,11 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr modelAfterPasses = apply_common_passes(model); + _logger.debug("Common OV passes have been applied inside the plugin"); + _logger.debug("compile start"); - auto networkDesc = _compiler->compile(model, config); + auto networkDesc = _compiler->compile(modelAfterPasses, config); auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); @@ -138,7 +141,10 @@ ov::SupportedOpsMap PluginCompilerAdapter::query(const std::shared_ptrquery(model, config); + const std::shared_ptr modelAfterPasses = apply_common_passes(model); + _logger.debug("Common OV passes have been applied inside the plugin"); + + return _compiler->query(modelAfterPasses, config); } uint32_t PluginCompilerAdapter::get_version() const { From 
033000cad3e8b1f85f35a435b5b5328cc81f1cc0 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 12 Feb 2025 19:50:58 +0000 Subject: [PATCH 4/6] Removing the redundant interpolate downgrade pass --- .../include/driver_compiler_adapter.hpp | 3 +- .../include/ir_serializer.hpp | 7 +- .../src/driver_compiler_adapter.cpp | 9 +- .../compiler_adapter/src/ir_serializer.cpp | 19 +--- .../downgrade_interpolate11.cpp | 21 ---- .../downgrade_interpolate11.hpp | 97 ------------------- .../skip_tests_config.cpp | 5 - 7 files changed, 8 insertions(+), 153 deletions(-) delete mode 100644 src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/downgrade_interpolate11.cpp delete mode 100644 src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/downgrade_interpolate11.hpp diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 75998fbf7bedf6..d21312b33e1271 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -47,8 +47,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::string serializeIOInfo(const std::shared_ptr& model, const bool useIndices) const; SerializedIR serializeIR(const std::shared_ptr& model, - ze_graph_compiler_version_info_t compilerVersion, - const uint32_t supportedOpsetVersio) const; + ze_graph_compiler_version_info_t compilerVersion) const; std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t compilerVersion) const; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp index fbb098a1238228..064b9d729f0cf3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp +++ 
b/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp @@ -11,15 +11,11 @@ #include "intel_npu/utils/logger/logger.hpp" #include "openvino/pass/manager.hpp" -/** - * @brief Contain all required transformation on OpenVINO model in case for external compiler usage and - * providing forward compatibility (OV model with opset N+M, external compiler with opset N) - */ namespace intel_npu::driver_compiler_utils { class IRSerializer { public: - IRSerializer(const std::shared_ptr& origModel, const uint32_t supportedOpset = 11); + IRSerializer(const std::shared_ptr& origModel); size_t getXmlSize() const { return _xmlSize; @@ -47,7 +43,6 @@ class IRSerializer { Logger _logger; std::shared_ptr _model = nullptr; - uint32_t _supportedOpset = 11; size_t _xmlSize = 0; size_t _weightsSize = 0; }; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 0efbb76911c564..b3b60b2b82dbfe 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -181,7 +181,7 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr& model, - ze_graph_compiler_version_info_t compilerVersion, - const uint32_t supportedOpsetVersion) const { - driver_compiler_utils::IRSerializer irSerializer(model, supportedOpsetVersion); + ze_graph_compiler_version_info_t compilerVersion) const { + driver_compiler_utils::IRSerializer irSerializer(model); // Contract between adapter and compiler in driver const uint32_t maxNumberOfElements = 10; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp index 87c97da89ddd91..554822493eea18 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp 
@@ -10,23 +10,15 @@ #include #include "openvino/pass/serialize.hpp" -#include "transformations/op_conversions/convert_interpolate11_downgrade.hpp" namespace intel_npu::driver_compiler_utils { -IRSerializer::IRSerializer(const std::shared_ptr& origModel, const uint32_t supportedOpset) - : _logger("IRSerializer", Logger::global().level()), - _supportedOpset(supportedOpset) { +IRSerializer::IRSerializer(const std::shared_ptr& origModel) + : _logger("IRSerializer", Logger::global().level()) { // There is no const variant of run_passes so use const_cast here // as model serialization does not mutate the model _model = std::const_pointer_cast(origModel); - if (supportedOpset < 11) { - // Need to clone to modify the model and remain thread safe - _model = _model->clone(); - _logger.info("Clone model for offset smaller than 11"); - } - countModelSize(); } @@ -34,13 +26,6 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh _logger.debug("serializeModelToStream"); const auto passConfig = std::make_shared(); ov::pass::Manager manager(passConfig, "NPU:serializeModelToStream"); - - if (_supportedOpset < 11) { - // Downgrade to opset10 - manager.register_pass(); - _logger.info("Downgrade op for opset smaller than 11"); - } - manager.register_pass(xml, weights); // Depending on the driver version, the compiler attached to it may request this information as an indicator of the diff --git a/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/downgrade_interpolate11.cpp b/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/downgrade_interpolate11.cpp deleted file mode 100644 index 5655d797518cdf..00000000000000 --- a/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/downgrade_interpolate11.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2018-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "downgrade_interpolate11.hpp" - -#include 
"common/npu_test_env_cfg.hpp" -#include "common/utils.hpp" -#include "intel_npu/config/common.hpp" - -using namespace ov::test::behavior; - -const std::vector configs = { - {{ov::intel_npu::compiler_type(ov::intel_npu::CompilerType::DRIVER)}}, -}; - -INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, - DriverCompilerAdapterDowngradeInterpolate11TestNPU, - ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), - ::testing::ValuesIn(configs)), - DriverCompilerAdapterDowngradeInterpolate11TestNPU::getTestCaseName); diff --git a/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/downgrade_interpolate11.hpp b/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/downgrade_interpolate11.hpp deleted file mode 100644 index c9d9e7c588bc8c..00000000000000 --- a/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/downgrade_interpolate11.hpp +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (C) 2018-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "base/ov_behavior_test_utils.hpp" -#include "common/npu_test_env_cfg.hpp" - -#include "openvino/op/constant.hpp" -#include "openvino/op/interpolate.hpp" - -using CompilationParams = std::tuple; - -namespace ov { -namespace test { -namespace behavior { -class DriverCompilerAdapterDowngradeInterpolate11TestNPU : - public ov::test::behavior::OVPluginTestBase, - public testing::WithParamInterface { -protected: - std::shared_ptr core = utils::PluginCache::get().core(); - ov::AnyMap configuration; - std::shared_ptr ov_model; - -public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - std::string targetDevice; - ov::AnyMap configuration; - std::tie(targetDevice, configuration) = obj.param; - std::replace(targetDevice.begin(), targetDevice.end(), ':', '.'); - - std::ostringstream result; - result << "targetDevice=" << targetDevice << "_"; - result << "targetPlatform=" << 
ov::test::utils::getTestsPlatformFromEnvironmentOr(targetDevice) << "_"; - result << "model=" - << "Interpolate11Model" - << "_"; - if (!configuration.empty()) { - for (auto& configItem : configuration) { - result << "configItem=" << configItem.first << "_"; - configItem.second.print(result); - } - } - return result.str(); - } - - void SetUp() override { - std::tie(target_device, configuration) = this->GetParam(); - SKIP_IF_CURRENT_TEST_IS_DISABLED() - ov_model = createInterpolate11Model(); - OVPluginTestBase::SetUp(); - } - - void TearDown() override { - if (!configuration.empty()) { - utils::PluginCache::get().reset(); - } - APIBaseTest::TearDown(); - } - -private: - std::shared_ptr createInterpolate11Model() { - using InterpolateAttrs = op::v11::Interpolate::InterpolateAttrs; - using InterpolateMode = op::v11::Interpolate::InterpolateMode; - using ShapeCalcMode = op::v11::Interpolate::ShapeCalcMode; - using TransformMode = op::v11::Interpolate::CoordinateTransformMode; - using NearestMode = op::v11::Interpolate::NearestMode; - const auto data = std::make_shared(ov::element::f32, ov::PartialShape{2, 2, 30, 60}); - const auto scales = - ov::op::v0::Constant::create(ov::element::f32, ov::Shape{2}, std::vector{0.5f, 0.5f}); - const auto axes = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, std::vector{2, 3}); - // Only modes of NEAREST, LINEAR, LINEAR_ONNX and CUBIC are supported for ConvertInterpolate11ToInterpolate4, - // here we use mode of NEAREST . 
- const InterpolateAttrs attrs{InterpolateMode::NEAREST, - ShapeCalcMode::SCALES, - std::vector{0, 0, 0, 0}, - std::vector{0, 0, 0, 0}, - TransformMode::HALF_PIXEL, - NearestMode::ROUND_PREFER_FLOOR, - false, - -0.75}; - const auto interpolate = std::make_shared(data, scales, axes, attrs); - ov::ResultVector results{std::make_shared(interpolate)}; - return std::make_shared(results, ov::ParameterVector{{data}}, "Interpolate-11"); - } -}; - -TEST_P(DriverCompilerAdapterDowngradeInterpolate11TestNPU, CheckOpsetVersion) { - EXPECT_NO_THROW(auto compiledModel = core->compile_model(ov_model, target_device, configuration);); -} - -} // namespace behavior -} // namespace test -} // namespace ov diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 0d93eacc1be60d..48cf8996d589e9 100644 --- a/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_npu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -278,7 +278,6 @@ std::vector disabledTestPatterns() { // [Track number: E#70764] ".*InferRequestCheckTensorPrecision.*", ".*InferRequestIOTensorSetPrecisionTest.*", - ".*DriverCompilerAdapterDowngradeInterpolate11TestNPU.*", ".*DriverCompilerAdapterInputsOutputsTestNPU.*", }); @@ -393,8 +392,6 @@ std::vector disabledTestPatterns() { { // [Track number: E#49620] ".*NPU3720.*", - // [Track number: E#84621] - ".*DriverCompilerAdapterDowngradeInterpolate11TestNPU.*", ".*DriverCompilerAdapterInputsOutputsTestNPU.*", }); @@ -432,8 +429,6 @@ std::vector disabledTestPatterns() { // [Track number: E#30824] ".*OVClassImportExportTestP.*", ".*OVClassLoadNetworkTestNPU.*LoadNetwork.*", - // [Track number: E#84621] - ".*DriverCompilerAdapterDowngradeInterpolate11TestNPU.*", ".*QueryNetworkTestSuite.*", }); From 6c73e43bceecc6cab77fbd658e41614f265f9d5f Mon Sep 17 00:00:00 2001 From: Razvan 
Apetroaie Date: Wed, 12 Feb 2025 20:20:41 +0000 Subject: [PATCH 5/6] Forgot to update a test --- .../behavior/npu_driver_compiler_adapter/custom_stream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/custom_stream.cpp b/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/custom_stream.cpp index e125416209938b..02ef0786e8ad57 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/custom_stream.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/custom_stream.cpp @@ -83,7 +83,7 @@ class DriverCompilerAdapterCustomStreamTestNPU : public ov::test::behavior::OVPl TEST_P(DriverCompilerAdapterCustomStreamTestNPU, TestLargeModel) { auto model = createModelWithLargeSize(); - IRSerializer irSerializer(model, 11); + IRSerializer irSerializer(model); size_t xmlSize = irSerializer.getXmlSize(); size_t weightsSize = irSerializer.getWeightsSize(); From 8f330e24f6b7fa30940973cbbbfc945f11ccf401 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Wed, 12 Feb 2025 20:22:48 +0000 Subject: [PATCH 6/6] Adding some debug info --- src/core/src/node.cpp | 10 +++------- src/core/src/pass/graph_rewrite.cpp | 4 +--- .../intel_npu/src/common/src/icompiler_adapter.cpp | 3 +++ 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index 642b233fa5526c..3a80d7d11c1641 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -387,19 +387,15 @@ const ov::op::AutoBroadcastSpec& ov::Node::get_autob() const { namespace ov { ostream& operator<<(ostream& out, const Node& node) { - return node.write_description(out, 1); + return node.write_description(out, 0); } ostream& operator<<(ostream& out, const Node* node) { - return node->write_description(out, 1); + return node->write_description(out, 0); } } // namespace ov std::ostream& 
ov::Node::write_description(std::ostream& out, uint32_t depth) const { - auto version = get_type_info().version_id; - if (version) - out << version << "::" << get_type_info().name << " " << get_friendly_name(); - else - out << get_type_info().name << " " << get_friendly_name(); + out << get_type_info().name << " " << get_friendly_name(); if (depth > 0) { out << " ("; diff --git a/src/core/src/pass/graph_rewrite.cpp b/src/core/src/pass/graph_rewrite.cpp index b7e839838c54fa..190a70acc3eb1b 100644 --- a/src/core/src/pass/graph_rewrite.cpp +++ b/src/core/src/pass/graph_rewrite.cpp @@ -282,14 +282,12 @@ void ov::pass::MatcherPass::register_matcher(const std::shared_ptr& node) -> bool { - OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " trying to match ", node); if (m->match(node->output(0))) { - OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " matched ", node); + std::cout << m->get_name() << " " << node << std::endl; OV_PASS_CALLBACK(m); try { const bool status = callback(*m.get()); - OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " callback ", (status ? "succeded" : "failed")); // explicitly clear Matcher state because it holds pointers to matched nodes m->clear_state(); return status; diff --git a/src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp b/src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp index 4694494201224b..bc21e20088f176 100644 --- a/src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp +++ b/src/plugins/intel_npu/src/common/src/icompiler_adapter.cpp @@ -174,7 +174,10 @@ std::shared_ptr ICompilerAdapter::apply_common_passes(const std::shar manager.register_pass(); manager.register_pass(); + std::cout << "Starting NPU plugin passes" << std::endl; + std::cout << "Columns: Pass NodeType NodeFriendlyName" << std::endl; manager.run_passes(clonedModel); + std::cout << "Ending NPU plugin passes" << std::endl; // Notifies the compiler to skip applying the passes on its end clonedModel->set_rt_info(true, "common_passes_applied");