diff --git a/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc b/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc
index db020a3aa3..d87995ffb8 100644
--- a/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc
+++ b/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
diff --git a/lib/Optimizer/Transforms/LoopAnalysis.cpp b/lib/Optimizer/Transforms/LoopAnalysis.cpp
index c40e2b7e30..37ee67489d 100644
--- a/lib/Optimizer/Transforms/LoopAnalysis.cpp
+++ b/lib/Optimizer/Transforms/LoopAnalysis.cpp
@@ -7,6 +7,7 @@
  ******************************************************************************/

 #include "LoopAnalysis.h"
+#include "cudaq/Optimizer/Builder/Factory.h"
 #include "mlir/IR/Dominance.h"

 using namespace mlir;
@@ -73,14 +74,6 @@ static bool isaConstantOf(Value v, std::int64_t hasVal) {
   return false;
 }

-static bool isNegativeConstant(Value v) {
-  v = peelCastOps(v);
-  if (auto c = v.getDefiningOp<arith::ConstantOp>())
-    if (auto ia = dyn_cast<IntegerAttr>(c.getValue()))
-      return ia.getInt() < 0;
-  return false;
-}
-
 static bool isClosedIntervalForm(arith::CmpIPredicate p) {
   return p == arith::CmpIPredicate::ule || p == arith::CmpIPredicate::sle;
 }
@@ -210,6 +203,10 @@ static BlockArgument getLinearExpr(Value expr,
   return scaledIteration(expr);
 }

+static unsigned bitWidth(Value val) {
+  return cast<IntegerType>(val.getType()).getWidth();
+}
+
 namespace cudaq {

 bool opt::isSemiOpenPredicate(arith::CmpIPredicate p) {
@@ -223,6 +220,11 @@ bool opt::isUnsignedPredicate(arith::CmpIPredicate p) {
          p == arith::CmpIPredicate::ugt || p == arith::CmpIPredicate::uge;
 }

+bool opt::isSignedPredicate(arith::CmpIPredicate p) {
+  return p == arith::CmpIPredicate::slt || p == arith::CmpIPredicate::sle ||
+         p == arith::CmpIPredicate::sgt || p == arith::CmpIPredicate::sge;
+}
+
 // We expect the loop control value to have the following form.
 //
 //     %final = cc.loop while ((%iter = %initial) -> (iN)) {
@@ -282,7 +284,7 @@ bool opt::isaMonotonicLoop(Operation *op, bool allowEarlyExit,

 bool opt::isaInvariantLoop(const LoopComponents &c, bool allowClosedInterval) {
   if (isaConstantOf(c.initialValue, 0) && isaConstantOf(c.stepValue, 1) &&
-      isa<arith::AddIOp>(c.stepOp) && !isNegativeConstant(c.compareValue)) {
+      isa<arith::AddIOp>(c.stepOp)) {
     auto cmp = cast<arith::CmpIOp>(c.compareOp);
     return validCountedLoopIntervalForm(cmp, allowClosedInterval);
   }
@@ -314,26 +316,296 @@ bool opt::isaConstantUpperBoundLoop(cc::LoopOp loop, bool allowClosedInterval) {
          isaConstant(c.compareValue);
 }

-Value opt::LoopComponents::getCompareInduction() {
+Value opt::LoopComponents::getCompareInduction() const {
   auto cmpOp = cast<arith::CmpIOp>(compareOp);
   return cmpOp.getLhs() == compareValue ? cmpOp.getRhs() : cmpOp.getLhs();
 }

-bool opt::LoopComponents::stepIsAnAddOp() { return isa<arith::AddIOp>(stepOp); }
+bool opt::LoopComponents::stepIsAnAddOp() const {
+  return isa<arith::AddIOp>(stepOp);
+}

-bool opt::LoopComponents::shouldCommuteStepOp() {
+bool opt::LoopComponents::shouldCommuteStepOp() const {
   if (auto addOp = dyn_cast_or_null<arith::AddIOp>(stepOp))
     return addOp.getRhs() == stepRegion->front().getArgument(induction);
   // Note: we don't allow induction on lhs of subtraction.
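+  // Subtraction does not commute, so only an addition step is a candidate for
+  // operand commuting; any other step op conservatively returns false.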
   return false;
 }

-bool opt::LoopComponents::isClosedIntervalForm() {
+bool opt::LoopComponents::isClosedIntervalForm() const {
   auto cmp = cast<arith::CmpIOp>(compareOp);
   return ::isClosedIntervalForm(cmp.getPredicate());
 }

-bool opt::LoopComponents::isLinearExpr() { return addendValue || scaleValue; }
+bool opt::LoopComponents::isLinearExpr() const {
+  return addendValue || scaleValue;
+}
+
+std::int64_t opt::LoopComponents::extendValue(unsigned width,
+                                              std::size_t val) const {
+  const bool signExt =
+      isSignedPredicate(cast<arith::CmpIOp>(compareOp).getPredicate());
+  std::int64_t result = val;
+  switch (width) {
+  case 8:
+    if (signExt) {
+      std::int8_t v = val & 0xFF;
+      result = v;
+    } else {
+      std::uint8_t v = val & 0xFF;
+      result = v;
+    }
+    break;
+  case 16:
+    if (signExt) {
+      std::int16_t v = val & 0xFFFF;
+      result = v;
+    } else {
+      std::uint16_t v = val & 0xFFFF;
+      result = v;
+    }
+    break;
+  case 32:
+    if (signExt) {
+      std::int32_t v = val & 0xFFFFFFFF;
+      result = v;
+    } else {
+      std::uint32_t v = val & 0xFFFFFFFF;
+      result = v;
+    }
+    break;
+  default:
+    break;
+  }
+  return result;
+}
+
+bool opt::LoopComponents::hasAlwaysTrueCondition() const {
+  auto cmpValOpt = factory::maybeValueOfIntConstant(compareValue);
+  if (!cmpValOpt)
+    return false;
+  auto width = bitWidth(compareValue);
+  std::int64_t cmpVal = *cmpValOpt;
+  auto pred = cast<arith::CmpIOp>(compareOp).getPredicate();
+  switch (width) {
+  case 8: {
+    switch (pred) {
+    case arith::CmpIPredicate::sge:
+      return static_cast<std::int8_t>(cmpVal) ==
+             std::numeric_limits<std::int8_t>::min();
+    case arith::CmpIPredicate::sle:
+      return static_cast<std::int8_t>(cmpVal) ==
+             std::numeric_limits<std::int8_t>::max();
+    case arith::CmpIPredicate::uge:
+      return static_cast<std::uint8_t>(cmpVal) ==
+             std::numeric_limits<std::uint8_t>::min();
+    case arith::CmpIPredicate::ule:
+      return static_cast<std::uint8_t>(cmpVal) ==
+             std::numeric_limits<std::uint8_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 16: {
+    switch (pred) {
+    case arith::CmpIPredicate::sge:
+      return static_cast<std::int16_t>(cmpVal) ==
+             std::numeric_limits<std::int16_t>::min();
+    case arith::CmpIPredicate::sle:
+      return static_cast<std::int16_t>(cmpVal) ==
+             std::numeric_limits<std::int16_t>::max();
+    case arith::CmpIPredicate::uge:
+      return static_cast<std::uint16_t>(cmpVal) ==
+             std::numeric_limits<std::uint16_t>::min();
+    case arith::CmpIPredicate::ule:
+      return static_cast<std::uint16_t>(cmpVal) ==
+             std::numeric_limits<std::uint16_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 32: {
+    switch (pred) {
+    case arith::CmpIPredicate::sge:
+      return static_cast<std::int32_t>(cmpVal) ==
+             std::numeric_limits<std::int32_t>::min();
+    case arith::CmpIPredicate::sle:
+      return static_cast<std::int32_t>(cmpVal) ==
+             std::numeric_limits<std::int32_t>::max();
+    case arith::CmpIPredicate::uge:
+      return static_cast<std::uint32_t>(cmpVal) ==
+             std::numeric_limits<std::uint32_t>::min();
+    case arith::CmpIPredicate::ule:
+      return static_cast<std::uint32_t>(cmpVal) ==
+             std::numeric_limits<std::uint32_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 64: {
+    switch (pred) {
+    case arith::CmpIPredicate::sge:
+      return static_cast<std::int64_t>(cmpVal) ==
+             std::numeric_limits<std::int64_t>::min();
+    case arith::CmpIPredicate::sle:
+      return static_cast<std::int64_t>(cmpVal) ==
+             std::numeric_limits<std::int64_t>::max();
+    case arith::CmpIPredicate::uge:
+      return static_cast<std::uint64_t>(cmpVal) ==
+             std::numeric_limits<std::uint64_t>::min();
+    case arith::CmpIPredicate::ule:
+      return static_cast<std::uint64_t>(cmpVal) ==
+             std::numeric_limits<std::uint64_t>::max();
+    default:
+      break;
+    }
+  } break;
+  default:
+    break;
+  }
+  return false;
+}
+
+bool opt::LoopComponents::hasAlwaysFalseCondition() const {
+  auto cmpValOpt = factory::maybeValueOfIntConstant(compareValue);
+  if (!cmpValOpt)
+    return false;
+  auto width = bitWidth(compareValue);
+  std::int64_t cmpVal = *cmpValOpt;
+  auto pred = cast<arith::CmpIOp>(compareOp).getPredicate();
+  switch (width) {
+  case 8: {
+    switch (pred) {
+    case arith::CmpIPredicate::slt:
+      return static_cast<std::int8_t>(cmpVal) ==
+             std::numeric_limits<std::int8_t>::min();
+    case arith::CmpIPredicate::sgt:
+      return static_cast<std::int8_t>(cmpVal) ==
+             std::numeric_limits<std::int8_t>::max();
+    case arith::CmpIPredicate::ult:
+      return static_cast<std::uint8_t>(cmpVal) ==
+             std::numeric_limits<std::uint8_t>::min();
+    case arith::CmpIPredicate::ugt:
+      return static_cast<std::uint8_t>(cmpVal) ==
+             std::numeric_limits<std::uint8_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 16: {
+    switch (pred) {
+    case arith::CmpIPredicate::slt:
+      return static_cast<std::int16_t>(cmpVal) ==
+             std::numeric_limits<std::int16_t>::min();
+    case arith::CmpIPredicate::sgt:
+      return static_cast<std::int16_t>(cmpVal) ==
+             std::numeric_limits<std::int16_t>::max();
+    case arith::CmpIPredicate::ult:
+      return static_cast<std::uint16_t>(cmpVal) ==
+             std::numeric_limits<std::uint16_t>::min();
+    case arith::CmpIPredicate::ugt:
+      return static_cast<std::uint16_t>(cmpVal) ==
+             std::numeric_limits<std::uint16_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 32: {
+    switch (pred) {
+    case arith::CmpIPredicate::slt:
+      return static_cast<std::int32_t>(cmpVal) ==
+             std::numeric_limits<std::int32_t>::min();
+    case arith::CmpIPredicate::sgt:
+      return static_cast<std::int32_t>(cmpVal) ==
+             std::numeric_limits<std::int32_t>::max();
+    case arith::CmpIPredicate::ult:
+      return static_cast<std::uint32_t>(cmpVal) ==
+             std::numeric_limits<std::uint32_t>::min();
+    case arith::CmpIPredicate::ugt:
+      return static_cast<std::uint32_t>(cmpVal) ==
+             std::numeric_limits<std::uint32_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 64: {
+    switch (pred) {
+    case arith::CmpIPredicate::slt:
+      return static_cast<std::int64_t>(cmpVal) ==
+             std::numeric_limits<std::int64_t>::min();
+    case arith::CmpIPredicate::sgt:
+      return static_cast<std::int64_t>(cmpVal) ==
+             std::numeric_limits<std::int64_t>::max();
+    case arith::CmpIPredicate::ult:
+      return static_cast<std::uint64_t>(cmpVal) ==
+             std::numeric_limits<std::uint64_t>::min();
+    case arith::CmpIPredicate::ugt:
+      return static_cast<std::uint64_t>(cmpVal) ==
+             std::numeric_limits<std::uint64_t>::max();
+    default:
+      break;
+    }
+  } break;
+  default:
+    break;
+  }
+  return false;
+}
+
+std::optional<std::int64_t>
+opt::LoopComponents::getIterationsConstant() const {
+  auto initValOpt = factory::maybeValueOfIntConstant(initialValue);
+  if (!initValOpt)
+    return std::nullopt;
+  std::int64_t initVal = extendValue(bitWidth(initialValue), *initValOpt);
+  auto endValOpt = factory::maybeValueOfIntConstant(compareValue);
+  if (!endValOpt)
+    return std::nullopt;
+  std::int64_t endVal = extendValue(bitWidth(compareValue), *endValOpt);
+  auto stepValOpt = factory::maybeValueOfIntConstant(stepValue);
+  if (!stepValOpt)
+    return std::nullopt;
+  std::int64_t stepVal = extendValue(bitWidth(stepValue), *stepValOpt);
+  if (!stepIsAnAddOp())
+    stepVal = -stepVal;
+  if (isLinearExpr()) {
+    if (addendValue) {
+      auto addendOpt = factory::maybeValueOfIntConstant(addendValue);
+      if (!addendOpt)
+        return std::nullopt;
+      std::int64_t addend = extendValue(bitWidth(addendValue), *addendOpt);
+      if (negatedAddend)
+        endVal += addend;
+      else
+        endVal -= addend;
+    }
+    if (minusOneMult) {
+      initVal = -initVal;
+      stepVal = -stepVal;
+    }
+    if (scaleValue) {
+      auto scaleValOpt = factory::maybeValueOfIntConstant(scaleValue);
+      if (!scaleValOpt)
+        return std::nullopt;
+      std::int64_t scaleVal = extendValue(bitWidth(scaleValue), *scaleValOpt);
+      if (reciprocalScale) {
+        endVal *= scaleVal;
+      } else {
+        endVal *= scaleVal;
+        stepVal *= scaleVal;
+      }
+    }
+  }
+  if (!isClosedIntervalForm()) {
+    if (stepVal < 0)
+      endVal += 1;
+    else
+      endVal -= 1;
+  }
+  std::int64_t result = (endVal - initVal + stepVal) / stepVal;
+  if (result < 0)
+    result = 0;
+  return {result};
+}

 template <typename T>
 constexpr int computeArgsOffset() {
@@ -350,7 +622,9 @@ std::optional<LoopComponents> opt::getLoopComponents(cc::LoopOp loop) {
   auto &whileEntry = whileRegion.front();
   auto condOp =
       cast<cc::ConditionOp>(whileRegion.back().back());
   result.compareOp = condOp.getCondition().getDefiningOp();
-  auto cmpOp = cast<arith::CmpIOp>(result.compareOp);
+  auto cmpOp = dyn_cast<arith::CmpIOp>(result.compareOp);
+  if (!cmpOp)
+    return {};

   auto argumentToCompare = [&](unsigned idx) -> bool {
     return (getLinearExpr(cmpOp.getLhs(), result, loop) ==
diff --git a/lib/Optimizer/Transforms/LoopAnalysis.h b/lib/Optimizer/Transforms/LoopAnalysis.h
index 1d2f6181f0..12f7310655 100644
--- a/lib/Optimizer/Transforms/LoopAnalysis.h
+++ b/lib/Optimizer/Transforms/LoopAnalysis.h
@@ -14,17 +14,29 @@ namespace cudaq::opt {

 // Loops that are transformed into normal form have this attribute.
 static constexpr char NormalizedLoopAttr[] = "normalized";
+static constexpr char DeadLoopAttr[] = "dead";

 struct LoopComponents {
   LoopComponents() = default;

   // Get the induction expression of the comparison.
-  mlir::Value getCompareInduction();
+  mlir::Value getCompareInduction() const;

-  bool stepIsAnAddOp();
-  bool shouldCommuteStepOp();
-  bool isClosedIntervalForm();
-  bool isLinearExpr();
+  bool stepIsAnAddOp() const;
+  bool shouldCommuteStepOp() const;
+  bool isClosedIntervalForm() const;
+  bool isLinearExpr() const;
+  std::optional<std::int64_t> getIterationsConstant() const;
+
+  // Determine if the condition is always true. e.g., `x uge 0`.
+  bool hasAlwaysTrueCondition() const;
+  // Determine if the condition is always false. e.g., `x ult 0`.
+  bool hasAlwaysFalseCondition() const;
+  bool hasInvariantCondition() const {
+    return hasAlwaysTrueCondition() || hasAlwaysFalseCondition();
+  }
+
+  std::int64_t extendValue(unsigned width, std::size_t val) const;

   unsigned induction = 0;
   mlir::Value initialValue;
@@ -50,6 +62,7 @@ struct LoopComponents {
 /// Does the boundary test define a semi-open interval?
 bool isSemiOpenPredicate(mlir::arith::CmpIPredicate p);
 bool isUnsignedPredicate(mlir::arith::CmpIPredicate p);
+bool isSignedPredicate(mlir::arith::CmpIPredicate p);

 /// A counted loop is defined to be a loop that will execute some compile-time
 /// constant number of iterations. We recognize a normalized, semi-open interval
diff --git a/lib/Optimizer/Transforms/LoopNormalizePatterns.inc b/lib/Optimizer/Transforms/LoopNormalizePatterns.inc
index 3191d877a4..a147c78bd3 100644
--- a/lib/Optimizer/Transforms/LoopNormalizePatterns.inc
+++ b/lib/Optimizer/Transforms/LoopNormalizePatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
@@ -13,8 +13,8 @@
 // identifiers from that namespace.

 // Return true if \p loop is not monotonic or it is an invariant loop.
-// Normalization is to be done on any loop that is monotonic and not
-// invariant (which includes loops that are already in counted form).
+// Normalization is to be done on any loop that is monotonic and not invariant
+// (which includes loops that are already in counted form).
 static bool isNotMonotonicOrInvariant(cudaq::cc::LoopOp loop,
                                       bool allowClosedInterval,
                                       bool allowEarlyExit) {
@@ -32,7 +32,8 @@ public:

   LogicalResult matchAndRewrite(cudaq::cc::LoopOp loop,
                                 PatternRewriter &rewriter) const override {
-    if (loop->hasAttr(cudaq::opt::NormalizedLoopAttr))
+    if (loop->hasAttr(cudaq::opt::NormalizedLoopAttr) ||
+        loop->hasAttr(cudaq::opt::DeadLoopAttr))
       return failure();
     if (isNotMonotonicOrInvariant(loop, allowClosedInterval, allowEarlyExit))
       return failure();
@@ -42,6 +43,19 @@ public:
     auto componentsOpt = cudaq::opt::getLoopComponents(loop);
     assert(componentsOpt && "loop must have components");
     auto c = *componentsOpt;
+    if (c.hasAlwaysTrueCondition()) {
+      loop->emitWarning("Loop condition is always true. This loop is not "
+                        "supported in a kernel.");
+      return failure();
+    }
+
+    if (c.hasAlwaysFalseCondition()) {
+      rewriter.startRootUpdate(loop);
+      rewriter.replaceOpWithNewOp<arith::ConstantIntOp>(c.compareOp, 0, 1);
+      loop->setAttr(cudaq::opt::DeadLoopAttr, rewriter.getUnitAttr());
+      rewriter.finalizeRootUpdate(loop);
+      return success();
+    }

     auto loc = loop.getLoc();

     // 1) Set initial value to 0.
@@ -104,11 +118,13 @@ public:
     Value diff = rewriter.create<arith::SubIOp>(loc, upper, lower);
     Value disp = rewriter.create<arith::AddIOp>(loc, diff, step);
     auto cmpOp = cast<arith::CmpIOp>(c.compareOp);
-    Value up1 = rewriter.create<arith::DivSIOp>(loc, disp, step);
-    Value noLoopCond = rewriter.create<arith::CmpIOp>(
-        loc, arith::CmpIPredicate::sgt, up1, zero);
-    Value newUpper =
-        rewriter.create<arith::SelectOp>(loc, ty, noLoopCond, up1, zero);
+    Value newUpper = rewriter.create<arith::DivSIOp>(loc, disp, step);
+    if (cudaq::opt::isSignedPredicate(cmpOp.getPredicate())) {
+      Value noLoopCond = rewriter.create<arith::CmpIOp>(
+          loc, arith::CmpIPredicate::sgt, newUpper, zero);
+      newUpper =
+          rewriter.create<arith::SelectOp>(loc, ty, noLoopCond, newUpper, zero);
+    }

     // 3) Rewrite the comparison (!=) and step operations (+1).
     Value v1 = c.getCompareInduction();
diff --git a/lib/Optimizer/Transforms/LoopUnroll.cpp b/lib/Optimizer/Transforms/LoopUnroll.cpp
index abfdb9cfa4..b9ba7f137c 100644
--- a/lib/Optimizer/Transforms/LoopUnroll.cpp
+++ b/lib/Optimizer/Transforms/LoopUnroll.cpp
@@ -70,7 +70,10 @@ class LoopUnrollPass : public cudaq::opt::impl::LoopUnrollBase<LoopUnrollPass> {

   static unsigned countLoopOps(Operation *op) {
     unsigned result = 0;
-    op->walk([&](cudaq::cc::LoopOp loop) { result++; });
+    op->walk([&](cudaq::cc::LoopOp loop) {
+      if (!loop->hasAttr(cudaq::opt::DeadLoopAttr))
+        result++;
+    });
     LLVM_DEBUG(llvm::dbgs() << "Total number of loops: " << result << '\n');
     return result;
   }
diff --git a/lib/Optimizer/Transforms/LoopUnrollPatterns.inc b/lib/Optimizer/Transforms/LoopUnrollPatterns.inc
index 1a299b9ca1..0db404d050 100644
--- a/lib/Optimizer/Transforms/LoopUnrollPatterns.inc
+++ b/lib/Optimizer/Transforms/LoopUnrollPatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
@@ -20,17 +20,24 @@
 static std::size_t
 unrollLoopByValue(cudaq::cc::LoopOp loop,
                   const cudaq::opt::LoopComponents &components) {
   auto c = components.compareValue.getDefiningOp<arith::ConstantOp>();
-  return cast<IntegerAttr>(c.getValue()).getInt();
-}
-
-static std::size_t unrollLoopByValue(cudaq::cc::LoopOp loop) {
-  auto components = cudaq::opt::getLoopComponents(loop);
-  return unrollLoopByValue(loop, *components);
+  if (loop->hasAttr(cudaq::opt::NormalizedLoopAttr))
+    return cast<IntegerAttr>(c.getValue()).getInt();
+  if (components.hasAlwaysFalseCondition())
+    return 0;
+  auto resultOpt = components.getIterationsConstant();
+  assert(resultOpt.has_value() && "must be counted loop");
+  return *resultOpt;
 }

 static bool exceedsThresholdValue(cudaq::cc::LoopOp loop,
                                   std::size_t threshold) {
-  auto upperBound = unrollLoopByValue(loop);
+  auto components = cudaq::opt::getLoopComponents(loop);
+  if (components->hasAlwaysTrueCondition()) {
+    loop->emitWarning("Loop condition is always true. This loop is not "
+                      "supported in a kernel.");
+    return true;
+  }
+  auto upperBound = unrollLoopByValue(loop, *components);
   return upperBound >= threshold;
 }

@@ -58,6 +65,8 @@ struct UnrollCountedLoop : public OpRewritePattern<cudaq::cc::LoopOp> {
     // requires that all LoopOp operations be rewritten. Despite the setting of
     // this flag, it may not be possible to fully unroll every LoopOp anyway.
     // Check for cases that are clearly not going to be unrolled.
+    if (loop->hasAttr(cudaq::opt::DeadLoopAttr))
+      return failure();
     if (!allowBreak && !cudaq::opt::isaCountedLoop(loop)) {
       if (signalFailure)
         loop.emitOpError("not a simple counted loop");
@@ -82,8 +91,6 @@ struct UnrollCountedLoop : public OpRewritePattern<cudaq::cc::LoopOp> {
     auto components = cudaq::opt::getLoopComponents(loop);
     assert(components && "counted loop must have components");
     auto unrollBy = unrollLoopByValue(loop, *components);
-    if (components->isClosedIntervalForm())
-      ++unrollBy;
     Type inductionTy = loop.getOperands()[components->induction].getType();
     LLVM_DEBUG(llvm::dbgs() << "unrolling loop by " << unrollBy
                             << " iterations\n");
diff --git a/lib/Optimizer/Transforms/LowerToCFGPatterns.inc b/lib/Optimizer/Transforms/LowerToCFGPatterns.inc
index c094741aeb..a449d6df69 100644
--- a/lib/Optimizer/Transforms/LowerToCFGPatterns.inc
+++ b/lib/Optimizer/Transforms/LowerToCFGPatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
diff --git a/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc b/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc
index 642a7a7663..f6fbf30d3f 100644
--- a/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc
+++ b/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
diff --git a/targettests/execution/uccsd.cpp b/targettests/execution/uccsd.cpp
index efc076abad..1011e2191e 100644
--- a/targettests/execution/uccsd.cpp
+++ b/targettests/execution/uccsd.cpp
@@ -8,7 +8,7 @@

 // clang-format off
 // RUN: nvq++ %cpp_std --target anyon --emulate %s -o %t && %t | FileCheck %s
-// RUN: nvq++ %cpp_std --target braket --emulate %s -o %t && %t | FileCheck %s
+// XUN: if %braket_avail; then nvq++ %cpp_std --target braket --emulate %s -o %t && %t | FileCheck %s ; fi
 // RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s
 // RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s
 // RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s
diff --git a/test/AST-Quake/infinite_loop.cpp b/test/AST-Quake/infinite_loop.cpp
new file mode 100644
index 0000000000..10c803a78d
--- /dev/null
+++ b/test/AST-Quake/infinite_loop.cpp
@@ -0,0 +1,63 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under   *
+ * the terms of the Apache License 2.0 which accompanies this distribution.    *
+ ******************************************************************************/
+
+// RUN: cudaq-quake %cpp_std %s | cudaq-opt --memtoreg=quantum=0 --canonicalize --cc-loop-normalize |& FileCheck %s
+
+#include <cudaq.h>
+
+// Counted loop structure when condition is always true
+
+__qpu__ int t1() {
+  cudaq::qubit q;
+  for (std::uint32_t u = 1; u <= 0xffffffff; u++)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t2() {
+  cudaq::qubit q;
+  for (std::int32_t u = 1; u <= 0x7fffffff; u++)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t3() {
+  cudaq::qubit q;
+  for (std::uint64_t u = 5; u <= 0xffffffffffffffff; u++)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t4() {
+  cudaq::qubit q;
+  for (std::int64_t u = 16; u <= 0x7fffffffffffffff; u++)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t5() {
+  cudaq::qubit q;
+  for (std::uint64_t u = -14; u >= 0; u--)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t6() {
+  cudaq::qubit q;
+  std::int64_t cmp = 0x8000000000000000;
+  for (std::int64_t u = 83; u >= cmp; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
diff --git a/test/AST-Quake/loop_normal.cpp b/test/AST-Quake/loop_normal.cpp
index a4bf4bfad5..62ff0aa75b 100644
--- a/test/AST-Quake/loop_normal.cpp
+++ b/test/AST-Quake/loop_normal.cpp
@@ -402,3 +402,278 @@
 // CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_1]] : i32
 // CHECK: cc.continue %[[VAL_15]] : i32
 // CHECK: } {normalized}
+
+// In cases where the number of iterations is invalid, we should normalize to
+// a count of 0.
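+// For example, `i = 1; i < -1; i++` gives, per getIterationsConstant(),
+// (end - init + step) / step = (-2 - 1 + 1) / 1 = -2 (the semi-open bound -1
+// is first adjusted to -2), and the negative count clamps to 0.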
+
+__qpu__ void non_iterating_loop2() {
+  cudaq::qvector q(100);
+  for (std::int64_t i = 1; i < -1; i++)
+    x(q[i]);
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_non_iterating_loop2._Z19non_iterating_loop2v() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i64
+// CHECK-DAG: %[[VAL_2:.*]] = quake.alloca !quake.veq<100>
+// CHECK: %[[VAL_3:.*]] = cc.loop while ((%[[VAL_4:.*]] = %[[VAL_0]]) -> (i64)) {
+// CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_4]], %[[VAL_0]] : i64
+// CHECK: cc.condition %[[VAL_5]](%[[VAL_4]] : i64)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_6:.*]]: i64):
+// CHECK: %[[VAL_7:.*]] = arith.addi %[[VAL_6]], %[[VAL_1]] : i64
+// CHECK: %[[VAL_8:.*]] = quake.extract_ref %[[VAL_2]]{{\[}}%[[VAL_7]]] : (!quake.veq<100>, i64) -> !quake.ref
+// CHECK: quake.x %[[VAL_8]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_6]] : i64
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_9:.*]]: i64):
+// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_1]] : i64
+// CHECK: cc.continue %[[VAL_10]] : i64
+// CHECK: } {normalized}
+// CHECK: return
+// CHECK: }
+
+__qpu__ int f2a() {
+  cudaq::qubit q;
+  for (int u = 1; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f2a._Z3f2av() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[VAL_2:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_3:.*]] = cc.loop while ((%[[VAL_4:.*]] = %[[VAL_0]]) -> (i32)) {
+// CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_4]], %[[VAL_0]] : i32
+// CHECK: cc.condition %[[VAL_5]](%[[VAL_4]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_6:.*]]: i32):
+// CHECK: quake.x %[[VAL_2]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_6]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_1]] : i32
+// CHECK: cc.continue %[[VAL_8]] : i32
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+__qpu__ int f2b() {
+  cudaq::qubit q;
+  for (int u = 10; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f2b._Z3f2bv() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[VAL_2:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_3:.*]] = cc.loop while ((%[[VAL_4:.*]] = %[[VAL_0]]) -> (i32)) {
+// CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_4]], %[[VAL_0]] : i32
+// CHECK: cc.condition %[[VAL_5]](%[[VAL_4]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_6:.*]]: i32):
+// CHECK: quake.x %[[VAL_2]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_6]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_1]] : i32
+// CHECK: cc.continue %[[VAL_8]] : i32
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+__qpu__ int f4() {
+  cudaq::qubit q;
+  for (std::int64_t u = 6; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f4._Z2f4v() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i64
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_3:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_4:.*]] = cc.loop while ((%[[VAL_5:.*]] = %[[VAL_0]]) -> (i64)) {
+// CHECK: %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_0]] : i64
+// CHECK: cc.condition %[[VAL_6]](%[[VAL_5]] : i64)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i64):
+// CHECK: quake.x %[[VAL_3]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_7]] : i64
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_8:.*]]: i64):
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_1]] : i64
+// CHECK: cc.continue %[[VAL_9]] : i64
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_2]] : i32
+// CHECK: }
+
+__qpu__ int m1(unsigned z) {
+  cudaq::qubit q;
+  for (unsigned u = 1; u < z; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_m1._Z2m1j(
+// CHECK-SAME: %[[VAL_0:.*]]: i32) -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_4:.*]] = arith.subi %[[VAL_0]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_7:.*]] = cc.loop while ((%[[VAL_8:.*]] = %[[VAL_1]]) -> (i32)) {
+// CHECK: %[[VAL_9:.*]] = arith.cmpi ne, %[[VAL_8]], %[[VAL_4]] : i32
+// CHECK: cc.condition %[[VAL_9]](%[[VAL_8]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_10:.*]]: i32):
+// CHECK: quake.x %[[VAL_3]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_10]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_11:.*]]: i32):
+// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_2]] : i32
+// CHECK: cc.continue %[[VAL_12]] : i32
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_1]] : i32
+// CHECK: }
+
+__qpu__ int m2(int z) {
+  cudaq::qubit q;
+  for (int u = 1; u < z; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_m2._Z2m2i(
+// CHECK-SAME: %[[VAL_0:.*]]: i32) -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_4:.*]] = arith.subi %[[VAL_0]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_5:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_1]] : i32
+// CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_5]], %[[VAL_4]], %[[VAL_1]] : i32
+// CHECK: %[[VAL_7:.*]] = cc.loop while ((%[[VAL_8:.*]] = %[[VAL_1]]) -> (i32)) {
+// CHECK: %[[VAL_9:.*]] = arith.cmpi ne, %[[VAL_8]], %[[VAL_6]] : i32
+// CHECK: cc.condition %[[VAL_9]](%[[VAL_8]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_10:.*]]: i32):
+// CHECK: quake.x %[[VAL_3]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_10]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_11:.*]]: i32):
+// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_2]] : i32
+// CHECK: cc.continue %[[VAL_12]] : i32
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_1]] : i32
+// CHECK: }
+
+// Dead loops: no unsigned value will ever be less than 0, so these loops will
+// never execute. Make sure they are marked "dead" by the normalizer.
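+// The normalizer rewrites each condition to `arith.constant false` and tags
+// the loop op with the `dead` attribute (checked below); the loop-unroll pass
+// then skips any loop carrying that attribute.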
+
+__qpu__ void non_iterating_loop1() {
+  cudaq::qvector q(100);
+  for (std::uint64_t i = 1; i < 0; i++)
+    x(q[i]);
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_non_iterating_loop1._Z19non_iterating_loop1v() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
+// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<100>
+// CHECK: %[[VAL_3:.*]] = cc.loop while ((%[[VAL_4:.*]] = %[[VAL_1]]) -> (i64)) {
+// CHECK: cc.condition %[[VAL_0]](%[[VAL_4]] : i64)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_5:.*]]: i64):
+// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_2]]{{\[}}%[[VAL_5]]] : (!quake.veq<100>, i64) -> !quake.ref
+// CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_5]] : i64
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i64):
+// CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_1]] : i64
+// CHECK: cc.continue %[[VAL_8]] : i64
+// CHECK: } {dead}
+// CHECK: return
+// CHECK: }
+
+__qpu__ int f1a() {
+  cudaq::qubit q;
+  for (unsigned u = 1; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f1a._Z3f1av() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_4:.*]] = cc.loop while ((%[[VAL_5:.*]] = %[[VAL_2]]) -> (i32)) {
+// CHECK: cc.condition %[[VAL_0]](%[[VAL_5]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_6:.*]]: i32):
+// CHECK: quake.x %[[VAL_3]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_6]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] : i32
+// CHECK: cc.continue %[[VAL_8]] : i32
+// CHECK: } {dead}
+// CHECK: return %[[VAL_1]] : i32
+// CHECK: }
+
+__qpu__ int f1b() {
+  cudaq::qubit q;
+  for (unsigned u = 10; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f1b._Z3f1bv() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK: %[[VAL_3:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_5:.*]] = cc.loop while ((%[[VAL_6:.*]] = %[[VAL_3]]) -> (i32)) {
+// CHECK: cc.condition %[[VAL_0]](%[[VAL_6]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: quake.x %[[VAL_4]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_7]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_8:.*]]: i32):
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_1]] : i32
+// CHECK: cc.continue %[[VAL_9]] : i32
+// CHECK: } {dead}
+// CHECK: return %[[VAL_2]] : i32
+// CHECK: }
+
+__qpu__ int f3() {
+  cudaq::qubit q;
+  for (std::uint64_t u = 22; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f3._Z2f3v() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 22 : i64
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i64
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_5:.*]] = cc.loop while ((%[[VAL_6:.*]] = %[[VAL_1]]) -> (i64)) {
+// CHECK: cc.condition %[[VAL_0]](%[[VAL_6]] : i64)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i64):
+// CHECK: quake.x %[[VAL_4]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_7]] : i64
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_8:.*]]: i64):
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : i64
+// CHECK: cc.continue %[[VAL_9]] : i64
+// CHECK: } {dead}
+// CHECK: return %[[VAL_3]] : i32
+// CHECK: }
diff --git a/test/Quake/loop_normalize.qke b/test/Quake/loop_normalize.qke
deleted file mode 100644
index f040f87843..0000000000
--- a/test/Quake/loop_normalize.qke
+++ /dev/null
@@ -1,95 +0,0 @@
-// ========================================================================== //
-// Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                 //
-// All rights reserved.                                                       //
-//                                                                            //
-// This source code and the accompanying materials are made available under  //
-// the terms of the Apache License 2.0 which accompanies this distribution.  //
-// ========================================================================== //
-
-// RUN: cudaq-opt -cc-loop-normalize %s | FileCheck %s
-
-module {
-  func.func @test_positive_boundaries() {
-    %c0_i64 = arith.constant 0 : i64
-    %c1_i64 = arith.constant 1 : i64
-    %0 = quake.alloca !quake.veq<0>
-    %1 = cc.loop while ((%arg0 = %c1_i64) -> (i64)) {
-      %2 = arith.cmpi ult, %arg0, %c0_i64 : i64
-      cc.condition %2(%arg0 : i64)
-    } do {
-    ^bb0(%arg0: i64):
-      %2 = arith.subi %arg0, %c1_i64 : i64
-      %3 = quake.extract_ref %0[%2] : (!quake.veq<0>, i64) -> !quake.ref
-      quake.x %3 : (!quake.ref) -> ()
-      cc.continue %arg0 : i64
-    } step {
-    ^bb0(%arg0: i64):
-      %2 = arith.addi %arg0, %c1_i64 : i64
-      cc.continue %2 : i64
-    }
-    return
-  }
-
-// CHECK-LABEL: func.func @test_positive_boundaries() {
-// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64
-// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
-// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<0>
-// CHECK: %[[VAL_3:.*]] = cc.loop while ((%arg0 = %[[VAL_0]]) -> (i64)) {
-// CHECK: %[[VAL_4:.*]] = arith.cmpi ne, %arg0, %[[VAL_0]] : i64
-// CHECK: cc.condition %[[VAL_4]](%arg0 : i64)
-// CHECK: } do {
-// CHECK: ^bb0(%arg0: i64):
-// CHECK: %[[VAL_4:.*]] = quake.extract_ref %[[VAL_2]][%arg0] : (!quake.veq<0>, i64) -> !quake.ref
-// CHECK: quake.x %[[VAL_4]] : (!quake.ref) -> ()
-// CHECK: cc.continue %arg0 : i64
-// CHECK: } step {
-// CHECK: ^bb0(%arg0: i64):
-// CHECK: %[[VAL_4:.*]] = arith.addi %arg0, %[[VAL_1]] : i64
-// CHECK: cc.continue %[[VAL_4]] : i64
-// CHECK: } {normalized}
-// CHECK: return
-// CHECK: }
-
-  func.func @test_negative_boundaries() {
-    %c-1_i32 = arith.constant -1 : i32
-    %c1_i32 = arith.constant 1 : i32
-    %c0_i32 = arith.constant 0 : i32
-    %0 = quake.alloca !quake.veq<0>
-    %1 = cc.loop while ((%arg0 = %c0_i32) -> (i32)) {
-      %2 = arith.cmpi slt, %arg0, %c-1_i32 : i32
-      cc.condition %2(%arg0 : i32)
-    } do {
-    ^bb0(%arg0: i32):
-      %2 = cc.cast signed %arg0 : (i32) -> i64
-      %3 = quake.extract_ref %0[%2] : (!quake.veq<0>, i64) -> !quake.ref
-      quake.x %3 : (!quake.ref) -> ()
-      cc.continue %arg0 : i32
-    } step {
-    ^bb0(%arg0: i32):
-      %2 = arith.addi %arg0, %c1_i32 : i32
-      cc.continue %2 : i32
-    }
-    return
-  }
-
-// CHECK-LABEL: func.func @test_negative_boundaries() {
-// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i32
-// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
-// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<0>
-// CHECK: %[[VAL_3:.*]] = cc.loop while ((%arg0 = %[[VAL_0]]) -> (i32)) {
-// CHECK: %[[VAL_4:.*]] = arith.cmpi ne, %arg0, %[[VAL_0]] : i32
-// CHECK: cc.condition %[[VAL_4]](%arg0 : i32)
-// CHECK: } do {
-// CHECK: ^bb0(%arg0: i32):
-// CHECK: %[[VAL_4:.*]] = cc.cast signed %arg0 : (i32) -> i64
-// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_2]][%[[VAL_4]]] : (!quake.veq<0>, i64) -> !quake.ref
-// CHECK: quake.x %[[VAL_5]] : (!quake.ref) -> ()
-// CHECK: cc.continue %arg0 : i32
-// CHECK: } step {
-// CHECK: ^bb0(%arg0: i32):
-// CHECK: %[[VAL_4:.*]] = arith.addi %arg0, %[[VAL_1]] : i32
-// CHECK: cc.continue %[[VAL_4]] : i32
-// CHECK: } {normalized}
-// CHECK: return
-// CHECK: }
-}