diff --git a/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc b/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc
index db020a3aa3..d87995ffb8 100644
--- a/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc
+++ b/lib/Optimizer/Transforms/LiftArrayAllocPatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
diff --git a/lib/Optimizer/Transforms/LoopAnalysis.cpp b/lib/Optimizer/Transforms/LoopAnalysis.cpp
index c40e2b7e30..37ee67489d 100644
--- a/lib/Optimizer/Transforms/LoopAnalysis.cpp
+++ b/lib/Optimizer/Transforms/LoopAnalysis.cpp
@@ -7,6 +7,7 @@
  ******************************************************************************/

 #include "LoopAnalysis.h"
+#include "cudaq/Optimizer/Builder/Factory.h"
 #include "mlir/IR/Dominance.h"

 using namespace mlir;
@@ -73,14 +74,6 @@ static bool isaConstantOf(Value v, std::int64_t hasVal) {
   return false;
 }

-static bool isNegativeConstant(Value v) {
-  v = peelCastOps(v);
-  if (auto c = v.getDefiningOp<arith::ConstantOp>())
-    if (auto ia = dyn_cast<IntegerAttr>(c.getValue()))
-      return ia.getInt() < 0;
-  return false;
-}
-
 static bool isClosedIntervalForm(arith::CmpIPredicate p) {
   return p == arith::CmpIPredicate::ule || p == arith::CmpIPredicate::sle;
 }
@@ -210,6 +203,10 @@ static BlockArgument getLinearExpr(Value expr,
   return scaledIteration(expr);
 }

+static unsigned bitWidth(Value val) {
+  return cast<IntegerType>(val.getType()).getWidth();
+}
+
 namespace cudaq {

 bool opt::isSemiOpenPredicate(arith::CmpIPredicate p) {
@@ -223,6 +220,11 @@ bool opt::isUnsignedPredicate(arith::CmpIPredicate p) {
          p == arith::CmpIPredicate::ugt || p == arith::CmpIPredicate::uge;
 }

+bool opt::isSignedPredicate(arith::CmpIPredicate p) {
+  return p == arith::CmpIPredicate::slt || p == arith::CmpIPredicate::sle ||
+         p == arith::CmpIPredicate::sgt || p == arith::CmpIPredicate::sge;
+}
+
 // We expect the loop control value to have the following form.
 //
 //     %final = cc.loop while ((%iter = %initial) -> (iN)) {
@@ -282,7 +284,7 @@ bool opt::isaMonotonicLoop(Operation *op, bool allowEarlyExit,

 bool opt::isaInvariantLoop(const LoopComponents &c, bool allowClosedInterval) {
   if (isaConstantOf(c.initialValue, 0) && isaConstantOf(c.stepValue, 1) &&
-      isa<arith::AddIOp>(c.stepOp) && !isNegativeConstant(c.compareValue)) {
+      isa<arith::AddIOp>(c.stepOp)) {
     auto cmp = cast<arith::CmpIOp>(c.compareOp);
     return validCountedLoopIntervalForm(cmp, allowClosedInterval);
   }
@@ -314,26 +316,296 @@ bool opt::isaConstantUpperBoundLoop(cc::LoopOp loop, bool allowClosedInterval) {
          isaConstant(c.compareValue);
 }

-Value opt::LoopComponents::getCompareInduction() {
+Value opt::LoopComponents::getCompareInduction() const {
   auto cmpOp = cast<arith::CmpIOp>(compareOp);
   return cmpOp.getLhs() == compareValue ? cmpOp.getRhs() : cmpOp.getLhs();
 }

-bool opt::LoopComponents::stepIsAnAddOp() { return isa<arith::AddIOp>(stepOp); }
+bool opt::LoopComponents::stepIsAnAddOp() const {
+  return isa<arith::AddIOp>(stepOp);
+}

-bool opt::LoopComponents::shouldCommuteStepOp() {
+bool opt::LoopComponents::shouldCommuteStepOp() const {
   if (auto addOp = dyn_cast_or_null<arith::AddIOp>(stepOp))
     return addOp.getRhs() == stepRegion->front().getArgument(induction);
   // Note: we don't allow induction on lhs of subtraction.
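+  // Subtraction does not commute, so only an addition step is a candidate for
+  // operand commuting; any other step op conservatively returns false.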
   return false;
 }

-bool opt::LoopComponents::isClosedIntervalForm() {
+bool opt::LoopComponents::isClosedIntervalForm() const {
   auto cmp = cast<arith::CmpIOp>(compareOp);
   return ::isClosedIntervalForm(cmp.getPredicate());
 }

-bool opt::LoopComponents::isLinearExpr() { return addendValue || scaleValue; }
+bool opt::LoopComponents::isLinearExpr() const {
+  return addendValue || scaleValue;
+}
+
+std::int64_t opt::LoopComponents::extendValue(unsigned width,
+                                              std::size_t val) const {
+  const bool signExt =
+      isSignedPredicate(cast<arith::CmpIOp>(compareOp).getPredicate());
+  std::int64_t result = val;
+  switch (width) {
+  case 8:
+    if (signExt) {
+      std::int8_t v = val & 0xFF;
+      result = v;
+    } else {
+      std::uint8_t v = val & 0xFF;
+      result = v;
+    }
+    break;
+  case 16:
+    if (signExt) {
+      std::int16_t v = val & 0xFFFF;
+      result = v;
+    } else {
+      std::uint16_t v = val & 0xFFFF;
+      result = v;
+    }
+    break;
+  case 32:
+    if (signExt) {
+      std::int32_t v = val & 0xFFFFFFFF;
+      result = v;
+    } else {
+      std::uint32_t v = val & 0xFFFFFFFF;
+      result = v;
+    }
+    break;
+  default:
+    break;
+  }
+  return result;
+}
+
+bool opt::LoopComponents::hasAlwaysTrueCondition() const {
+  auto cmpValOpt = factory::maybeValueOfIntConstant(compareValue);
+  if (!cmpValOpt)
+    return false;
+  auto width = bitWidth(compareValue);
+  std::int64_t cmpVal = *cmpValOpt;
+  auto pred = cast<arith::CmpIOp>(compareOp).getPredicate();
+  switch (width) {
+  case 8: {
+    switch (pred) {
+    case arith::CmpIPredicate::sge:
+      return static_cast<std::int8_t>(cmpVal) ==
+             std::numeric_limits<std::int8_t>::min();
+    case arith::CmpIPredicate::sle:
+      return static_cast<std::int8_t>(cmpVal) ==
+             std::numeric_limits<std::int8_t>::max();
+    case arith::CmpIPredicate::uge:
+      return static_cast<std::uint8_t>(cmpVal) ==
+             std::numeric_limits<std::uint8_t>::min();
+    case arith::CmpIPredicate::ule:
+      return static_cast<std::uint8_t>(cmpVal) ==
+             std::numeric_limits<std::uint8_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 16: {
+    switch (pred) {
+    case arith::CmpIPredicate::sge:
+      return static_cast<std::int16_t>(cmpVal) ==
+             std::numeric_limits<std::int16_t>::min();
+    case arith::CmpIPredicate::sle:
+      return static_cast<std::int16_t>(cmpVal) ==
+             std::numeric_limits<std::int16_t>::max();
+    case arith::CmpIPredicate::uge:
+      return static_cast<std::uint16_t>(cmpVal) ==
+             std::numeric_limits<std::uint16_t>::min();
+    case arith::CmpIPredicate::ule:
+      return static_cast<std::uint16_t>(cmpVal) ==
+             std::numeric_limits<std::uint16_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 32: {
+    switch (pred) {
+    case arith::CmpIPredicate::sge:
+      return static_cast<std::int32_t>(cmpVal) ==
+             std::numeric_limits<std::int32_t>::min();
+    case arith::CmpIPredicate::sle:
+      return static_cast<std::int32_t>(cmpVal) ==
+             std::numeric_limits<std::int32_t>::max();
+    case arith::CmpIPredicate::uge:
+      return static_cast<std::uint32_t>(cmpVal) ==
+             std::numeric_limits<std::uint32_t>::min();
+    case arith::CmpIPredicate::ule:
+      return static_cast<std::uint32_t>(cmpVal) ==
+             std::numeric_limits<std::uint32_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 64: {
+    switch (pred) {
+    case arith::CmpIPredicate::sge:
+      return static_cast<std::int64_t>(cmpVal) ==
+             std::numeric_limits<std::int64_t>::min();
+    case arith::CmpIPredicate::sle:
+      return static_cast<std::int64_t>(cmpVal) ==
+             std::numeric_limits<std::int64_t>::max();
+    case arith::CmpIPredicate::uge:
+      return static_cast<std::uint64_t>(cmpVal) ==
+             std::numeric_limits<std::uint64_t>::min();
+    case arith::CmpIPredicate::ule:
+      return static_cast<std::uint64_t>(cmpVal) ==
+             std::numeric_limits<std::uint64_t>::max();
+    default:
+      break;
+    }
+  } break;
+  default:
+    break;
+  }
+  return false;
+}
+
+bool opt::LoopComponents::hasAlwaysFalseCondition() const {
+  auto cmpValOpt = factory::maybeValueOfIntConstant(compareValue);
+  if (!cmpValOpt)
+    return false;
+  auto width = bitWidth(compareValue);
+  std::int64_t cmpVal = *cmpValOpt;
+  auto pred = cast<arith::CmpIOp>(compareOp).getPredicate();
+  switch (width) {
+  case 8: {
+    switch (pred) {
+    case arith::CmpIPredicate::slt:
+      return static_cast<std::int8_t>(cmpVal) ==
+             std::numeric_limits<std::int8_t>::min();
+    case arith::CmpIPredicate::sgt:
+      return static_cast<std::int8_t>(cmpVal) ==
+             std::numeric_limits<std::int8_t>::max();
+    case arith::CmpIPredicate::ult:
+      return static_cast<std::uint8_t>(cmpVal) ==
+             std::numeric_limits<std::uint8_t>::min();
+    case arith::CmpIPredicate::ugt:
+      return static_cast<std::uint8_t>(cmpVal) ==
+             std::numeric_limits<std::uint8_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 16: {
+    switch (pred) {
+    case arith::CmpIPredicate::slt:
+      return static_cast<std::int16_t>(cmpVal) ==
+             std::numeric_limits<std::int16_t>::min();
+    case arith::CmpIPredicate::sgt:
+      return static_cast<std::int16_t>(cmpVal) ==
+             std::numeric_limits<std::int16_t>::max();
+    case arith::CmpIPredicate::ult:
+      return static_cast<std::uint16_t>(cmpVal) ==
+             std::numeric_limits<std::uint16_t>::min();
+    case arith::CmpIPredicate::ugt:
+      return static_cast<std::uint16_t>(cmpVal) ==
+             std::numeric_limits<std::uint16_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 32: {
+    switch (pred) {
+    case arith::CmpIPredicate::slt:
+      return static_cast<std::int32_t>(cmpVal) ==
+             std::numeric_limits<std::int32_t>::min();
+    case arith::CmpIPredicate::sgt:
+      return static_cast<std::int32_t>(cmpVal) ==
+             std::numeric_limits<std::int32_t>::max();
+    case arith::CmpIPredicate::ult:
+      return static_cast<std::uint32_t>(cmpVal) ==
+             std::numeric_limits<std::uint32_t>::min();
+    case arith::CmpIPredicate::ugt:
+      return static_cast<std::uint32_t>(cmpVal) ==
+             std::numeric_limits<std::uint32_t>::max();
+    default:
+      break;
+    }
+  } break;
+  case 64: {
+    switch (pred) {
+    case arith::CmpIPredicate::slt:
+      return static_cast<std::int64_t>(cmpVal) ==
+             std::numeric_limits<std::int64_t>::min();
+    case arith::CmpIPredicate::sgt:
+      return static_cast<std::int64_t>(cmpVal) ==
+             std::numeric_limits<std::int64_t>::max();
+    case arith::CmpIPredicate::ult:
+      return static_cast<std::uint64_t>(cmpVal) ==
+             std::numeric_limits<std::uint64_t>::min();
+    case arith::CmpIPredicate::ugt:
+      return static_cast<std::uint64_t>(cmpVal) ==
+             std::numeric_limits<std::uint64_t>::max();
+    default:
+      break;
+    }
+  } break;
+  default:
+    break;
+  }
+  return false;
+}
+
+std::optional<std::int64_t>
+opt::LoopComponents::getIterationsConstant() const {
+  auto initValOpt = factory::maybeValueOfIntConstant(initialValue);
+  if (!initValOpt)
+    return std::nullopt;
+  std::int64_t initVal = extendValue(bitWidth(initialValue), *initValOpt);
+  auto endValOpt = factory::maybeValueOfIntConstant(compareValue);
+  if (!endValOpt)
+    return std::nullopt;
+  std::int64_t endVal = extendValue(bitWidth(compareValue), *endValOpt);
+  auto stepValOpt = factory::maybeValueOfIntConstant(stepValue);
+  if (!stepValOpt)
+    return std::nullopt;
+  std::int64_t stepVal = extendValue(bitWidth(stepValue), *stepValOpt);
+  if (!stepIsAnAddOp())
+    stepVal = -stepVal;
+  if (isLinearExpr()) {
+    if (addendValue) {
+      auto addendOpt = factory::maybeValueOfIntConstant(addendValue);
+      if (!addendOpt)
+        return std::nullopt;
+      std::int64_t addend = extendValue(bitWidth(addendValue), *addendOpt);
+      if (negatedAddend)
+        endVal += addend;
+      else
+        endVal -= addend;
+    }
+    if (minusOneMult) {
+      initVal = -initVal;
+      stepVal = -stepVal;
+    }
+    if (scaleValue) {
+      auto scaleValOpt = factory::maybeValueOfIntConstant(scaleValue);
+      if (!scaleValOpt)
+        return std::nullopt;
+      std::int64_t scaleVal = extendValue(bitWidth(scaleValue), *scaleValOpt);
+      if (reciprocalScale) {
+        endVal *= scaleVal;
+      } else {
+        endVal *= scaleVal;
+        stepVal *= scaleVal;
+      }
+    }
+  }
+  if (!isClosedIntervalForm()) {
+    if (stepVal < 0)
+      endVal += 1;
+    else
+      endVal -= 1;
+  }
+  std::int64_t result = (endVal - initVal + stepVal) / stepVal;
+  if (result < 0)
+    result = 0;
+  return {result};
+}

 template <typename T>
 constexpr int computeArgsOffset() {
@@ -350,7 +622,9 @@ std::optional<LoopComponents> opt::getLoopComponents(cc::LoopOp loop) {
   auto &whileEntry = whileRegion.front();
   auto condOp =
       cast<cc::ConditionOp>(whileRegion.back().back());
   result.compareOp = condOp.getCondition().getDefiningOp();
-  auto cmpOp = cast<arith::CmpIOp>(result.compareOp);
+  auto cmpOp = dyn_cast<arith::CmpIOp>(result.compareOp);
+  if (!cmpOp)
+    return {};

   auto argumentToCompare = [&](unsigned idx) -> bool {
     return (getLinearExpr(cmpOp.getLhs(), result, loop) ==
diff --git a/lib/Optimizer/Transforms/LoopAnalysis.h b/lib/Optimizer/Transforms/LoopAnalysis.h
index 1d2f6181f0..12f7310655 100644
--- a/lib/Optimizer/Transforms/LoopAnalysis.h
+++ b/lib/Optimizer/Transforms/LoopAnalysis.h
@@ -14,17 +14,29 @@ namespace cudaq::opt {

 // Loops that are transformed into normal form have this attribute.
 static constexpr char NormalizedLoopAttr[] = "normalized";
+static constexpr char DeadLoopAttr[] = "dead";

 struct LoopComponents {
   LoopComponents() = default;

   // Get the induction expression of the comparison.
-  mlir::Value getCompareInduction();
+  mlir::Value getCompareInduction() const;

-  bool stepIsAnAddOp();
-  bool shouldCommuteStepOp();
-  bool isClosedIntervalForm();
-  bool isLinearExpr();
+  bool stepIsAnAddOp() const;
+  bool shouldCommuteStepOp() const;
+  bool isClosedIntervalForm() const;
+  bool isLinearExpr() const;
+  std::optional<std::int64_t> getIterationsConstant() const;
+
+  // Determine if the condition is always true. e.g., `x uge 0`.
+  bool hasAlwaysTrueCondition() const;
+  // Determine if the condition is always false. e.g., `x ult 0`.
+  bool hasAlwaysFalseCondition() const;
+  bool hasInvariantCondition() const {
+    return hasAlwaysTrueCondition() || hasAlwaysFalseCondition();
+  }
+
+  std::int64_t extendValue(unsigned width, std::size_t val) const;

   unsigned induction = 0;
   mlir::Value initialValue;
@@ -50,6 +62,7 @@ struct LoopComponents {
 /// Does the boundary test define a semi-open interval?
 bool isSemiOpenPredicate(mlir::arith::CmpIPredicate p);
 bool isUnsignedPredicate(mlir::arith::CmpIPredicate p);
+bool isSignedPredicate(mlir::arith::CmpIPredicate p);

 /// A counted loop is defined to be a loop that will execute some compile-time
 /// constant number of iterations. We recognize a normalized, semi-open interval
diff --git a/lib/Optimizer/Transforms/LoopNormalizePatterns.inc b/lib/Optimizer/Transforms/LoopNormalizePatterns.inc
index 3191d877a4..a147c78bd3 100644
--- a/lib/Optimizer/Transforms/LoopNormalizePatterns.inc
+++ b/lib/Optimizer/Transforms/LoopNormalizePatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
@@ -13,8 +13,8 @@
 // identifiers from that namespace.

 // Return true if \p loop is not monotonic or it is an invariant loop.
-// Normalization is to be done on any loop that is monotonic and not
-// invariant (which includes loops that are already in counted form).
+// Normalization is to be done on any loop that is monotonic and not invariant
+// (which includes loops that are already in counted form).
 static bool isNotMonotonicOrInvariant(cudaq::cc::LoopOp loop,
                                       bool allowClosedInterval,
                                       bool allowEarlyExit) {
@@ -32,7 +32,8 @@ public:

   LogicalResult matchAndRewrite(cudaq::cc::LoopOp loop,
                                 PatternRewriter &rewriter) const override {
-    if (loop->hasAttr(cudaq::opt::NormalizedLoopAttr))
+    if (loop->hasAttr(cudaq::opt::NormalizedLoopAttr) ||
+        loop->hasAttr(cudaq::opt::DeadLoopAttr))
       return failure();
     if (isNotMonotonicOrInvariant(loop, allowClosedInterval, allowEarlyExit))
       return failure();
@@ -42,6 +43,19 @@ public:
     auto componentsOpt = cudaq::opt::getLoopComponents(loop);
     assert(componentsOpt && "loop must have components");
     auto c = *componentsOpt;
+    if (c.hasAlwaysTrueCondition()) {
+      loop->emitWarning("Loop condition is always true. This loop is not "
+                        "supported in a kernel.");
+      return failure();
+    }
+
+    if (c.hasAlwaysFalseCondition()) {
+      rewriter.startRootUpdate(loop);
+      rewriter.replaceOpWithNewOp<arith::ConstantIntOp>(c.compareOp, 0, 1);
+      loop->setAttr(cudaq::opt::DeadLoopAttr, rewriter.getUnitAttr());
+      rewriter.finalizeRootUpdate(loop);
+      return success();
+    }

     auto loc = loop.getLoc();

     // 1) Set initial value to 0.
@@ -104,11 +118,13 @@ public:
     Value diff = rewriter.create<arith::SubIOp>(loc, upper, lower);
     Value disp = rewriter.create<arith::AddIOp>(loc, diff, step);
     auto cmpOp = cast<arith::CmpIOp>(c.compareOp);
-    Value up1 = rewriter.create<arith::DivSIOp>(loc, disp, step);
-    Value noLoopCond = rewriter.create<arith::CmpIOp>(
-        loc, arith::CmpIPredicate::sgt, up1, zero);
-    Value newUpper =
-        rewriter.create<arith::SelectOp>(loc, ty, noLoopCond, up1, zero);
+    Value newUpper = rewriter.create<arith::DivSIOp>(loc, disp, step);
+    if (cudaq::opt::isSignedPredicate(cmpOp.getPredicate())) {
+      Value noLoopCond = rewriter.create<arith::CmpIOp>(
+          loc, arith::CmpIPredicate::sgt, newUpper, zero);
+      newUpper =
+          rewriter.create<arith::SelectOp>(loc, ty, noLoopCond, newUpper, zero);
+    }

     // 3) Rewrite the comparison (!=) and step operations (+1).
     Value v1 = c.getCompareInduction();
diff --git a/lib/Optimizer/Transforms/LoopUnroll.cpp b/lib/Optimizer/Transforms/LoopUnroll.cpp
index abfdb9cfa4..b9ba7f137c 100644
--- a/lib/Optimizer/Transforms/LoopUnroll.cpp
+++ b/lib/Optimizer/Transforms/LoopUnroll.cpp
@@ -70,7 +70,10 @@ class LoopUnrollPass : public cudaq::opt::impl::LoopUnrollBase<LoopUnrollPass> {

   static unsigned countLoopOps(Operation *op) {
     unsigned result = 0;
-    op->walk([&](cudaq::cc::LoopOp loop) { result++; });
+    op->walk([&](cudaq::cc::LoopOp loop) {
+      if (!loop->hasAttr(cudaq::opt::DeadLoopAttr))
+        result++;
+    });
     LLVM_DEBUG(llvm::dbgs() << "Total number of loops: " << result << '\n');
     return result;
   }
diff --git a/lib/Optimizer/Transforms/LoopUnrollPatterns.inc b/lib/Optimizer/Transforms/LoopUnrollPatterns.inc
index 1a299b9ca1..0db404d050 100644
--- a/lib/Optimizer/Transforms/LoopUnrollPatterns.inc
+++ b/lib/Optimizer/Transforms/LoopUnrollPatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
@@ -20,17 +20,24 @@
 static std::size_t
 unrollLoopByValue(cudaq::cc::LoopOp loop,
                   const cudaq::opt::LoopComponents &components) {
   auto c = components.compareValue.getDefiningOp<arith::ConstantOp>();
-  return cast<IntegerAttr>(c.getValue()).getInt();
-}
-
-static std::size_t unrollLoopByValue(cudaq::cc::LoopOp loop) {
-  auto components = cudaq::opt::getLoopComponents(loop);
-  return unrollLoopByValue(loop, *components);
+  if (loop->hasAttr(cudaq::opt::NormalizedLoopAttr))
+    return cast<IntegerAttr>(c.getValue()).getInt();
+  if (components.hasAlwaysFalseCondition())
+    return 0;
+  auto resultOpt = components.getIterationsConstant();
+  assert(resultOpt.has_value() && "must be counted loop");
+  return *resultOpt;
 }

 static bool exceedsThresholdValue(cudaq::cc::LoopOp loop,
                                   std::size_t threshold) {
-  auto upperBound = unrollLoopByValue(loop);
+  auto components = cudaq::opt::getLoopComponents(loop);
+  if (components->hasAlwaysTrueCondition()) {
+    loop->emitWarning("Loop condition is always true. This loop is not "
+                      "supported in a kernel.");
+    return true;
+  }
+  auto upperBound = unrollLoopByValue(loop, *components);
   return upperBound >= threshold;
 }

@@ -58,6 +65,8 @@ struct UnrollCountedLoop : public OpRewritePattern<cudaq::cc::LoopOp> {
     // requires that all LoopOp operations be rewritten. Despite the setting of
     // this flag, it may not be possible to fully unroll every LoopOp anyway.
     // Check for cases that are clearly not going to be unrolled.
+    if (loop->hasAttr(cudaq::opt::DeadLoopAttr))
+      return failure();
     if (!allowBreak && !cudaq::opt::isaCountedLoop(loop)) {
       if (signalFailure)
         loop.emitOpError("not a simple counted loop");
@@ -82,8 +91,6 @@ struct UnrollCountedLoop : public OpRewritePattern<cudaq::cc::LoopOp> {
     auto components = cudaq::opt::getLoopComponents(loop);
     assert(components && "counted loop must have components");
     auto unrollBy = unrollLoopByValue(loop, *components);
-    if (components->isClosedIntervalForm())
-      ++unrollBy;
     Type inductionTy = loop.getOperands()[components->induction].getType();
     LLVM_DEBUG(llvm::dbgs() << "unrolling loop by " << unrollBy
                             << " iterations\n");
diff --git a/lib/Optimizer/Transforms/LowerToCFGPatterns.inc b/lib/Optimizer/Transforms/LowerToCFGPatterns.inc
index c094741aeb..a449d6df69 100644
--- a/lib/Optimizer/Transforms/LowerToCFGPatterns.inc
+++ b/lib/Optimizer/Transforms/LowerToCFGPatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
diff --git a/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc b/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc
index 642a7a7663..f6fbf30d3f 100644
--- a/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc
+++ b/lib/Optimizer/Transforms/WriteAfterWriteEliminationPatterns.inc
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/****************************************************************-*- C++ -*-****
  * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
  * All rights reserved.                                                        *
  *                                                                             *
diff --git a/targettests/execution/uccsd.cpp b/targettests/execution/uccsd.cpp
index efc076abad..1011e2191e 100644
--- a/targettests/execution/uccsd.cpp
+++ b/targettests/execution/uccsd.cpp
@@ -8,7 +8,7 @@

 // clang-format off
 // RUN: nvq++ %cpp_std --target anyon --emulate %s -o %t && %t | FileCheck %s
-// RUN: nvq++ %cpp_std --target braket --emulate %s -o %t && %t | FileCheck %s
+// XUN: if %braket_avail; then nvq++ %cpp_std --target braket --emulate %s -o %t && %t | FileCheck %s ; fi
 // RUN: nvq++ %cpp_std --target ionq --emulate %s -o %t && %t | FileCheck %s
 // RUN: nvq++ %cpp_std --target iqm --iqm-machine Apollo --emulate %s -o %t && %t | FileCheck %s
 // RUN: nvq++ %cpp_std --target oqc --emulate %s -o %t && %t | FileCheck %s
diff --git a/test/AST-Quake/infinite_loop.cpp b/test/AST-Quake/infinite_loop.cpp
new file mode 100644
index 0000000000..10c803a78d
--- /dev/null
+++ b/test/AST-Quake/infinite_loop.cpp
@@ -0,0 +1,63 @@
+/*******************************************************************************
+ * Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                  *
+ * All rights reserved.                                                        *
+ *                                                                             *
+ * This source code and the accompanying materials are made available under   *
+ * the terms of the Apache License 2.0 which accompanies this distribution.    *
+ ******************************************************************************/
+
+// RUN: cudaq-quake %cpp_std %s | cudaq-opt --memtoreg=quantum=0 --canonicalize --cc-loop-normalize |& FileCheck %s
+
+#include <cudaq.h>
+
+// Counted loop structure when condition is always true
+
+__qpu__ int t1() {
+  cudaq::qubit q;
+  for (std::uint32_t u = 1; u <= 0xffffffff; u++)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t2() {
+  cudaq::qubit q;
+  for (std::int32_t u = 1; u <= 0x7fffffff; u++)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t3() {
+  cudaq::qubit q;
+  for (std::uint64_t u = 5; u <= 0xffffffffffffffff; u++)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t4() {
+  cudaq::qubit q;
+  for (std::int64_t u = 16; u <= 0x7fffffffffffffff; u++)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t5() {
+  cudaq::qubit q;
+  for (std::uint64_t u = -14; u >= 0; u--)
+    x(q);
+  return 0;
+}
+
+__qpu__ int t6() {
+  cudaq::qubit q;
+  std::int64_t cmp = 0x8000000000000000;
+  for (std::int64_t u = 83; u >= cmp; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
+// CHECK: Loop condition is always true. This loop is not supported in a kernel.
diff --git a/test/AST-Quake/loop_normal.cpp b/test/AST-Quake/loop_normal.cpp
index a4bf4bfad5..62ff0aa75b 100644
--- a/test/AST-Quake/loop_normal.cpp
+++ b/test/AST-Quake/loop_normal.cpp
@@ -402,3 +402,278 @@
 // CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[VAL_1]] : i32
 // CHECK: cc.continue %[[VAL_15]] : i32
 // CHECK: } {normalized}
+
+// In cases where the number of iterations is invalid, we should normalize to
+// a count of 0.
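+// For example, `i = 1; i < -1; i++` gives, per getIterationsConstant(),
+// (end - init + step) / step = (-2 - 1 + 1) / 1 = -2 (the semi-open bound -1
+// is first adjusted to -2), and the negative count clamps to 0.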
+
+__qpu__ void non_iterating_loop2() {
+  cudaq::qvector q(100);
+  for (std::int64_t i = 1; i < -1; i++)
+    x(q[i]);
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_non_iterating_loop2._Z19non_iterating_loop2v() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i64
+// CHECK-DAG: %[[VAL_2:.*]] = quake.alloca !quake.veq<100>
+// CHECK: %[[VAL_3:.*]] = cc.loop while ((%[[VAL_4:.*]] = %[[VAL_0]]) -> (i64)) {
+// CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_4]], %[[VAL_0]] : i64
+// CHECK: cc.condition %[[VAL_5]](%[[VAL_4]] : i64)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_6:.*]]: i64):
+// CHECK: %[[VAL_7:.*]] = arith.addi %[[VAL_6]], %[[VAL_1]] : i64
+// CHECK: %[[VAL_8:.*]] = quake.extract_ref %[[VAL_2]]{{\[}}%[[VAL_7]]] : (!quake.veq<100>, i64) -> !quake.ref
+// CHECK: quake.x %[[VAL_8]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_6]] : i64
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_9:.*]]: i64):
+// CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[VAL_1]] : i64
+// CHECK: cc.continue %[[VAL_10]] : i64
+// CHECK: } {normalized}
+// CHECK: return
+// CHECK: }
+
+__qpu__ int f2a() {
+  cudaq::qubit q;
+  for (int u = 1; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f2a._Z3f2av() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[VAL_2:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_3:.*]] = cc.loop while ((%[[VAL_4:.*]] = %[[VAL_0]]) -> (i32)) {
+// CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_4]], %[[VAL_0]] : i32
+// CHECK: cc.condition %[[VAL_5]](%[[VAL_4]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_6:.*]]: i32):
+// CHECK: quake.x %[[VAL_2]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_6]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_1]] : i32
+// CHECK: cc.continue %[[VAL_8]] : i32
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+__qpu__ int f2b() {
+  cudaq::qubit q;
+  for (int u = 10; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f2b._Z3f2bv() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[VAL_2:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_3:.*]] = cc.loop while ((%[[VAL_4:.*]] = %[[VAL_0]]) -> (i32)) {
+// CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_4]], %[[VAL_0]] : i32
+// CHECK: cc.condition %[[VAL_5]](%[[VAL_4]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_6:.*]]: i32):
+// CHECK: quake.x %[[VAL_2]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_6]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_1]] : i32
+// CHECK: cc.continue %[[VAL_8]] : i32
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_0]] : i32
+// CHECK: }
+
+__qpu__ int f4() {
+  cudaq::qubit q;
+  for (std::int64_t u = 6; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f4._Z2f4v() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_0:.*]] = arith.constant 0 : i64
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : i64
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_3:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_4:.*]] = cc.loop while ((%[[VAL_5:.*]] = %[[VAL_0]]) -> (i64)) {
+// CHECK: %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_0]] : i64
+// CHECK: cc.condition %[[VAL_6]](%[[VAL_5]] : i64)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i64):
+// CHECK: quake.x %[[VAL_3]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_7]] : i64
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_8:.*]]: i64):
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_1]] : i64
+// CHECK: cc.continue %[[VAL_9]] : i64
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_2]] : i32
+// CHECK: }
+
+__qpu__ int m1(unsigned z) {
+  cudaq::qubit q;
+  for (unsigned u = 1; u < z; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_m1._Z2m1j(
+// CHECK-SAME: %[[VAL_0:.*]]: i32) -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_4:.*]] = arith.subi %[[VAL_0]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_7:.*]] = cc.loop while ((%[[VAL_8:.*]] = %[[VAL_1]]) -> (i32)) {
+// CHECK: %[[VAL_9:.*]] = arith.cmpi ne, %[[VAL_8]], %[[VAL_4]] : i32
+// CHECK: cc.condition %[[VAL_9]](%[[VAL_8]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_10:.*]]: i32):
+// CHECK: quake.x %[[VAL_3]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_10]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_11:.*]]: i32):
+// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_2]] : i32
+// CHECK: cc.continue %[[VAL_12]] : i32
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_1]] : i32
+// CHECK: }
+
+__qpu__ int m2(int z) {
+  cudaq::qubit q;
+  for (int u = 1; u < z; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_m2._Z2m2i(
+// CHECK-SAME: %[[VAL_0:.*]]: i32) -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_4:.*]] = arith.subi %[[VAL_0]], %[[VAL_2]] : i32
+// CHECK: %[[VAL_5:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_1]] : i32
+// CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_5]], %[[VAL_4]], %[[VAL_1]] : i32
+// CHECK: %[[VAL_7:.*]] = cc.loop while ((%[[VAL_8:.*]] = %[[VAL_1]]) -> (i32)) {
+// CHECK: %[[VAL_9:.*]] = arith.cmpi ne, %[[VAL_8]], %[[VAL_6]] : i32
+// CHECK: cc.condition %[[VAL_9]](%[[VAL_8]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_10:.*]]: i32):
+// CHECK: quake.x %[[VAL_3]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_10]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_11:.*]]: i32):
+// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_11]], %[[VAL_2]] : i32
+// CHECK: cc.continue %[[VAL_12]] : i32
+// CHECK: } {normalized}
+// CHECK: return %[[VAL_1]] : i32
+// CHECK: }
+
+// Dead loops: no unsigned value will ever be less than 0, so these loops will
+// never execute. Make sure they are marked "dead" by the normalizer.
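+// The normalizer rewrites each condition to `arith.constant false` and tags
+// the loop op with the `dead` attribute (checked below); the loop-unroll pass
+// then skips any loop carrying that attribute.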
+
+__qpu__ void non_iterating_loop1() {
+  cudaq::qvector q(100);
+  for (std::uint64_t i = 1; i < 0; i++)
+    x(q[i]);
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_non_iterating_loop1._Z19non_iterating_loop1v() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
+// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<100>
+// CHECK: %[[VAL_3:.*]] = cc.loop while ((%[[VAL_4:.*]] = %[[VAL_1]]) -> (i64)) {
+// CHECK: cc.condition %[[VAL_0]](%[[VAL_4]] : i64)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_5:.*]]: i64):
+// CHECK: %[[VAL_6:.*]] = quake.extract_ref %[[VAL_2]]{{\[}}%[[VAL_5]]] : (!quake.veq<100>, i64) -> !quake.ref
+// CHECK: quake.x %[[VAL_6]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_5]] : i64
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i64):
+// CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_1]] : i64
+// CHECK: cc.continue %[[VAL_8]] : i64
+// CHECK: } {dead}
+// CHECK: return
+// CHECK: }
+
+__qpu__ int f1a() {
+  cudaq::qubit q;
+  for (unsigned u = 1; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f1a._Z3f1av() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_3:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_4:.*]] = cc.loop while ((%[[VAL_5:.*]] = %[[VAL_2]]) -> (i32)) {
+// CHECK: cc.condition %[[VAL_0]](%[[VAL_5]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_6:.*]]: i32):
+// CHECK: quake.x %[[VAL_3]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_6]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] : i32
+// CHECK: cc.continue %[[VAL_8]] : i32
+// CHECK: } {dead}
+// CHECK: return %[[VAL_1]] : i32
+// CHECK: }
+
+__qpu__ int f1b() {
+  cudaq::qubit q;
+  for (unsigned u = 10; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f1b._Z3f1bv() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK: %[[VAL_3:.*]] = arith.constant 10 : i32
+// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_5:.*]] = cc.loop while ((%[[VAL_6:.*]] = %[[VAL_3]]) -> (i32)) {
+// CHECK: cc.condition %[[VAL_0]](%[[VAL_6]] : i32)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i32):
+// CHECK: quake.x %[[VAL_4]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_7]] : i32
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_8:.*]]: i32):
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_1]] : i32
+// CHECK: cc.continue %[[VAL_9]] : i32
+// CHECK: } {dead}
+// CHECK: return %[[VAL_2]] : i32
+// CHECK: }
+
+__qpu__ int f3() {
+  cudaq::qubit q;
+  for (std::uint64_t u = 22; u < 0; u++)
+    x(q);
+  return 0;
+}
+
+// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_f3._Z2f3v() -> i32 attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} {
+// CHECK: %[[VAL_0:.*]] = arith.constant false
+// CHECK: %[[VAL_1:.*]] = arith.constant 22 : i64
+// CHECK: %[[VAL_2:.*]] = arith.constant 1 : i64
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK: %[[VAL_4:.*]] = quake.alloca !quake.ref
+// CHECK: %[[VAL_5:.*]] = cc.loop while ((%[[VAL_6:.*]] = %[[VAL_1]]) -> (i64)) {
+// CHECK: cc.condition %[[VAL_0]](%[[VAL_6]] : i64)
+// CHECK: } do {
+// CHECK: ^bb0(%[[VAL_7:.*]]: i64):
+// CHECK: quake.x %[[VAL_4]] : (!quake.ref) -> ()
+// CHECK: cc.continue %[[VAL_7]] : i64
+// CHECK: } step {
+// CHECK: ^bb0(%[[VAL_8:.*]]: i64):
+// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : i64
+// CHECK: cc.continue %[[VAL_9]] : i64
+// CHECK: } {dead}
+// CHECK: return %[[VAL_3]] : i32
+// CHECK: }
diff --git a/test/Quake/loop_normalize.qke b/test/Quake/loop_normalize.qke
deleted file mode 100644
index f040f87843..0000000000
--- a/test/Quake/loop_normalize.qke
+++ /dev/null
@@ -1,95 +0,0 @@
-// ========================================================================== //
-// Copyright (c) 2022 - 2025 NVIDIA Corporation & Affiliates.                 //
-// All rights reserved.                                                       //
-//                                                                            //
-// This source code and the accompanying materials are made available under  //
-// the terms of the Apache License 2.0 which accompanies this distribution.  //
-// ========================================================================== //
-
-// RUN: cudaq-opt -cc-loop-normalize %s | FileCheck %s
-
-module {
-  func.func @test_positive_boundaries() {
-    %c0_i64 = arith.constant 0 : i64
-    %c1_i64 = arith.constant 1 : i64
-    %0 = quake.alloca !quake.veq<0>
-    %1 = cc.loop while ((%arg0 = %c1_i64) -> (i64)) {
-      %2 = arith.cmpi ult, %arg0, %c0_i64 : i64
-      cc.condition %2(%arg0 : i64)
-    } do {
-    ^bb0(%arg0: i64):
-      %2 = arith.subi %arg0, %c1_i64 : i64
-      %3 = quake.extract_ref %0[%2] : (!quake.veq<0>, i64) -> !quake.ref
-      quake.x %3 : (!quake.ref) -> ()
-      cc.continue %arg0 : i64
-    } step {
-    ^bb0(%arg0: i64):
-      %2 = arith.addi %arg0, %c1_i64 : i64
-      cc.continue %2 : i64
-    }
-    return
-  }
-
-// CHECK-LABEL: func.func @test_positive_boundaries() {
-// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i64
-// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i64
-// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<0>
-// CHECK: %[[VAL_3:.*]] = cc.loop while ((%arg0 = %[[VAL_0]]) -> (i64)) {
-// CHECK: %[[VAL_4:.*]] = arith.cmpi ne, %arg0, %[[VAL_0]] : i64
-// CHECK: cc.condition %[[VAL_4]](%arg0 : i64)
-// CHECK: } do {
-// CHECK: ^bb0(%arg0: i64):
-// CHECK: %[[VAL_4:.*]] = quake.extract_ref %[[VAL_2]][%arg0] : (!quake.veq<0>, i64) -> !quake.ref
-// CHECK: quake.x %[[VAL_4]] : (!quake.ref) -> ()
-// CHECK: cc.continue %arg0 : i64
-// CHECK: } step {
-// CHECK: ^bb0(%arg0: i64):
-// CHECK: %[[VAL_4:.*]] = arith.addi %arg0, %[[VAL_1]] : i64
-// CHECK: cc.continue %[[VAL_4]] : i64
-// CHECK: } {normalized}
-// CHECK: return
-// CHECK: }
-
-  func.func @test_negative_boundaries() {
-    %c-1_i32 = arith.constant -1 : i32
-    %c1_i32 = arith.constant 1 : i32
-    %c0_i32 = arith.constant 0 : i32
-    %0 = quake.alloca !quake.veq<0>
-    %1 = cc.loop while ((%arg0 = %c0_i32) -> (i32)) {
-      %2 = arith.cmpi slt, %arg0, %c-1_i32 : i32
-      cc.condition %2(%arg0 : i32)
-    } do {
-    ^bb0(%arg0: i32):
-      %2 = cc.cast signed %arg0 : (i32) -> i64
-      %3 = quake.extract_ref %0[%2] : (!quake.veq<0>, i64) -> !quake.ref
-      quake.x %3 : (!quake.ref) -> ()
-      cc.continue %arg0 : i32
-    } step {
-    ^bb0(%arg0: i32):
-      %2 = arith.addi %arg0, %c1_i32 : i32
-      cc.continue %2 : i32
-    }
-    return
-  }
-
-// CHECK-LABEL: func.func @test_negative_boundaries() {
-// CHECK: %[[VAL_0:.*]] = arith.constant 0 : i32
-// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
-// CHECK: %[[VAL_2:.*]] = quake.alloca !quake.veq<0>
-// CHECK: %[[VAL_3:.*]] = cc.loop while ((%arg0 = %[[VAL_0]]) -> (i32)) {
-// CHECK: %[[VAL_4:.*]] = arith.cmpi ne, %arg0, %[[VAL_0]] : i32
-// CHECK: cc.condition %[[VAL_4]](%arg0 : i32)
-// CHECK: } do {
-// CHECK: ^bb0(%arg0: i32):
-// CHECK: %[[VAL_4:.*]] = cc.cast signed %arg0 : (i32) -> i64
-// CHECK: %[[VAL_5:.*]] = quake.extract_ref %[[VAL_2]][%[[VAL_4]]] : (!quake.veq<0>, i64) -> !quake.ref
-// CHECK: quake.x %[[VAL_5]] : (!quake.ref) -> ()
-// CHECK: cc.continue %arg0 : i32
-// CHECK: } step {
-// CHECK: ^bb0(%arg0: i32):
-// CHECK: %[[VAL_4:.*]] = arith.addi %arg0, %[[VAL_1]] : i32
-// CHECK: cc.continue %[[VAL_4]] : i32
-// CHECK: } {normalized}
-// CHECK: return
-// CHECK: }
-}