diff --git a/docs/Dialects/Pipeline/RationalePipeline.md b/docs/Dialects/Pipeline/RationalePipeline.md
index b52593d6d0..4f25273f53 100644
--- a/docs/Dialects/Pipeline/RationalePipeline.md
+++ b/docs/Dialects/Pipeline/RationalePipeline.md
@@ -32,21 +32,24 @@ operations representing a dataflow graph.
 
 ### Phase 2: Scheduled
 
-Uisng e.g. the `pipeline-schedule-linear` pass, a pipeline may be scheduled wrt.
+Using e.g. the `pipeline-schedule-linear` pass, a pipeline may be scheduled wrt.
 an operator library denoting the latency of each operation. The result of a scheduling
 problem is the movement of operations to specific blocks.
 Each block represents a pipeline stage, with `pipeline.stage` operations being
 stage-terminating operations that determine the order of stages.
 
 At this level, the semantics of the pipeline are that **any SSA def-use edge that
-crosses a stage is a pipeline register**.  
-Note that we also intend to add support for attaching multi-cycle latencies to
-SSA values in the future, which will allow for more fine-grained control over
-the registers in the pipeline.  
+crosses a stage is a pipeline register**. To prevent cross-block canonicalization
+from occurring post-scheduling, these def-use edges must be expressed using the
+`pipeline.src` operation, which is used to refer to a value from within the basic
+block that uses it, but which is defined in _any_ prior, dominating stage (block).  
 Given these relaxed semantics, this level of abstraction is suitable for pipeline
 retiming. Operations may be moved from one stage to another, or new blocks may be
 inserted between existing blocks, without changing the semantics of the pipeline.
 The only requirement is that def-use edges wrt. the order of stages are preserved.
+For expressing semantics about multi-cycle latencies of SSA values, please refer
+to the `multicycle operations` segment below.  
+
 
 ```mlir
 %out = pipeline.scheduled(%arg0, %arg1, %go) clock %clk reset %rst : (i32, i32, i1) -> (i32) {
@@ -55,12 +58,18 @@ The only requirement is that def-use edges wrt. the order of stages are preserve
   pipeline.stage ^bb1 enable %go
 
 ^bb1:
-  %add1 = comb.add %add0, %a0 : i32 // %a0 is a block argument fed through a stage.
+  // %add0 and %a0 are referenced in this stage via dominance, with
+  // pipeline.src preventing cross-block canonicalization.
+  %add0_bb1 = pipeline.src %add0 : i32
+  %a0_bb1 = pipeline.src %a0 : i32
+  %add1 = comb.add %add0_bb1, %a0_bb1 : i32
   pipeline.stage ^bb2 enable %go
 
 ^bb2:
-  %add2 = comb.add %add1, %add0 : i32 // %add0 crosses multiple stages.
-  pipeline.return %add2 enable %go : i32 // %go crosses multiple stages
+  %add0_bb2 = pipeline.src %add0 : i32
+  %add1_bb2 = pipeline.src %add1 : i32
+  %add2 = comb.add %add1_bb2, %add0_bb2 : i32 // %add0 crosses multiple stages.
+  pipeline.return %add2 enable %go : i32   // %go crosses multiple stages
 }
 ```
 
@@ -145,7 +154,8 @@ pipeline.stage ^bb4
 ^bb4:
 // It is legal to reference %out here. This will also imply a register
 // between stage bb3 and bb4.
-foo.bar %out : i32
+%out_bb4 = pipeline.src %out : i32
+foo.bar %out_bb4 : i32
 ```
 
 which will register materialize to:
@@ -157,14 +167,14 @@ which will register materialize to:
   %dl2 = seq.compreg %dl1 : i32
   pipeline.latency.return %dl2 : i32
 }
-pipeline.stage ^bb2 pass(%out : i32)
+pipeline.stage ^bb2 pass(%out : i32)    // %out is passed through
 
 ^bb2(%out_s2 : i32):
-pipeline.stage ^bb3 pass(%out_s2 : i32)
+pipeline.stage ^bb3 pass(%out_s2 : i32) // %out is passed through
 
 
 ^bb3(%out_s3 : i32):
-pipeline.stage ^bb4 regs(%out_s3 : i32)
+pipeline.stage ^bb4 regs(%out_s3 : i32) // %out is registered
 
 ^bb4(%out_s4 : i32):
 foo.bar %out_s4 : i32
diff --git a/include/circt/Dialect/Pipeline/PipelineOps.td b/include/circt/Dialect/Pipeline/PipelineOps.td
index 898a7df313..58ba95dbe4 100644
--- a/include/circt/Dialect/Pipeline/PipelineOps.td
+++ b/include/circt/Dialect/Pipeline/PipelineOps.td
@@ -244,6 +244,26 @@ def ScheduledPipelineOp : PipelineBase<"scheduled"> {
   }];
 }
 
+def SourceOp : Op<Pipeline_Dialect, "src", [
+  TypesMatchWith<"input and result types are equivalent", "input", "output", "$_self">,
+  HasParent<"ScheduledPipelineOp">,
+  ]> {
+  let summary = "Pipeline source operation";
+  let description = [{
+    The `pipeline.src` operation represents a source operation in a scheduled,
+    non-register materialized pipeline.
+    It is used as a canonicalization barrier to prevent cross-block canonicalization
+    of operations that are not allowed to be moved or mutated across pipeline
+    stages (i.e. MLIR blocks).
+
+    To facilitate this, the operation is _not_ marked as `Pure`.
+  }];
+  let arguments = (ins AnyType:$input);
+  let results = (outs AnyType:$output);
+  let assemblyFormat = [{
+    $input `:` type($input) attr-dict
+  }];
+}
 
 def StageOp : Op<Pipeline_Dialect, "stage", [
     AttrSizedOperandSegments,
diff --git a/integration_test/Dialect/Pipeline/nonstallable/test1/nonstallable_test1.mlir b/integration_test/Dialect/Pipeline/nonstallable/test1/nonstallable_test1.mlir
index c0a038939f..ad7f0f872c 100644
--- a/integration_test/Dialect/Pipeline/nonstallable/test1/nonstallable_test1.mlir
+++ b/integration_test/Dialect/Pipeline/nonstallable/test1/nonstallable_test1.mlir
@@ -24,7 +24,8 @@ hw.module @nonstallable_test1(in %arg0: i32, in %go: i1, in %clock: !seq.clock,
   ^bb4(%s4_enable: i1):
     pipeline.stage ^bb5
   ^bb5(%s5_enable: i1):
-    pipeline.return %a0 : i32
+    %a0_bb5 = pipeline.src %a0 : i32
+    pipeline.return %a0_bb5 : i32
   }
   hw.output %out, %done : i32, i1
 }
diff --git a/integration_test/Dialect/Pipeline/nonstallable/test2/nonstallable_test2.mlir b/integration_test/Dialect/Pipeline/nonstallable/test2/nonstallable_test2.mlir
index 20ccdfe981..ac8f3b4105 100644
--- a/integration_test/Dialect/Pipeline/nonstallable/test2/nonstallable_test2.mlir
+++ b/integration_test/Dialect/Pipeline/nonstallable/test2/nonstallable_test2.mlir
@@ -24,7 +24,8 @@ hw.module @nonstallable_test2(in %arg0: i32, in %go: i1, in %clock: !seq.clock,
   ^bb4(%s4_enable: i1):
     pipeline.stage ^bb5
   ^bb5(%s5_enable: i1):
-    pipeline.return %a0 : i32
+    %a0_bb5 = pipeline.src %a0 : i32
+    pipeline.return %a0_bb5 : i32
   }
   hw.output %out, %done : i32, i1
 }
diff --git a/integration_test/Dialect/Pipeline/simple/simple.mlir b/integration_test/Dialect/Pipeline/simple/simple.mlir
index b41f5d8ba3..1786be9986 100644
--- a/integration_test/Dialect/Pipeline/simple/simple.mlir
+++ b/integration_test/Dialect/Pipeline/simple/simple.mlir
@@ -26,11 +26,15 @@ hw.module @simple(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clock : !seq.
       pipeline.stage ^bb1
 
     ^bb1(%s1_enable : i1):
-      %add1 = comb.add %add0, %a0 : i32
+      %add0_bb1 = pipeline.src %add0 : i32
+      %a0_bb1 = pipeline.src %a0 : i32
+      %add1 = comb.add %add0_bb1, %a0_bb1 : i32
       pipeline.stage ^bb2
 
     ^bb2(%s2_enable : i1):
-      %add2 = comb.add %add1, %add0 : i32
+      %add0_bb2 = pipeline.src %add0 : i32
+      %add1_bb2 = pipeline.src %add1 : i32
+      %add2 = comb.add %add1_bb2, %add0_bb2 : i32
       pipeline.return %add2 : i32
   }
   hw.output %out, %done : i32, i1
diff --git a/integration_test/Dialect/Pipeline/stall/stallTest.mlir b/integration_test/Dialect/Pipeline/stall/stallTest.mlir
index a1c1ff0850..beba598efb 100644
--- a/integration_test/Dialect/Pipeline/stall/stallTest.mlir
+++ b/integration_test/Dialect/Pipeline/stall/stallTest.mlir
@@ -26,11 +26,15 @@ hw.module @stallTest(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %stall : i1
       pipeline.stage ^bb1
 
     ^bb1(%s1_enable : i1):
-      %add1 = comb.add %add0, %a0 : i32
+      %add0_bb1 = pipeline.src %add0 : i32
+      %a0_bb1 = pipeline.src %a0 : i32
+      %add1 = comb.add %add0_bb1, %a0_bb1 : i32
       pipeline.stage ^bb2
 
     ^bb2(%s2_enable : i1):
-      %add2 = comb.add %add1, %add0 : i32
+      %add0_bb2 = pipeline.src %add0 : i32
+      %add1_bb2 = pipeline.src %add1 : i32
+      %add2 = comb.add %add1_bb2, %add0_bb2 : i32
       pipeline.return %add2 : i32
   }
   hw.output %out, %done : i32, i1
diff --git a/lib/Dialect/Pipeline/PipelineOps.cpp b/lib/Dialect/Pipeline/PipelineOps.cpp
index 6809e4a18f..98579959dc 100644
--- a/lib/Dialect/Pipeline/PipelineOps.cpp
+++ b/lib/Dialect/Pipeline/PipelineOps.cpp
@@ -533,7 +533,7 @@ Block *ScheduledPipelineOp::getLastStage() { return getOrderedStages().back(); }
 
 bool ScheduledPipelineOp::isMaterialized() {
   // We determine materialization as if any pipeline stage has an explicit
-  // input (apart from the stage valid signal).
+  // input (apart from the stage enable signal).
   return llvm::any_of(getStages(), [this](Block &block) {
     // The entry stage doesn't count since it'll always have arguments.
     if (&block == getEntryStage())
@@ -542,6 +542,43 @@ bool ScheduledPipelineOp::isMaterialized() {
   });
 }
 
+// Returns true if `current` is `parent` itself or is nested (at any depth)
+// within `parent`. The lookup walks upwards through the enclosing blocks of
+// `current`; `stopAt` bounds that walk: as soon as `stopAt` is reached, the
+// lookup gives up and returns false (checked before the `parent` comparison).
+static bool isNestedBlock(Block *stopAt, Block *parent, Block *current) {
+  while (current) {
+    if (current == stopAt)
+      return false;
+    if (current == parent)
+      return true;
+    current = current->getParentOp()->getBlock();
+  }
+  return false;
+}
+
+// Check whether the value referenced by `use` is defined within the provided
+// `stage` (directly, or in a block nested within it). It is assumed that the
+// operation owning `use` resides within `stage`; this is asserted below.
+// `stopAt` bounds the upwards block lookup: once `stopAt` is encountered, the
+// value is considered to be defined outside of `stage` and false is returned.
+static bool useDefinedInStage(Block *stopAt, Block *stage, OpOperand &use) {
+  Block *useBlock = use.getOwner()->getBlock();
+  Block *definingBlock = use.get().getParentBlock();
+
+  assert(isNestedBlock(stopAt, stage, useBlock) &&
+         "`use` must originate from within `stage`");
+
+  // Common-case checks...
+  if (useBlock == definingBlock || stage == definingBlock)
+    return true;
+
+  // Else, recurse upwards from the defining block to see if we can find the
+  // stage.
+  Block *currBlock = definingBlock;
+  return isNestedBlock(stopAt, stage, currBlock);
+}
+
 LogicalResult ScheduledPipelineOp::verify() {
   // Verify that all block are terminated properly.
   auto &stages = getStages();
@@ -579,38 +616,61 @@ LogicalResult ScheduledPipelineOp::verify() {
   if (hasStall())
     extLikeInputs.insert(getStall());
 
-  // Phase invariant - if any block has arguments apart from the stage valid
-  // argument, we are in register materialized mode. Check that all values
-  // used within a stage are defined within the stage.
+  // Phase invariant - Check that all values used within a stage are valid
+  // based on the materialization mode. This is a walk, since this condition
+  // should also apply to nested operations.
   bool materialized = isMaterialized();
-  if (materialized) {
-    for (auto &stage : stages) {
-      for (auto &op : stage) {
-        for (auto [index, operand] : llvm::enumerate(op.getOperands())) {
-          bool err = false;
-          if (extLikeInputs.contains(operand)) {
-            // This is an external input; legal to reference everywhere.
+  Block *parentBlock = getOperation()->getBlock();
+  for (auto &stage : stages) {
+    auto walkRes = stage.walk([&](Operation *op) {
+      // pipeline.src operations are only legal in non-materialized mode.
+      if (isa<SourceOp>(op)) {
+        if (materialized) {
+          op->emitOpError(
+              "Pipeline is in register materialized mode - pipeline.src "
+              "operations are not allowed");
+          return WalkResult::interrupt();
+        }
+
+        // In non-materialized mode, pipeline.src operations are required, and
+        // are what implicitly allows cross-stage references, since they never
+        // reach the verification code below.
+        return WalkResult::advance();
+      }
+
+      for (auto [index, operand] : llvm::enumerate(op->getOpOperands())) {
+        // External inputs (including clock, reset, stall) are allowed
+        // everywhere
+        if (extLikeInputs.contains(operand.get()))
+          continue;
+
+        // Constant-like inputs are allowed everywhere
+        if (auto *definingOp = operand.get().getDefiningOp()) {
+          // Constants are allowed to be used across stages.
+          if (definingOp->hasTrait<OpTrait::ConstantLike>())
             continue;
-          }
+        }
 
-          if (auto *definingOp = operand.getDefiningOp()) {
-            // Constants are allowed to be used across stages.
-            if (definingOp->hasTrait<OpTrait::ConstantLike>())
-              continue;
-            err = definingOp->getBlock() != &stage;
+        // Values must always be defined in the same stage.
+        // Materialization mode defines the actual mitigation method.
+        if (!useDefinedInStage(parentBlock, &stage, operand)) {
+          auto err = op->emitOpError("operand ")
+                     << index << " is defined in a different stage. ";
+          if (materialized) {
+            err << "Value should have been passed through block arguments";
           } else {
-            // This is a block argument;
-            err = !llvm::is_contained(stage.getArguments(), operand);
+            err << "Value should have been passed through a `pipeline.src` "
+                   "op";
           }
-
-          if (err)
-            return op.emitOpError(
-                       "Pipeline is in register materialized mode - operand ")
-                   << index
-                   << " is defined in a different stage, which is illegal.";
+          return WalkResult::interrupt();
         }
       }
-    }
+
+      return WalkResult::advance();
+    });
+
+    if (walkRes.wasInterrupted())
+      return failure();
   }
 
   if (auto stallability = getStallability()) {
@@ -1001,9 +1061,9 @@ LogicalResult LatencyOp::verify() {
     return success();
   }
 
-  // Verify that there's at least one result type. Latency ops don't make sense
-  // if they're not delaying anything, and we're not yet prepared to support
-  // side-effectful bodies.
+  // Verify that there's at least one result type. Latency ops don't make
+  // sense if they're not delaying anything, and we're not yet prepared to
+  // support side-effectful bodies.
   if (getNumResults() == 0)
     return emitOpError("expected at least one result type.");
 
diff --git a/lib/Dialect/Pipeline/Transforms/ExplicitRegs.cpp b/lib/Dialect/Pipeline/Transforms/ExplicitRegs.cpp
index a1dd15344c..ca4001bd53 100644
--- a/lib/Dialect/Pipeline/Transforms/ExplicitRegs.cpp
+++ b/lib/Dialect/Pipeline/Transforms/ExplicitRegs.cpp
@@ -199,6 +199,16 @@ void ExplicitRegsPass::runOnPipeline(ScheduledPipelineOp pipeline) {
           // resides within the current pipeline stage. No routing needed.
           continue;
         }
+
+        // At this point, only `pipeline.src` operations are legally allowed to
+        // reference operands from other stages.
+        SourceOp srcOp =
+            llvm::dyn_cast_or_null<pipeline::SourceOp>(operand.getOwner());
+        assert(
+            srcOp &&
+            "Only pipeline.srcOp's should be allowed to reference "
+            "values outside of this block. Verifiers should have caught this");
+
         Value reroutedValue = routeThroughStage(operand.get(), stage);
         if (reroutedValue != operand.get())
           op->setOperand(operand.getOperandNumber(), reroutedValue);
@@ -290,6 +300,12 @@ void ExplicitRegsPass::runOnPipeline(ScheduledPipelineOp pipeline) {
 
   // Clear internal state. See https://github.com/llvm/circt/issues/3235
   stageRegOrPassMap.clear();
+
+  // Finally, erase all of the pipeline.src ops now that they've become no-ops.
+  for (auto srcOp : llvm::make_early_inc_range(pipeline.getOps<SourceOp>())) {
+    srcOp.getResult().replaceAllUsesWith(srcOp.getInput());
+    srcOp.erase();
+  }
 }
 
 void ExplicitRegsPass::runOnOperation() {
diff --git a/lib/Dialect/Pipeline/Transforms/ScheduleLinearPipeline.cpp b/lib/Dialect/Pipeline/Transforms/ScheduleLinearPipeline.cpp
index fbced692a4..c57dc066ab 100644
--- a/lib/Dialect/Pipeline/Transforms/ScheduleLinearPipeline.cpp
+++ b/lib/Dialect/Pipeline/Transforms/ScheduleLinearPipeline.cpp
@@ -198,10 +198,33 @@ ScheduleLinearPipelinePass::schedulePipeline(UnscheduledPipelineOp pipeline) {
 
   for (auto [startTime, ops] : stageMap) {
     Block *stage = schedPipeline.getStage(startTime);
+
+    // Caching of SourceOp passthrough values defined in this stage.
+    mlir::DenseMap<Value, Value> sourceOps;
+    auto getOrCreateSourceOp = [&](OpOperand &opOperand) -> Value {
+      Value v = opOperand.get();
+      auto it = sourceOps.find(v);
+      if (it == sourceOps.end()) {
+        b.setInsertionPoint(opOperand.getOwner());
+        it = sourceOps
+                 .try_emplace(v, b.create<SourceOp>(v.getLoc(), v).getResult())
+                 .first;
+      }
+      return it->second;
+    };
+
     assert(stage && "Stage not found");
     Operation *stageTerminator = stage->getTerminator();
-    for (auto *op : ops)
+    for (auto *op : ops) {
       op->moveBefore(stageTerminator);
+
+      // If the operation references values defined outside of this stage,
+      // modify their uses to point to the corresponding SourceOp.
+      for (OpOperand &operand : op->getOpOperands()) {
+        if (operand.get().getParentBlock() != stage)
+          operand.set(getOrCreateSourceOp(operand));
+      }
+    }
   }
 
   // Remove the unscheduled pipeline
diff --git a/test/Dialect/Kanagawa/Transforms/schedule_pipeline.mlir b/test/Dialect/Kanagawa/Transforms/schedule_pipeline.mlir
index 399ec3b07e..7910a3edcf 100644
--- a/test/Dialect/Kanagawa/Transforms/schedule_pipeline.mlir
+++ b/test/Dialect/Kanagawa/Transforms/schedule_pipeline.mlir
@@ -16,32 +16,39 @@
 // CHECK:           operator_type @comb.shrs [latency<1>]
 // CHECK:         }
 
-// CHECK-LABEL:   kanagawa.class sym @SchedulePipeline {
-// CHECK:           kanagawa.method.df @foo(%[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i32) -> i32 {
-// CHECK:             %[[VAL_3:.*]] = kanagawa.sblock.isolated (%[[VAL_4:.*]] : i32 = %[[VAL_1]], %[[VAL_5:.*]] : i32 = %[[VAL_2]]) -> i32 {
-// CHECK:               %[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]], %[[VAL_9:.*]] = kanagawa.pipeline.header
-// CHECK:               %[[VAL_10:.*]], %[[VAL_11:.*]] = pipeline.scheduled(%[[VAL_12:.*]] : i32 = %[[VAL_4]], %[[VAL_13:.*]] : i32 = %[[VAL_5]]) stall(%[[VAL_9]]) clock(%[[VAL_6]]) reset(%[[VAL_7]]) go(%[[VAL_8]]) entryEn(%[[VAL_14:.*]])  -> (out0 : i32) {
-// CHECK:                 %[[VAL_15:.*]] = comb.mul %[[VAL_12]], %[[VAL_13]] {ssp.operator_type = @comb.mul} : i32
-// CHECK:                 pipeline.stage ^bb1
-// CHECK:               ^bb1(%[[VAL_16:.*]]: i1):
-// CHECK:                 %[[VAL_17:.*]] = comb.add %[[VAL_12]], %[[VAL_13]] {ssp.operator_type = @comb.add} : i32
-// CHECK:                 pipeline.stage ^bb2
-// CHECK:               ^bb2(%[[VAL_18:.*]]: i1):
-// CHECK:                 %[[VAL_19:.*]] = comb.sub %[[VAL_17]], %[[VAL_15]] {ssp.operator_type = @comb.sub} : i32
-// CHECK:                 pipeline.stage ^bb3
-// CHECK:               ^bb3(%[[VAL_20:.*]]: i1):
-// CHECK:                 %[[VAL_21:.*]] = comb.mul %[[VAL_19]], %[[VAL_17]] {ssp.operator_type = @comb.mul} : i32
-// CHECK:                 pipeline.stage ^bb4
-// CHECK:               ^bb4(%[[VAL_22:.*]]: i1):
-// CHECK:                 pipeline.stage ^bb5
-// CHECK:               ^bb5(%[[VAL_23:.*]]: i1):
-// CHECK:                 pipeline.return %[[VAL_21]] : i32
+// CHECK:           kanagawa.class sym @SchedulePipeline {
+// CHECK:             kanagawa.method.df @foo(%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32)  -> i32 {
+// CHECK:               %[[VAL_2:.*]] = kanagawa.sblock.isolated (%[[VAL_3:.*]] : i32 = %[[VAL_0]], %[[VAL_4:.*]] : i32 = %[[VAL_1]]) -> i32 {
+// CHECK:                 %[[VAL_5:.*]], %[[VAL_6:.*]], %[[VAL_7:.*]], %[[VAL_8:.*]] = kanagawa.pipeline.header
+// CHECK:                 %[[VAL_9:.*]], %[[VAL_10:.*]] = pipeline.scheduled(%[[VAL_11:.*]] : i32 = %[[VAL_3]], %[[VAL_12:.*]] : i32 = %[[VAL_4]]) stall(%[[VAL_8]]) clock(%[[VAL_5]]) reset(%[[VAL_6]]) go(%[[VAL_7]]) entryEn(%[[VAL_13:.*]])  -> (out0 : i32) {
+// CHECK:                   %[[VAL_14:.*]] = comb.mul %[[VAL_11]], %[[VAL_12]] {ssp.operator_type = @comb.mul} : i32
+// CHECK:                   pipeline.stage ^bb1
+// CHECK:                 ^bb1(%[[VAL_15:.*]]: i1):
+// CHECK:                   %[[VAL_16:.*]] = pipeline.src %[[VAL_11]] : i32
+// CHECK:                   %[[VAL_17:.*]] = pipeline.src %[[VAL_12]] : i32
+// CHECK:                   %[[VAL_18:.*]] = comb.add %[[VAL_16]], %[[VAL_17]] {ssp.operator_type = @comb.add} : i32
+// CHECK:                   pipeline.stage ^bb2
+// CHECK:                 ^bb2(%[[VAL_19:.*]]: i1):
+// CHECK:                   %[[VAL_20:.*]] = pipeline.src %[[VAL_18]] : i32
+// CHECK:                   %[[VAL_21:.*]] = pipeline.src %[[VAL_14]] : i32
+// CHECK:                   %[[VAL_22:.*]] = comb.sub %[[VAL_20]], %[[VAL_21]] {ssp.operator_type = @comb.sub} : i32
+// CHECK:                   pipeline.stage ^bb3
+// CHECK:                 ^bb3(%[[VAL_23:.*]]: i1):
+// CHECK:                   %[[VAL_24:.*]] = pipeline.src %[[VAL_22]] : i32
+// CHECK:                   %[[VAL_25:.*]] = pipeline.src %[[VAL_18]] : i32
+// CHECK:                   %[[VAL_26:.*]] = comb.mul %[[VAL_24]], %[[VAL_25]] {ssp.operator_type = @comb.mul} : i32
+// CHECK:                   pipeline.stage ^bb4
+// CHECK:                 ^bb4(%[[VAL_27:.*]]: i1):
+// CHECK:                   pipeline.stage ^bb5
+// CHECK:                 ^bb5(%[[VAL_28:.*]]: i1):
+// CHECK:                   %[[VAL_29:.*]] = pipeline.src %[[VAL_26]] : i32
+// CHECK:                   pipeline.return %[[VAL_29]] : i32
+// CHECK:                 }
+// CHECK:                 kanagawa.sblock.return %[[VAL_30:.*]] : i32
 // CHECK:               }
-// CHECK:               kanagawa.sblock.return %[[VAL_24:.*]] : i32
+// CHECK:               kanagawa.return %[[VAL_2]] : i32
 // CHECK:             }
-// CHECK:             kanagawa.return %[[VAL_3]] : i32
 // CHECK:           }
-// CHECK:         }
 
 kanagawa.design @foo {
 kanagawa.class sym @SchedulePipeline {
diff --git a/test/Dialect/Pipeline/Transforms/explicit-regs.mlir b/test/Dialect/Pipeline/Transforms/explicit-regs.mlir
index 796a43a004..4408725ecb 100644
--- a/test/Dialect/Pipeline/Transforms/explicit-regs.mlir
+++ b/test/Dialect/Pipeline/Transforms/explicit-regs.mlir
@@ -20,11 +20,15 @@ hw.module @testRegsOnly(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !
       pipeline.stage ^bb1
 
     ^bb1(%s1_enable : i1):
-      %add1 = comb.add %add0, %a0 : i32 // %a0 is a block argument fed through a stage.
+      %add0_bb1 = pipeline.src %add0 : i32
+      %a0_bb1 = pipeline.src %a0 : i32
+      %add1 = comb.add %add0_bb1, %a0_bb1 : i32
       pipeline.stage ^bb2
 
     ^bb2(%s2_enable : i1):
-      %add2 = comb.add %add1, %add0 : i32 // %add0 crosses multiple stages.
+      %add1_bb2 = pipeline.src %add1 : i32
+      %add0_bb2 = pipeline.src %add0_bb1 : i32
+      %add2 = comb.add %add1_bb2, %add0_bb2 : i32 // %add0 crosses multiple stages.
       pipeline.return %add2 : i32
   }
   hw.output %out#0, %out#1 : i32, i1
@@ -65,7 +69,8 @@ hw.module @testLatency1(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !
   ^bb3(%s3_enable : i1):
     pipeline.stage ^bb4
   ^bb4(%s4_enable : i1):
-    pipeline.return %out : i32
+    %out_bb4 = pipeline.src %out : i32
+    pipeline.return %out_bb4 : i32
   }
   hw.output %out#0 : i32
 }
@@ -86,15 +91,16 @@ hw.module @testLatency1(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !
 // CHECK:               %[[VAL_20:.*]] = comb.sub %[[VAL_17]], %[[VAL_17]] : i32
 // CHECK:               pipeline.latency.return %[[VAL_20]] : i32
 // CHECK:             }
-// CHECK:             pipeline.stage ^bb3 regs(%[[VAL_17]] : i32) pass(%[[VAL_21:.*]] : i32)
+// CHECK:             pipeline.stage ^bb3 regs(%[[VAL_17]] : i32) pass(%[[VAL_19]] : i32)
 // CHECK:           ^bb3(%[[VAL_22:.*]]: i32, %[[VAL_23:.*]]: i32, %[[VAL_24:.*]]: i1):
 // CHECK:             pipeline.stage ^bb4 regs(%[[VAL_22]] : i32) pass(%[[VAL_23]] : i32)
 // CHECK:           ^bb4(%[[VAL_25:.*]]: i32, %[[VAL_26:.*]]: i32, %[[VAL_27:.*]]: i1):
 // CHECK:             %[[VAL_28:.*]] = comb.add %[[VAL_25]], %[[VAL_26]] : i32
-// CHECK:             pipeline.return %[[VAL_25]] : i32
+// CHECK:             pipeline.return %[[VAL_28]] : i32
 // CHECK:           }
 // CHECK:           hw.output %[[VAL_29:.*]] : i32
 // CHECK:         }
+
 hw.module @testLatency2(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out : i32) {
   %out:2 = pipeline.scheduled(%a0 : i32 = %arg0) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32) {
     %true = hw.constant true
@@ -106,16 +112,19 @@ hw.module @testLatency2(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !
   ^bb1(%s1_enable : i1):
     pipeline.stage ^bb2
   ^bb2(%s2_enable : i1):
+    %out_bb2 = pipeline.src %out : i32
     %out2 = pipeline.latency 2 -> (i32) {
-      %d = comb.sub %out, %out : i32
+      %d = comb.sub %out_bb2, %out_bb2 : i32
       pipeline.latency.return %d : i32
     }
     pipeline.stage ^bb3
   ^bb3(%s3_enable : i1):
     pipeline.stage ^bb4
   ^bb4(%s4_enable : i1):
-    %res = comb.add %out, %out2 : i32
-    pipeline.return %out : i32
+    %out_bb4 = pipeline.src %out : i32
+    %out2_bb4 = pipeline.src %out2 : i32
+    %res = comb.add %out_bb4, %out2_bb4 : i32
+    pipeline.return %res : i32
   }
   hw.output %out#0 : i32
 }
@@ -157,9 +166,10 @@ hw.module @testLatencyToLatency(in %arg0: i32, in %arg1: i32, in %go: i1, in %cl
     pipeline.stage ^bb2
 
   ^bb2(%s2_enable : i1):
+    %bb2_1 = pipeline.src %1 : i32
     %2 = pipeline.latency 2 -> (i32) {
       %c1_i32 = hw.constant 1 : i32
-      %res2 = comb.add %1, %c1_i32 : i32
+      %res2 = comb.add %bb2_1, %c1_i32 : i32
       pipeline.latency.return %res2 : i32
     }
     pipeline.stage ^bb3
@@ -168,7 +178,8 @@ hw.module @testLatencyToLatency(in %arg0: i32, in %arg1: i32, in %go: i1, in %cl
     pipeline.stage ^bb4
 
   ^bb4(%s4_enable : i1):
-    pipeline.return %2 : i32
+    %bb4_2 = pipeline.src %2 : i32
+    pipeline.return %bb4_2 : i32
   }
   hw.output %0#0 : i32
 }
@@ -199,10 +210,11 @@ hw.module @test_arbitrary_nesting(in %arg0 : i32, in %arg1 : i32, in %go : i1, i
     %true = hw.constant true
     pipeline.stage ^bb1
   ^bb1(%s1_enable : i1):
-    %foo = "foo.foo" (%a0) : (i32) -> (i32)
+    %a0_bb1 = pipeline.src %a0 : i32
+    %foo = "foo.foo" (%a0_bb1) : (i32) -> (i32)
     "foo.bar" () ({
       ^bb0:
-      %foo2 = "foo.foo" (%a0) : (i32) -> (i32)
+      %foo2 = "foo.foo" (%a0_bb1) : (i32) -> (i32)
       "foo.baz" () ({
         ^bb0(%innerArg0 : i32):
         // Reference all of the values defined above - none of these should
@@ -210,13 +222,14 @@ hw.module @test_arbitrary_nesting(in %arg0 : i32, in %arg1 : i32, in %go : i1, i
         "foo.foobar" (%foo, %foo2, %innerArg0) : (i32, i32, i32) -> ()
 
         // Reference %a0 - this should be registered.
-        "foo.foobar" (%a0) : (i32) -> ()
+        "foo.foobar" (%a0_bb1) : (i32) -> ()
       }) : () -> ()
     }) : () -> ()
 
     pipeline.stage ^bb2
   ^bb2(%s2_enable : i1):
-    pipeline.return %a0 : i32
+    %a0_bb2 = pipeline.src %a0 : i32
+    pipeline.return %a0_bb2 : i32
   }
   hw.output %out#0 : i32
 }
@@ -239,7 +252,8 @@ hw.module @testExtInput(in %arg0 : i32, in %ext1 : i32, in %go : i1, in %clk : !
       pipeline.stage ^bb1
 
     ^bb1(%s1_enable : i1):
-      pipeline.return %add0, %ext1 : i32, i32
+      %add0_bb1 = pipeline.src %add0 : i32
+      pipeline.return %add0_bb1, %ext1 : i32, i32
   }
   hw.output %out#0, %out#1 : i32, i32
 }
@@ -271,10 +285,13 @@ hw.module @testNaming(in %myArg : i32, in %go : i1, in %clk : !seq.clock, in %rs
   ^bb1(%s1_enable : i1):
     pipeline.stage ^bb2
   ^bb2(%s2_enable : i1):
-    %0 = comb.add %A, %res  {"sv.namehint" = "bar"} : i32
+    %A_bb2 = pipeline.src %A : i32
+    %res_bb2 = pipeline.src %res : i32
+    %0 = comb.add %A_bb2, %res_bb2  {"sv.namehint" = "bar"} : i32
     pipeline.stage ^bb3
   ^bb3(%s3_enable : i1):
-    pipeline.return %0 : i32
+    %bb3_0 = pipeline.src %0 : i32
+    pipeline.return %bb3_0 : i32
   }
   hw.output %out#0 : i32
 }
@@ -318,10 +335,12 @@ hw.module @pipelineLatencyCrashRepro(in %clk : !seq.clock, in %rst: i1, in %go:
   ^bb2(%s2_enable: i1):  // pred: ^bb1
     pipeline.stage ^bb3
   ^bb3(%s3_enable: i1):  // pred: ^bb2
-    "dummy.op"(%1) : (i54) -> ()
+    %bb3_1 = pipeline.src %1 : i54
+    "dummy.op"(%bb3_1) : (i54) -> ()
     pipeline.stage ^bb4
   ^bb4(%s4_enable: i1):  // pred: ^bb3
-    pipeline.return %0 : i128
+    %bb4_0 = pipeline.src %0 : i128
+    pipeline.return %bb4_0 : i128
   }
   hw.output
 }
diff --git a/test/Dialect/Pipeline/Transforms/schedule-linear-pipeline.mlir b/test/Dialect/Pipeline/Transforms/schedule-linear-pipeline.mlir
index 7b5373d20b..258dcf81d4 100644
--- a/test/Dialect/Pipeline/Transforms/schedule-linear-pipeline.mlir
+++ b/test/Dialect/Pipeline/Transforms/schedule-linear-pipeline.mlir
@@ -1,27 +1,32 @@
 // RUN: circt-opt --pass-pipeline='builtin.module(any(pipeline-schedule-linear))' %s | FileCheck %s
 
-// CHECK-LABEL:   hw.module @pipeline(
-// CHECK-SAME:          in %[[VAL_0:.*]] : i32, in %[[VAL_1:.*]] : i32, in %[[GO:.*]] : i1, in %[[CLOCK:.*]] : !seq.clock, in %[[RESET:.*]] : i1, out out : i32) {
-// CHECK:           %[[VAL_5:.*]], %[[VAL_6:.*]] = pipeline.scheduled(%[[VAL_7:.*]] : i32 = %[[VAL_0]], %[[VAL_8:.*]] : i32 = %[[VAL_1]]) clock(%[[CLOCK]]) reset(%[[RESET]]) go(%[[GO]]) entryEn(%[[VAL_9:.*]]) -> (out : i32) {
-// CHECK:             %[[VAL_10:.*]] = comb.add %[[VAL_7]], %[[VAL_8]] {ssp.operator_type = @add1} : i32
-// CHECK:             %[[VAL_11:.*]] = comb.add %[[VAL_8]], %[[VAL_7]] {ssp.operator_type = @add1} : i32
+// CHECK-LABEL:   hw.module @pipeline(in 
+// CHECK-SAME:      %[[ARG0:.*]] : i32, in %[[ARG1:.*]] : i32, in %[[GO:.*]] : i1, in %[[CLK:.*]] : !seq.clock, in %[[RST:.*]] : i1, out out : i32) {
+// CHECK:           %[[VAL_0:.*]], %[[VAL_1:.*]] = pipeline.scheduled(%[[VAL_2:.*]] : i32 = %[[ARG0]], %[[VAL_3:.*]] : i32 = %[[ARG1]]) clock(%[[CLK]]) reset(%[[RST]]) go(%[[GO]]) entryEn(%[[VAL_4:.*]])  -> (out : i32) {
+// CHECK:             %[[VAL_5:.*]] = comb.add %[[VAL_2]], %[[VAL_3]] {ssp.operator_type = @add1} : i32
+// CHECK:             %[[VAL_6:.*]] = comb.add %[[VAL_3]], %[[VAL_2]] {ssp.operator_type = @add1} : i32
 // CHECK:             pipeline.stage ^bb1
-// CHECK:           ^bb1(%[[VAL_12:.*]]: i1):
+// CHECK:           ^bb1(%[[VAL_7:.*]]: i1):
 // CHECK:             pipeline.stage ^bb2
-// CHECK:           ^bb2(%[[VAL_13:.*]]: i1):
-// CHECK:             %[[VAL_14:.*]] = comb.mul %[[VAL_7]], %[[VAL_10]] {ssp.operator_type = @mul2} : i32
+// CHECK:           ^bb2(%[[VAL_8:.*]]: i1):
+// CHECK:             %[[VAL_9:.*]] = pipeline.src %[[VAL_2]] : i32
+// CHECK:             %[[VAL_10:.*]] = pipeline.src %[[VAL_5]] : i32
+// CHECK:             %[[VAL_11:.*]] = comb.mul %[[VAL_9]], %[[VAL_10]] {ssp.operator_type = @mul2} : i32
 // CHECK:             pipeline.stage ^bb3
-// CHECK:           ^bb3(%[[VAL_15:.*]]: i1):
+// CHECK:           ^bb3(%[[VAL_12:.*]]: i1):
 // CHECK:             pipeline.stage ^bb4
-// CHECK:           ^bb4(%[[VAL_16:.*]]: i1):
+// CHECK:           ^bb4(%[[VAL_13:.*]]: i1):
 // CHECK:             pipeline.stage ^bb5
-// CHECK:           ^bb5(%[[VAL_17:.*]]: i1):
-// CHECK:             %[[VAL_18:.*]] = comb.add %[[VAL_14]], %[[VAL_11]] {ssp.operator_type = @add1} : i32
+// CHECK:           ^bb5(%[[VAL_14:.*]]: i1):
+// CHECK:             %[[VAL_15:.*]] = pipeline.src %[[VAL_11]] : i32
+// CHECK:             %[[VAL_16:.*]] = pipeline.src %[[VAL_6]] : i32
+// CHECK:             %[[VAL_17:.*]] = comb.add %[[VAL_15]], %[[VAL_16]] {ssp.operator_type = @add1} : i32
 // CHECK:             pipeline.stage ^bb6
-// CHECK:           ^bb6(%[[VAL_19:.*]]: i1):
+// CHECK:           ^bb6(%[[VAL_18:.*]]: i1):
 // CHECK:             pipeline.stage ^bb7
-// CHECK:           ^bb7(%[[VAL_20:.*]]: i1):
-// CHECK:             pipeline.return %[[VAL_18]] : i32
+// CHECK:           ^bb7(%[[VAL_19:.*]]: i1):
+// CHECK:             %[[VAL_20:.*]] = pipeline.src %[[VAL_17]] : i32
+// CHECK:             pipeline.return %[[VAL_20]] : i32
 // CHECK:           }
 // CHECK:           hw.output %[[VAL_21:.*]] : i32
 // CHECK:         }
diff --git a/test/Dialect/Pipeline/errors.mlir b/test/Dialect/Pipeline/errors.mlir
index bae8a7fae4..711fa3e78d 100644
--- a/test/Dialect/Pipeline/errors.mlir
+++ b/test/Dialect/Pipeline/errors.mlir
@@ -27,7 +27,8 @@ hw.module @unterminated(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !
     %0 = comb.add %a0, %a1 : i32
 
   ^bb1(%s1_enable : i1):
-    pipeline.stage ^bb2 regs(%0 : i32)
+    %bb1_0 = pipeline.src %0 : i32
+    pipeline.stage ^bb2 regs(%bb1_0 : i32)
 
   ^bb2(%s2_s0 : i32, %s2_enable : i1):
     pipeline.return %s2_s0 : i32
@@ -43,7 +44,7 @@ hw.module @mixed_stages(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !
     pipeline.stage ^bb1
 
   ^bb1(%s1_enable : i1):
-  // expected-error @+1 {{'pipeline.stage' op Pipeline is in register materialized mode - operand 0 is defined in a different stage, which is illegal.}}
+  // expected-error @+1 {{'pipeline.stage' op operand 0 is defined in a different stage. Value should have been passed through block arguments}}
     pipeline.stage ^bb2 regs(%0: i32)
 
   ^bb2(%s2_s0 : i32, %s2_enable : i1):
@@ -100,42 +101,13 @@ hw.module @earlyAccess(in %arg0: i32, in %arg1: i32, in %go: i1, in %clk : !seq.
     }
     pipeline.stage ^bb1
   ^bb1(%s1_enable : i1):
-    // expected-note@+1 {{use was operand 0. The result is available 1 stages later than this use.}}
-    pipeline.return %1 : i32
+    // expected-note@below {{use was operand 0. The result is available 1 stages later than this use.}}
+    %bb1_1 = pipeline.src %1 : i32
+    pipeline.return %bb1_1 : i32
   }
   hw.output %0#0 : i32
 }
 
-// -----
-
-// Test which verifies that the values referenced within the body of a
-// latency operation also adhere to the latency constraints.
-hw.module @earlyAccess2(in %arg0: i32, in %arg1: i32, in %go: i1, in %clk : !seq.clock, in %rst: i1, out out: i32) {
-  %0:2 = pipeline.scheduled(%a0 : i32 = %arg0) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32) {
-    // expected-error @+1 {{'pipeline.latency' op result 0 is used before it is available.}}
-    %1 = pipeline.latency 2 -> (i32) {
-      %res = comb.add %a0, %a0 : i32
-      pipeline.latency.return %res : i32
-    }
-    pipeline.stage ^bb1
-
-  ^bb1(%s1_enable : i1):
-    %2 = pipeline.latency 2 -> (i32) {
-      %c1_i32 = hw.constant 1 : i32
-      // expected-note@+1 {{use was operand 0. The result is available 1 stages later than this use.}}
-      %res2 = comb.add %1, %c1_i32 : i32
-      pipeline.latency.return %res2 : i32
-    }
-    pipeline.stage ^bb2
-
-  ^bb2(%s2_enable : i1):
-    pipeline.stage ^bb3
-
-  ^bb3(%s3_enable : i1):
-    pipeline.return %2 : i32
-  }
-  hw.output %0#0 : i32
-}
 
 // -----
 
@@ -194,7 +166,8 @@ hw.module @noStallSignalWithStallability(in %arg0 : i32, in %go : i1, in %clk :
    ^bb2(%s2_enable : i1):
     pipeline.stage ^bb3
    ^bb3(%s3_enable : i1):
-    pipeline.return %a0 : i32
+    %bb3_a0 = pipeline.src %a0 : i32
+    pipeline.return %bb3_a0 : i32
   }
   hw.output %0 : i32
 }
@@ -212,7 +185,41 @@ hw.module @incorrectStallabilitySize(in %arg0 : i32, in %go : i1, in %clk : !seq
    ^bb2(%s2_enable : i1):
     pipeline.stage ^bb3
    ^bb3(%s3_enable : i1):
-    pipeline.return %a0 : i32
+    %a0_bb3 = pipeline.src %a0 : i32
+    pipeline.return %a0_bb3 : i32
   }
   hw.output %0 : i32
 }
+
+// -----
+// Test that using a value from an earlier stage inside a pipeline.latency body without a pipeline.src op is rejected.
+hw.module @unmaterialized_latency_with_missing_src(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
+  %0:2 = pipeline.scheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32){
+    %0 = comb.add %a0, %a1 : i32
+    pipeline.stage ^bb1
+   ^bb1(%s1_enable : i1):
+    %1 = pipeline.latency 1 -> (i32) {
+      // expected-error @below {{'comb.add' op operand 0 is defined in a different stage. Value should have been passed through a `pipeline.src` op}}
+      %2 = comb.add %0, %0 : i32
+      pipeline.latency.return %2 : i32
+    }
+    pipeline.stage ^bb2
+  ^bb2(%s2_enable : i1):
+    %bb2_1 = pipeline.src %1 : i32
+    pipeline.return %bb2_1 : i32
+  }
+  hw.output %0 : i32
+}
+
+// -----
+// Test that pipeline.src is rejected when the pipeline already passes values through stage registers (regs/block arguments).
+hw.module @invalid_pipeline_src(in %arg : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1) {
+  %res, %done = pipeline.scheduled(%a0 : i32 = %arg) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32) {
+     pipeline.stage ^bb1 regs(%a0 : i32)
+   ^bb1(%0 : i32, %s1_enable : i1):
+      // expected-error @below {{'pipeline.src' op Pipeline is in register materialized mode - pipeline.src operations are not allowed}}
+      %a0_bb1 = pipeline.src %a0 : i32
+      pipeline.return %0 : i32
+  }
+  hw.output
+}
\ No newline at end of file
diff --git a/test/Dialect/Pipeline/round-trip.mlir b/test/Dialect/Pipeline/round-trip.mlir
index b6f9bdb852..3f0a495221 100644
--- a/test/Dialect/Pipeline/round-trip.mlir
+++ b/test/Dialect/Pipeline/round-trip.mlir
@@ -1,15 +1,5 @@
-// RUN: circt-opt %s -verify-diagnostics | circt-opt -verify-diagnostics | FileCheck %s
+// RUN: circt-opt %s --verify-roundtrip
 
-// CHECK-LABEL:  hw.module @unscheduled1
-// CHECK-NEXT:    %out, %done = pipeline.unscheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable)  -> (out : i32) {
-// CHECK-NEXT:      %0 = pipeline.latency 2 -> (i32) {
-// CHECK-NEXT:        %1 = comb.add %a0, %a1 : i32
-// CHECK-NEXT:        pipeline.latency.return %1 : i32
-// CHECK-NEXT:      }
-// CHECK-NEXT:      pipeline.return %0 : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out : i32
-// CHECK-NEXT:  }
 hw.module @unscheduled1(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
   %0:2 = pipeline.unscheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32){
     %0 = pipeline.latency 2 -> (i32) {
@@ -21,36 +11,39 @@ hw.module @unscheduled1(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !
   hw.output %0 : i32
 }
 
-// CHECK-LABEL:  hw.module @scheduled1
-// CHECK-NEXT:    %out, %done = pipeline.scheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out : i32) {
-// CHECK-NEXT:      %0 = comb.add %a0, %a1 : i32
-// CHECK-NEXT:      pipeline.stage ^bb1
-// CHECK-NEXT:    ^bb1(%s1_enable: i1):  // pred: ^bb0
-// CHECK-NEXT:      pipeline.return %0 : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out : i32
-// CHECK-NEXT:  }
+// Scheduled pipeline where the second stage reads the first stage's result through pipeline.src.
 hw.module @scheduled1(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
   %0:2 = pipeline.scheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32){
     %0 = comb.add %a0, %a1 : i32
     pipeline.stage ^bb1
 
    ^bb1(%s1_enable : i1):
-    pipeline.return %0 : i32
+    %bb1_0 = pipeline.src %0 : i32
+    pipeline.return %bb1_0 : i32
+  }
+  hw.output %0 : i32
+}
+
+hw.module @scheduled_with_latency(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
+  %0:2 = pipeline.scheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32){
+    %0 = comb.add %a0, %a1 : i32
+    pipeline.stage ^bb1
+   ^bb1(%s1_enable : i1):
+    %bb1_0 = pipeline.src %0 : i32 // Refer to %0 (defined in the entry stage) from within this stage.
+    %1 = pipeline.latency 1 -> (i32) {
+      %2 = comb.add %bb1_0, %bb1_0 : i32 // Uses the stage-local pipeline.src value rather than %0 directly.
+      pipeline.latency.return %2 : i32
+    }
+    pipeline.stage ^bb2
+  ^bb2(%s2_enable : i1):
+    %bb2_1 = pipeline.src %1 : i32 // Refer to the latency op's result, defined in the previous stage.
+    pipeline.return %bb2_1 : i32
   }
   hw.output %0 : i32
 }
 
 
-// CHECK-LABEL:  hw.module @scheduled2
-// CHECK-NEXT:    %out, %done = pipeline.scheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out : i32) {
-// CHECK-NEXT:      %0 = comb.add %a0, %a1 : i32
-// CHECK-NEXT:      pipeline.stage ^bb1 regs(%0 : i32)
-// CHECK-NEXT:    ^bb1(%s1_reg0: i32, %s1_enable: i1):  // pred: ^bb0
-// CHECK-NEXT:      pipeline.return %s1_reg0 : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out : i32
-// CHECK-NEXT:  }
+
 hw.module @scheduled2(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
   %0:2 = pipeline.scheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32) {
     %0 = comb.add %a0, %a1 : i32
@@ -62,15 +55,7 @@ hw.module @scheduled2(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !se
   hw.output %0 : i32
 }
 
-// CHECK-LABEL:  hw.module @scheduledWithPassthrough
-// CHECK-NEXT:    %out0, %out1, %done = pipeline.scheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out0 : i32, out1 : i32) {
-// CHECK-NEXT:      %0 = comb.add %a0, %a1 : i32
-// CHECK-NEXT:      pipeline.stage ^bb1 regs(%0 : i32) pass(%a1 : i32)
-// CHECK-NEXT:    ^bb1(%s1_reg0: i32, %s1_pass0: i32, %s1_enable: i1):  // pred: ^bb0
-// CHECK-NEXT:      pipeline.return %s1_reg0, %s1_pass0 : i32, i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out0 : i32
-// CHECK-NEXT:  }
+
 hw.module @scheduledWithPassthrough(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
   %0:3 = pipeline.scheduled(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out0: i32, out1: i32) {
     %0 = comb.add %a0, %a1 : i32
@@ -82,12 +67,7 @@ hw.module @scheduledWithPassthrough(in %arg0 : i32, in %arg1 : i32, in %go : i1,
   hw.output %0#0 : i32
 }
 
-// CHECK-LABEL:  hw.module @withStall
-// CHECK-NEXT:    %out, %done = pipeline.scheduled(%a0 : i32 = %arg0) stall(%stall) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out : i32) {
-// CHECK-NEXT:      pipeline.return %a0 : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out : i32
-// CHECK-NEXT:  }
+
 hw.module @withStall(in %arg0 : i32, in %stall : i1, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
   %0:2 = pipeline.scheduled(%a0 : i32 = %arg0) stall(%stall) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32) {
     pipeline.return %a0 : i32
@@ -95,14 +75,7 @@ hw.module @withStall(in %arg0 : i32, in %stall : i1, in %go : i1, in %clk : !seq
   hw.output %0 : i32
 }
 
-// CHECK-LABEL:  hw.module @withMultipleRegs
-// CHECK-NEXT:    %out, %done = pipeline.scheduled(%a0 : i32 = %arg0) stall(%stall) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out : i32) {
-// CHECK-NEXT:      pipeline.stage ^bb1 regs(%a0 : i32, %a0 : i32)
-// CHECK-NEXT:    ^bb1(%s1_reg0: i32, %s1_reg1: i32, %s1_enable: i1):  // pred: ^bb0
-// CHECK-NEXT:      pipeline.return %s1_reg0 : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out : i32
-// CHECK-NEXT:  }
+
 hw.module @withMultipleRegs(in %arg0 : i32, in %stall : i1, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
   %0:2 = pipeline.scheduled(%a0 : i32 = %arg0) stall(%stall) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32) {
     pipeline.stage ^bb1 regs(%a0 : i32, %a0 : i32)
@@ -113,17 +86,7 @@ hw.module @withMultipleRegs(in %arg0 : i32, in %stall : i1, in %go : i1, in %clk
   hw.output %0 : i32
 }
 
-// CHECK-LABEL:  hw.module @withClockGates
-// CHECK-NEXT:    %out, %done = pipeline.scheduled(%a0 : i32 = %arg0) stall(%stall) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out : i32) {
-// CHECK-NEXT:      %true = hw.constant true
-// CHECK-NEXT:      %true_0 = hw.constant true
-// CHECK-NEXT:      %true_1 = hw.constant true
-// CHECK-NEXT:      pipeline.stage ^bb1 regs(%a0 : i32 gated by [%true], %a0 : i32, %a0 : i32 gated by [%true_0, %true_1])
-// CHECK-NEXT:    ^bb1(%s1_reg0: i32, %s1_reg1: i32, %s1_reg2: i32, %s1_enable: i1):  // pred: ^bb0
-// CHECK-NEXT:      pipeline.return %s1_reg0 : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out : i32
-// CHECK-NEXT:  }
+
 hw.module @withClockGates(in %arg0 : i32, in %stall : i1, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
   %0:2 = pipeline.scheduled(%a0 : i32 = %arg0) stall(%stall) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32) {
     %true1 = hw.constant true
@@ -137,15 +100,7 @@ hw.module @withClockGates(in %arg0 : i32, in %stall : i1, in %go : i1, in %clk :
   hw.output %0 : i32
 }
 
-// CHECK-LABEL:  hw.module @withNames
-// CHECK-NEXT:    %out, %done = pipeline.scheduled "MyPipeline"(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out : i32) {
-// CHECK-NEXT:      %0 = comb.add %a0, %a1 : i32
-// CHECK-NEXT:      pipeline.stage ^bb1 regs("myAdd" = %0 : i32, %0 : i32, "myOtherAdd" = %0 : i32)
-// CHECK-NEXT:    ^bb1(%myAdd: i32, %s1_reg1: i32, %myOtherAdd: i32, %s1_enable: i1):  // pred: ^bb0
-// CHECK-NEXT:      pipeline.return %myAdd : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out : i32
-// CHECK-NEXT:  }
+
 hw.module @withNames(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, out out: i32) {
   %0:2 = pipeline.scheduled "MyPipeline"(%a0 : i32 = %arg0, %a1 : i32 = %arg1) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) -> (out: i32){
     %0 = comb.add %a0, %a1 : i32
@@ -157,8 +112,6 @@ hw.module @withNames(in %arg0 : i32, in %arg1 : i32, in %go : i1, in %clk : !seq
   hw.output %0 : i32
 }
 
-// CHECK-LABEL:   hw.module @withStallability
-// CHECK:           %out, %done = pipeline.scheduled "MyPipeline"(%a0 : i32 = %arg0) stall(%stall) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable) {stallability = [true, false, true]} -> (out : i32)
 hw.module @withStallability(in %arg0 : i32, in %go : i1, in %clk : !seq.clock, in %rst : i1, in %stall : i1, out out: i32) {
   %0:2 = pipeline.scheduled "MyPipeline"(%a0 : i32 = %arg0) stall(%stall) clock(%clk) reset(%rst) go(%go) entryEn(%s0_enable)
     {stallability = [true, false, true]}
@@ -169,27 +122,19 @@ hw.module @withStallability(in %arg0 : i32, in %go : i1, in %clk : !seq.clock, i
    ^bb2(%s2_enable : i1):
     pipeline.stage ^bb3
    ^bb3(%s3_enable : i1):
-    pipeline.return %a0 : i32
+    %bb3_0 = pipeline.src %a0 : i32
+    pipeline.return %bb3_0 : i32
   }
   hw.output %0 : i32
 }
 
-// CHECK-LABEL:  hw.module @withoutReset(in %arg0 : i32, in %stall : i1, in %go : i1, in %clk : !seq.clock, out out : i32) {
-// CHECK-NEXT:    %out, %done = pipeline.scheduled(%a0 : i32 = %arg0) clock(%clk) go(%go) entryEn(%s0_enable)  -> (out : i32) {
-// CHECK-NEXT:      pipeline.stage ^bb1  
-// CHECK-NEXT:    ^bb1(%s1_enable: i1):  // pred: ^bb0
-// CHECK-NEXT:      pipeline.return %a0 : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    %out_0, %done_1 = pipeline.unscheduled(%a0 : i32 = %arg0) stall(%stall) clock(%clk) go(%go) entryEn(%s0_enable)  -> (out : i32) {
-// CHECK-NEXT:      pipeline.return %a0 : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hw.output %out : i32
-// CHECK-NEXT:  }
+
 hw.module @withoutReset(in %arg0 : i32, in %stall : i1, in %go : i1, in %clk : !seq.clock, out out: i32) {
   %0:2 = pipeline.scheduled(%a0 : i32 = %arg0) clock(%clk) go(%go) entryEn(%s0_enable) -> (out: i32) {
     pipeline.stage ^bb1
    ^bb1(%s1_enable : i1):
-    pipeline.return %a0 : i32
+    %bb1_0 = pipeline.src %a0 : i32
+    pipeline.return %bb1_0 : i32
   }
 
   %1:2 = pipeline.unscheduled (%a0 : i32 = %arg0) stall (%stall) clock (%clk) go (%go) entryEn (%s0_enable) -> (out: i32) {