[LoopSchedule] Move PipelineWhile and Related Ops from Pipeline to LoopSchedule (#4947)

This commit is contained in:
Andrew Butt 2023-04-18 11:56:07 -04:00 committed by GitHub
parent 3251bdb2b9
commit 80c76afb86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 1169 additions and 1065 deletions

View File

@ -9,7 +9,7 @@ Rationale docs](https://mlir.llvm.org/docs/Rationale/).
The `loopschedule` dialect provides a collection of ops to represent software-like loops
after scheduling. There are currently two main kinds of loops that can be represented:
pipelined and sequential. Pipelined loops allow multiple iterations of the loop to be
in-flight at a time and have an associated initiation interval (II) to specify the number
in-flight at a time and have an associated initiation interval (`II`) to specify the number
of cycles between the start of successive loop iterations. In contrast, sequential loops
are guaranteed to only have one iteration in-flight at any given time.
@ -17,6 +17,91 @@ A primary goal of the `loopschedule` dialect, as opposed to many other High-Leve
(HLS) representations, is to maintain the structure of loops after scheduling. As such, the
`loopschedule` ops are inspired by the `scf` and `affine` dialect ops.
## Pipelined Loops
Pipelined loops are represented with the `loopschedule.pipeline` op. A `pipeline`
loop resembles a `while` loop in the `scf` dialect, but the body must contain only
`loopschedule.pipeline.stage` and `loopschedule.terminator` ops. To have a better
understanding of how `loopschedule.pipeline` works, we will look at the following
example:
```
func.func @test1(%arg0: memref<10xi32>) -> i32 {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%c0_i32 = arith.constant 0 : i32
%0 = loopschedule.pipeline II = 1 iter_args(%arg1 = %c0, %arg2 = %c0_i32) : (index, i32) -> i32 {
%1 = arith.cmpi ult, %arg1, %c10 : index
loopschedule.register %1 : i1
} do {
%1:2 = loopschedule.pipeline.stage start = 0 {
%3 = arith.addi %arg1, %c1 : index
%4 = memref.load %arg0[%arg1] : memref<10xi32>
loopschedule.register %3, %4 : index, i32
} : index, i32
%2 = loopschedule.pipeline.stage start = 1 {
%3 = arith.addi %1#1, %arg2 : i32
loopschedule.register %3 : i32
} : i32
loopschedule.terminator iter_args(%1#0, %2), results(%2) : (index, i32) -> i32
}
return %0 : i32
}
```
A `pipeline` op first defines the initial values for the `iter_args`. `iter_args` are values that will
be passed back to the first stage after the last stage of the pipeline. The pipeline also defines a
specific, static `II`. Each pipeline stage in the `do` block represents a series of ops run in parallel.
Values are registered at the end of a stage and passed out as results for future pipeline stages to
use. Each pipeline stage must have a defined start time, which is the number of cycles between the
start of the pipeline and when the first valid data will be available as input to that stage.
Finally, the terminator is called with the `iter_args` for the next iteration and the result values
that will be returned when the pipeline completes. Even though the terminator is located at the
end of the loop body, its values are passed back to a previous stage whenever needed. We do not
need to wait for an entire iteration to finish before `iter_args` become valid for the next iteration.
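To make the feedback timing concrete, here is a minimal Python sketch (hypothetical; not part of CIRCT or the dialect) simulating the two-stage accumulation pipeline above with `II` = 1. Stage 0 registers the loaded value, stage 1 consumes it one cycle later, and the next iteration's `iter_args` are available as soon as stage 0 finishes, so a new iteration enters every cycle:

```python
# Cycle-accurate sketch of the two-stage accumulation pipeline above
# (II = 1, stages at start cycles 0 and 1). Illustrative model only.

def pipeline_sum(mem, ii=1):
    total = 0       # the i32 iter_arg, registered in stage 1
    i = 0           # the index iter_arg, registered in stage 0
    stage1_in = []  # register between stage 0 and stage 1
    cycle = 0
    while i < len(mem) or stage1_in:
        # Stage 1: consume the value stage 0 registered last cycle.
        if stage1_in:
            total += stage1_in.pop(0)
        # Stage 0: a new iteration enters every II cycles; the
        # incremented index is valid immediately, so the next
        # iteration does not wait for stage 1 to finish.
        if i < len(mem) and cycle % ii == 0:
            stage1_in.append(mem[i])
            i += 1
        cycle += 1
    return total
```

With `II` = 1 the result matches a sequential sum, but iterations overlap: while iteration *i* is in stage 1, iteration *i*+1 is already executing stage 0.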
Multi-cycle and pipelined ops can also be supported in `pipeline` loops. In the following example,
assume the multiply op is bound to a 3-stage pipelined multiplier:
```
func.func @test1(%arg0: memref<10xi32>, %arg1: memref<10xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%c1_i32 = arith.constant 1 : i32
loopschedule.pipeline II = 1 iter_args(%arg2 = %c0) : (index) -> () {
%1 = arith.cmpi ult, %arg2, %c10 : index
loopschedule.register %1 : i1
} do {
%1:2 = loopschedule.pipeline.stage start = 0 {
%3 = arith.addi %arg2, %c1 : index
%4 = memref.load %arg0[%arg2] : memref<10xi32>
loopschedule.register %3, %4 : index, i32
} : index, i32
%2:2 = loopschedule.pipeline.stage start = 1 {
%3 = arith.muli %1#1, %c1_i32 : i32
loopschedule.register %3, %1#0 : i32, index
} : i32, index
loopschedule.pipeline.stage start = 4 {
memref.store %2#0, %arg0[%2#1] : memref<10xi32>
loopschedule.register
}
loopschedule.terminator iter_args(%1#0), results() : (index) -> ()
}
return
}
```
Here, the `II` is still 1 because new values can be introduced to the multiplier every cycle. The last
stage is delayed by 3 cycles because of the 3-cycle latency of the multiplier. The `pipeline` op is
currently tightly coupled to the lowering implementation used, as the latency of operators is not
represented in the IR, but rather an implicit assumption made when lowering later. The scheduling
problem is constructed with these implicit operator latencies in mind. This coupling can be addressed
in the future with a proper operator library to maintain explicit operator latencies in the IR.
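The implicit latency assumption can be illustrated with a small Python sketch (hypothetical; CIRCT keeps these latencies in the lowering, not in the IR). It derives each stage's start cycle from per-operator latencies over a dependence DAG, reproducing the start times in the example above (load at 0, multiply at 1, store at 4):

```python
# Toy start-time calculation mirroring the example above: an op starts
# once all of its operands are available. The latency table is an
# explicit stand-in for the implicit assumptions made during lowering.

LATENCY = {"load": 1, "muli": 3, "store": 1, "addi": 0}

def start_times(deps):
    """deps: op name -> list of op names it depends on (a DAG)."""
    start = {}
    def visit(op):
        if op not in start:
            # Ready when the slowest operand finishes.
            start[op] = max((visit(d) + LATENCY[d] for d in deps[op]),
                            default=0)
        return start[op]
    for op in deps:
        visit(op)
    return start

deps = {"load": [], "muli": ["load"], "store": ["muli"]}
# load starts at cycle 0; muli at 1 (after the 1-cycle load);
# store at 4 (after the 3-cycle pipelined multiplier).
```

An operator library in the IR would replace the hard-coded `LATENCY` table here, decoupling the `pipeline` op from any particular lowering.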
## Status
Initial dialect addition, more documentation and rationale to come as ops are added.
Added pipeline loop representation, more documentation and rationale to come as ops are added.

View File

@ -1,4 +1,5 @@
//===- AffineToPipeline.h -------------------------------------------------===//
//===- AffineToLoopSchedule.h ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -6,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
#ifndef CIRCT_CONVERSION_AFFINETOPIPELINE_H_
#define CIRCT_CONVERSION_AFFINETOPIPELINE_H_
#ifndef CIRCT_CONVERSION_AFFINETOLOOPSCHEDULE_H_
#define CIRCT_CONVERSION_AFFINETOLOOPSCHEDULE_H_
#include <memory>
@ -16,7 +17,7 @@ class Pass;
} // namespace mlir
namespace circt {
std::unique_ptr<mlir::Pass> createAffineToPipeline();
std::unique_ptr<mlir::Pass> createAffineToLoopSchedule();
} // namespace circt
#endif // CIRCT_CONVERSION_AFFINETOPIPELINE_H_
#endif // CIRCT_CONVERSION_AFFINETOLOOPSCHEDULE_H_

View File

@ -0,0 +1,28 @@
//===- LoopScheduleToCalyx.h - LoopSchedule to Calyx pass entry point -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes that expose the LoopScheduleToCalyx pass
// constructor.
//
//===----------------------------------------------------------------------===//
#ifndef CIRCT_CONVERSION_LOOPSCHEDULETOCALYX_H
#define CIRCT_CONVERSION_LOOPSCHEDULETOCALYX_H
#include "circt/Dialect/LoopSchedule/LoopScheduleDialect.h"
#include "circt/Support/LLVM.h"
#include <memory>
namespace circt {
/// Create a LoopSchedule to Calyx conversion pass.
std::unique_ptr<OperationPass<ModuleOp>> createLoopScheduleToCalyxPass();
} // namespace circt
#endif // CIRCT_CONVERSION_LOOPSCHEDULETOCALYX_H

View File

@ -13,7 +13,7 @@
#ifndef CIRCT_CONVERSION_PASSES_H
#define CIRCT_CONVERSION_PASSES_H
#include "circt/Conversion/AffineToPipeline.h"
#include "circt/Conversion/AffineToLoopSchedule.h"
#include "circt/Conversion/ArcToLLVM.h"
#include "circt/Conversion/CalyxToFSM.h"
#include "circt/Conversion/CalyxToHW.h"
@ -30,8 +30,8 @@
#include "circt/Conversion/HandshakeToFIRRTL.h"
#include "circt/Conversion/HandshakeToHW.h"
#include "circt/Conversion/LLHDToLLVM.h"
#include "circt/Conversion/LoopScheduleToCalyx.h"
#include "circt/Conversion/MooreToCore.h"
#include "circt/Conversion/PipelineToCalyx.h"
#include "circt/Conversion/PipelineToHW.h"
#include "circt/Conversion/SCFToCalyx.h"
#include "circt/Conversion/StandardToHandshake.h"

View File

@ -19,16 +19,16 @@ include "mlir/Pass/PassBase.td"
// AffineToPipeline
//===----------------------------------------------------------------------===//
def AffineToPipeline : Pass<"convert-affine-to-pipeline", "mlir::func::FuncOp"> {
let summary = "Convert Affine dialect to Pipeline pipelines";
def AffineToLoopSchedule : Pass<"convert-affine-to-loopschedule", "mlir::func::FuncOp"> {
let summary = "Convert Affine dialect to LoopSchedule scheduled loops";
let description = [{
This pass analyzes Affine loops and control flow, creates a Scheduling
problem using the Calyx operator library, solves the problem, and lowers
the loops to a Pipeline pipeline.
the loops to a LoopSchedule.
}];
let constructor = "circt::createAffineToPipeline()";
let constructor = "circt::createAffineToLoopSchedule()";
let dependentDialects = [
"circt::pipeline::PipelineDialect",
"circt::loopschedule::LoopScheduleDialect",
"mlir::arith::ArithDialect",
"mlir::cf::ControlFlowDialect",
"mlir::memref::MemRefDialect",
@ -162,17 +162,17 @@ def SCFToCalyx : Pass<"lower-scf-to-calyx", "mlir::ModuleOp"> {
}
//===----------------------------------------------------------------------===//
// PipelineToCalyx
// LoopScheduleToCalyx
//===----------------------------------------------------------------------===//
def PipelineToCalyx : Pass<"lower-static-logic-to-calyx", "mlir::ModuleOp"> {
let summary = "Lower Pipeline to Calyx";
def LoopScheduleToCalyx : Pass<"lower-loopschedule-to-calyx", "mlir::ModuleOp"> {
let summary = "Lower LoopSchedule to Calyx";
let description = [{
This pass lowers Pipeline to Calyx.
This pass lowers LoopSchedule to Calyx.
}];
let constructor = "circt::createPipelineToCalyxPass()";
let constructor = "circt::createLoopScheduleToCalyxPass()";
let dependentDialects = [
"calyx::CalyxDialect", "::mlir::scf::SCFDialect", "hw::HWDialect",
"calyx::CalyxDialect", "loopschedule::LoopScheduleDialect", "hw::HWDialect",
"comb::CombDialect"
];
let options = [

View File

@ -1,28 +0,0 @@
//===- PipelineToCalyx.h - Pipeline to Calyx pass entry point -----------*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes that expose the PipelineToCalyx pass
// constructor.
//
//===----------------------------------------------------------------------===//
#ifndef CIRCT_CONVERSION_PIPELINETOCALYX_H
#define CIRCT_CONVERSION_PIPELINETOCALYX_H
#include "circt/Support/LLVM.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include <memory>
namespace circt {
/// Create a Pipeline to Calyx conversion pass.
std::unique_ptr<OperationPass<ModuleOp>> createPipelineToCalyxPass();
} // namespace circt
#endif // CIRCT_CONVERSION_PIPELINETOCALYX_H

View File

@ -21,4 +21,186 @@ include "circt/Dialect/LoopSchedule/LoopSchedule.td"
class LoopScheduleOp<string mnemonic, list<Trait> traits = []> :
Op<LoopSchedule_Dialect, mnemonic, traits>;
def LoopSchedulePipelineOp : LoopScheduleOp<"pipeline", []> {
let summary = "LoopSchedule dialect pipeline-loop.";
let description = [{
The `loopschedule.pipeline` operation represents a statically scheduled
pipeline structure that executes while a condition is true. For more details,
see: https://llvm.discourse.group/t/rfc-representing-pipelined-loops/4171.
A pipeline captures the result of scheduling, and is not generally safe to
transform, besides lowering to hardware dialects. For more discussion about
relaxing this, see: https://github.com/llvm/circt/issues/2204.
This is the top-level operation representing a high-level pipeline. It is
not isolated from above, but could be if this is helpful. A pipeline
contains two regions: `condition` and `stages`.
The pipeline may accept an optional `iter_args`, similar to the SCF dialect,
for representing loop-carried values like induction variables or reductions.
When the pipeline starts execution, the registers indicated as `iter_args`
by `loopschedule.terminator` should be initialized to the initial
values specified in the `iter_args` section here. The `iter_args` relate to
the initiation interval of the loop. The maximum distance in stages between
where an `iter_arg` is used and where that `iter_arg` is registered must be
less than the loop's initiation interval. For example, with II=1, each
`iter_arg` must be used and registered in the same stage.
The single-block `condition` region dictates the condition under which the
pipeline should execute. It has a `register` terminator, and the
pipeline initiates new iterations while the registered value is `true : i1`.
It may access SSA values dominating the pipeline, as well as `iter_args`,
which are block arguments. The body of the block may only contain
"combinational" operations, which are currently defined to be simple
arithmetic, comparisons, and selects from the `Standard` dialect.
The single-block `stages` region wraps `loopschedule.pipeline.stage`
operations. It has a `loopschedule.terminator` terminator, which can
both return results from the pipeline and register `iter_args`. Stages may
access SSA values dominating the pipeline, as well as `iter_args`, which are
block arguments.
}];
let arguments = (ins
I64Attr:$II,
OptionalAttr<I64Attr>:$tripCount,
Variadic<AnyType>:$iterArgs
);
let results = (outs
Variadic<AnyType>:$results
);
let regions = (region
SizedRegion<1>:$condition,
SizedRegion<1>:$stages
);
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
let skipDefaultBuilders = 1;
let builders = [
OpBuilder<(ins "mlir::TypeRange":$resultTypes, "mlir::IntegerAttr":$II,
"std::optional<IntegerAttr>": $tripCount,
"mlir::ValueRange":$iterArgs)>
];
let extraClassDeclaration = [{
Block &getCondBlock() { return getCondition().front(); }
Block &getStagesBlock() { return getStages().front(); }
}];
}
def LoopSchedulePipelineStageOp : LoopScheduleOp<"pipeline.stage",
[HasParent<"LoopSchedulePipelineOp">]> {
let summary = "LoopSchedule dialect pipeline stage.";
let description = [{
This operation has a single-block region which dictates the operations that
may occur concurrently.
It has a `start` attribute, which indicates the start cycle for this stage.
It may have an optional `when` predicate, which supports conditional
execution for each stage. This is in addition to the `condition` region that
controls the execution of the whole pipeline. A stage with a `when`
predicate should only execute when the predicate is `true : i1`, and push a
bubble through the pipeline otherwise.
It has a `register` terminator, which passes the concurrently
computed values forward to the next stage.
Any stage may access `iter_args`. If a stage accesses an `iter_arg` after
the stage in which it is defined, it is up to lowering passes to preserve
this value until the last stage that needs it.
Other than `iter_args`, stages may only access SSA values dominating the
pipeline or SSA values computed by any previous stage. This ensures the
stages capture the coarse-grained schedule of the pipeline and how values
feed forward and backward.
}];
let arguments = (ins
SI64Attr:$start,
Optional<I1>:$when
);
let results = (outs
Variadic<AnyType>:$results
);
let regions = (region
SizedRegion<1>:$body
);
let assemblyFormat = [{
`start` `=` $start (`when` $when^)? $body (`:` qualified(type($results))^)? attr-dict
}];
let hasVerifier = 1;
let skipDefaultBuilders = 1;
let builders = [
OpBuilder<(ins "mlir::TypeRange":$resultTypes, "mlir::IntegerAttr":$start)>
];
let extraClassDeclaration = [{
Block &getBodyBlock() { return getBody().front(); }
unsigned getStageNumber();
}];
}
def LoopScheduleRegisterOp : LoopScheduleOp<"register",
[ParentOneOf<["LoopSchedulePipelineOp", "LoopSchedulePipelineStageOp"]>, Terminator]> {
let summary = "LoopSchedule dialect loop register.";
let description = [{
The `loopschedule.register` terminates a pipeline stage and
"registers" the values specified as operands. These values become the
results of the stage.
}];
let arguments = (ins
Variadic<AnyType>:$operands
);
let assemblyFormat = [{
$operands (`:` qualified(type($operands))^)? attr-dict
}];
let hasVerifier = 1;
}
def LoopScheduleTerminatorOp : LoopScheduleOp<"terminator",
[HasParent<"LoopSchedulePipelineOp">, Terminator, AttrSizedOperandSegments]> {
let summary = "LoopSchedule dialect terminator.";
let description = [{
The `loopschedule.terminator` operation represents the terminator of
a `loopschedule.pipeline`.
The `results` section accepts a variadic list of values which become the
pipeline's return values. These must be results of a stage, and their types
must match the pipeline's return types. The results need not be defined in
the final stage, and it is up to lowering passes to preserve these values
until the final stage is complete.
The `iter_args` section accepts a variadic list of values which become the
next iteration's `iter_args`. These may be the results of any stage, and
their types must match the pipeline's `iter_args` types.
}];
let arguments = (ins
Variadic<AnyType>:$iter_args,
Variadic<AnyType>:$results
);
let assemblyFormat = [{
`iter_args` `(` $iter_args `)` `,`
`results` `(` $results `)` `:`
functional-type($iter_args, $results) attr-dict
}];
let hasVerifier = 1;
}
#endif // CIRCT_LOOP_SCHEDULE_OPS_TD

View File

@ -204,186 +204,4 @@ def ReturnOp : Op<Pipeline_Dialect, "return", [Terminator]> {
let assemblyFormat = [{ ($outputs^)? `valid` $valid attr-dict (`:` type($outputs)^)? }];
}
def PipelineWhileOp : Op<Pipeline_Dialect, "while", []> {
let summary = "Pipeline dialect pipeline while-loop.";
let description = [{
The `pipeline.while` operation represents a statically scheduled
pipeline stucture that executes while a condition is true. For more details,
see: https://llvm.discourse.group/t/rfc-representing-pipelined-loops/4171.
A pipeline captures the result of scheduling, and is not generally safe to
transform, besides lowering to hardware dialects. For more discussion about
relaxing this, see: https://github.com/llvm/circt/issues/2204.
This is the top-level operation representing a high-level pipeline. It is
not isolated from above, but could be if this is helpful. A pipeline
contains two regions: `condition` and `stages`.
The pipeline may accept an optional `iter_args`, similar to the SCF dialect,
for representing loop-carried values like induction variables or reductions.
When the pipeline starts execution, the registers indicated as `iter_args`
by `pipeline.terminator` should be initialized to the initial
values specified in the `iter_args` section here. The `iter_args` relate to
the initiation interval of the loop. The maximum distance in stages between
where an `iter_arg` is used and where that `iter_arg` is registered must be
less than the loop's initiation interval. For example, with II=1, each
`iter_arg` must be used and registered in the same stage.
The single-block `condition` region dictates the condition under which the
pipeline should execute. It has a `register` terminator, and the
pipeline initiates new iterations while the registered value is `true : i1`.
It may access SSA values dominating the pipeline, as well as `iter_args`,
which are block arguments. The body of the block may only contain
"combinational" operations, which are currently defined to be simple
arithmetic, comparisons, and selects from the `Standard` dialect.
The single-block `stages` region wraps `pipeline.stage`
operations. It has a `pipeline.terminator` terminator, which can
both return results from the pipeline and register `iter_args`. Stages may
access SSA values dominating the pipeline, as well as `iter_args`, which are
block arguments.
}];
let arguments = (ins
I64Attr:$II,
OptionalAttr<I64Attr>:$tripCount,
Variadic<AnyType>:$iterArgs
);
let results = (outs
Variadic<AnyType>:$results
);
let regions = (region
SizedRegion<1>:$condition,
SizedRegion<1>:$stages
);
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
let skipDefaultBuilders = 1;
let builders = [
OpBuilder<(ins "mlir::TypeRange":$resultTypes, "mlir::IntegerAttr":$II,
"std::optional<IntegerAttr>": $tripCount,
"mlir::ValueRange":$iterArgs)>
];
let extraClassDeclaration = [{
Block &getCondBlock() { return getCondition().front(); }
Block &getStagesBlock() { return getStages().front(); }
}];
}
def PipelineWhileStageOp : Op<Pipeline_Dialect, "while.stage",
[HasParent<"PipelineWhileOp">]> {
let summary = "Pipeline dialect while pipeline stage.";
let description = [{
This operation has a single-block region which dictates the operations that
may occur concurrently.
It has a `start` attribute, which indicates the start cycle for this stage.
It may have an optional `when` predicate, which supports conditional
execution for each stage. This is in addition to the `condition` region that
controls the execution of the whole pipeline. A stage with a `when`
predicate should only execute when the predicate is `true : i1`, and push a
bubble through the pipeline otherwise.
It has a `register` terminator, which passes the concurrently
computed values forward to the next stage.
Any stage may access `iter_args`. If a stage accesses an `iter_arg` after
the stage in which it is defined, it is up to lowering passes to preserve
this value until the last stage that needs it.
Other than `iter_args`, stages may only access SSA values dominating the
pipeline or SSA values computed by any previous stage. This ensures the
stages capture the coarse-grained schedule of the pipeline and how values
feed forward and backward.
}];
let arguments = (ins
SI64Attr:$start,
Optional<I1>:$when
);
let results = (outs
Variadic<AnyType>:$results
);
let regions = (region
SizedRegion<1>:$body
);
let assemblyFormat = [{
`start` `=` $start (`when` $when^)? $body (`:` qualified(type($results))^)? attr-dict
}];
let hasVerifier = 1;
let skipDefaultBuilders = 1;
let builders = [
OpBuilder<(ins "mlir::TypeRange":$resultTypes, "mlir::IntegerAttr":$start)>
];
let extraClassDeclaration = [{
Block &getBodyBlock() { return getBody().front(); }
unsigned getStageNumber();
}];
}
def PipelineRegisterOp : Op<Pipeline_Dialect, "register",
[ParentOneOf<["PipelineWhileOp", "PipelineWhileStageOp"]>, Terminator]> {
let summary = "Pipeline dialect pipeline register.";
let description = [{
The `pipeline.register` terminates a pipeline stage and
"registers" the values specified as operands. These values become the
results of the stage.
}];
let arguments = (ins
Variadic<AnyType>:$operands
);
let assemblyFormat = [{
$operands (`:` qualified(type($operands))^)? attr-dict
}];
let hasVerifier = 1;
}
def PipelineTerminatorOp : Op<Pipeline_Dialect, "terminator",
[HasParent<"PipelineWhileOp">, Terminator, AttrSizedOperandSegments]> {
let summary = "Pipeline dialect pipeline terminator.";
let description = [{
The `pipeline.terminator` operation represents the terminator of
a `pipeline.while`.
The `results` section accepts a variadic list of values which become the
pipelines return values. These must be results of a stage, and their types
must match the pipeline's return types. The results need not be defined in
the final stage, and it is up to lowering passes to preserve these values
until the final stage is complete.
The `iter_args` section accepts a variadic list of values which become the
next iterations `iter_args`. These may be the results of any stage, and
their types must match the pipeline's `iter_args` types.
}];
let arguments = (ins
Variadic<AnyType>:$iter_args,
Variadic<AnyType>:$results
);
let assemblyFormat = [{
`iter_args` `(` $iter_args `)` `,`
`results` `(` $results `)` `:`
functional-type($iter_args, $results) attr-dict
}];
let hasVerifier = 1;
}
#endif // PIPELINE_OPS

View File

@ -1,4 +1,4 @@
//===- AffineToStaticlogic.cpp --------------------------------------------===//
//===- AffineToLoopSchedule.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
#include "circt/Conversion/AffineToPipeline.h"
#include "circt/Conversion/AffineToLoopSchedule.h"
#include "../PassDetail.h"
#include "circt/Analysis/DependenceAnalysis.h"
#include "circt/Analysis/SchedulingAnalysis.h"
#include "circt/Dialect/Pipeline/Pipeline.h"
#include "circt/Dialect/LoopSchedule/LoopScheduleOps.h"
#include "circt/Scheduling/Algorithms.h"
#include "circt/Scheduling/Problems.h"
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
@ -36,7 +36,7 @@
#include <cassert>
#include <limits>
#define DEBUG_TYPE "affine-to-pipeline"
#define DEBUG_TYPE "affine-to-loopschedule"
using namespace mlir;
using namespace mlir::arith;
@ -46,11 +46,12 @@ using namespace mlir::func;
using namespace circt;
using namespace circt::analysis;
using namespace circt::scheduling;
using namespace circt::pipeline;
using namespace circt::loopschedule;
namespace {
struct AffineToPipeline : public AffineToPipelineBase<AffineToPipeline> {
struct AffineToLoopSchedule
: public AffineToLoopScheduleBase<AffineToLoopSchedule> {
void runOnOperation() override;
private:
@ -61,15 +62,16 @@ private:
ModuloProblem &problem);
LogicalResult solveSchedulingProblem(SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem);
LogicalResult createPipelinePipeline(SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem);
LogicalResult
createLoopSchedulePipeline(SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem);
CyclicSchedulingAnalysis *schedulingAnalysis;
};
} // namespace
ModuloProblem AffineToPipeline::getModuloProblem(CyclicProblem &prob) {
ModuloProblem AffineToLoopSchedule::getModuloProblem(CyclicProblem &prob) {
auto modProb = ModuloProblem::get(prob.getContainingOp());
for (auto *op : prob.getOperations()) {
auto opr = prob.getLinkedOperatorType(op);
@ -98,7 +100,7 @@ ModuloProblem AffineToPipeline::getModuloProblem(CyclicProblem &prob) {
return modProb;
}
void AffineToPipeline::runOnOperation() {
void AffineToLoopSchedule::runOnOperation() {
// Get dependence analysis for the whole function.
auto dependenceAnalysis = getAnalysis<MemoryDependenceAnalysis>();
@ -131,7 +133,7 @@ void AffineToPipeline::runOnOperation() {
return signalPassFailure();
// Convert the IR.
if (failed(createPipelinePipeline(nestedLoops, moduloProblem)))
if (failed(createLoopSchedulePipeline(nestedLoops, moduloProblem)))
return signalPassFailure();
}
}
@ -247,7 +249,7 @@ static bool yieldOpLegalityCallback(AffineYieldOp op) {
/// computations in the condition of ifs, or the addresses of loads and stores.
/// The dependence analysis will be updated so the dependences from the affine
/// loads and stores are now on the memref loads and stores.
LogicalResult AffineToPipeline::lowerAffineStructures(
LogicalResult AffineToLoopSchedule::lowerAffineStructures(
MemoryDependenceAnalysis &dependenceAnalysis) {
auto *context = &getContext();
auto op = getOperation();
@ -275,9 +277,8 @@ LogicalResult AffineToPipeline::lowerAffineStructures(
/// targetting. Right now, we assume Calyx, which has a standard library with
/// well-defined operator latencies. Ultimately, we should move this to a
/// dialect interface in the Scheduling dialect.
LogicalResult
AffineToPipeline::populateOperatorTypes(SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem) {
LogicalResult AffineToLoopSchedule::populateOperatorTypes(
SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
// Scheduling analyis only considers the innermost loop nest for now.
auto forOp = loopNest.back();
@ -352,9 +353,8 @@ AffineToPipeline::populateOperatorTypes(SmallVectorImpl<AffineForOp> &loopNest,
}
/// Solve the pre-computed scheduling problem.
LogicalResult
AffineToPipeline::solveSchedulingProblem(SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem) {
LogicalResult AffineToLoopSchedule::solveSchedulingProblem(
SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
// Scheduling analyis only considers the innermost loop nest for now.
auto forOp = loopNest.back();
@ -397,10 +397,9 @@ AffineToPipeline::solveSchedulingProblem(SmallVectorImpl<AffineForOp> &loopNest,
return success();
}
/// Create the pipeline op for a loop nest.
LogicalResult
AffineToPipeline::createPipelinePipeline(SmallVectorImpl<AffineForOp> &loopNest,
ModuloProblem &problem) {
/// Create the loopschedule pipeline op for a loop nest.
LogicalResult AffineToLoopSchedule::createLoopSchedulePipeline(
SmallVectorImpl<AffineForOp> &loopNest, ModuloProblem &problem) {
// Scheduling analyis only considers the innermost loop nest for now.
auto forOp = loopNest.back();
@ -432,8 +431,8 @@ AffineToPipeline::createPipelinePipeline(SmallVectorImpl<AffineForOp> &loopNest,
if (auto tripCount = getConstantTripCount(forOp))
tripCountAttr = builder.getI64IntegerAttr(*tripCount);
auto pipeline =
builder.create<PipelineWhileOp>(resultTypes, ii, tripCountAttr, iterArgs);
auto pipeline = builder.create<LoopSchedulePipelineOp>(
resultTypes, ii, tripCountAttr, iterArgs);
// Create the condition, which currently just compares the induction variable
// to the upper bound.
@ -562,7 +561,7 @@ AffineToPipeline::createPipelinePipeline(SmallVectorImpl<AffineForOp> &loopNest,
auto startTimeAttr = builder.getIntegerAttr(
builder.getIntegerType(64, /*isSigned=*/true), startTime);
auto stage =
builder.create<PipelineWhileStageOp>(stageTypes, startTimeAttr);
builder.create<LoopSchedulePipelineStageOp>(stageTypes, startTimeAttr);
auto &stageBlock = stage.getBodyBlock();
auto *stageTerminator = stageBlock.getTerminator();
builder.setInsertionPointToStart(&stageBlock);
@ -609,7 +608,7 @@ AffineToPipeline::createPipelinePipeline(SmallVectorImpl<AffineForOp> &loopNest,
// Add the iter args and results to the terminator.
auto stagesTerminator =
cast<PipelineTerminatorOp>(stagesBlock.getTerminator());
cast<LoopScheduleTerminatorOp>(stagesBlock.getTerminator());
// Collect iter args and results from the induction variable increment and any
// mapped values that were originally yielded.
@ -644,6 +643,6 @@ AffineToPipeline::createPipelinePipeline(SmallVectorImpl<AffineForOp> &loopNest,
return success();
}
std::unique_ptr<mlir::Pass> circt::createAffineToPipeline() {
return std::make_unique<AffineToPipeline>();
std::unique_ptr<mlir::Pass> circt::createAffineToLoopSchedule() {
return std::make_unique<AffineToLoopSchedule>();
}

View File

@ -1,5 +1,5 @@
add_circt_library(CIRCTAffineToPipeline
AffineToPipeline.cpp
add_circt_library(CIRCTAffineToLoopSchedule
AffineToLoopSchedule.cpp
DEPENDS
CIRCTConversionPassIncGen
@ -9,5 +9,5 @@ add_circt_library(CIRCTAffineToPipeline
MLIRPass
CIRCTScheduling
CIRCTSchedulingAnalysis
CIRCTPipelineOps
CIRCTLoopSchedule
)

View File

@ -1,4 +1,4 @@
add_subdirectory(AffineToPipeline)
add_subdirectory(AffineToLoopSchedule)
add_subdirectory(ArcToLLVM)
add_subdirectory(CalyxToFSM)
add_subdirectory(ConvertToArcs)
@ -17,7 +17,7 @@ add_subdirectory(CombToArith)
add_subdirectory(CombToLLVM)
add_subdirectory(MooreToCore)
add_subdirectory(SCFToCalyx)
add_subdirectory(PipelineToCalyx)
add_subdirectory(LoopScheduleToCalyx)
add_subdirectory(StandardToHandshake)
add_subdirectory(FSMToSV)
add_subdirectory(PipelineToHW)

View File

@ -1,5 +1,5 @@
add_circt_conversion_library(CIRCTPipelineToCalyx
PipelineToCalyx.cpp
add_circt_conversion_library(CIRCTLoopScheduleToCalyx
LoopScheduleToCalyx.cpp
DEPENDS
CIRCTConversionPassIncGen
@ -10,7 +10,7 @@ add_circt_conversion_library(CIRCTPipelineToCalyx
LINK_LIBS PUBLIC
CIRCTCalyx
CIRCTCalyxTransforms
CIRCTPipelineOps
CIRCTLoopSchedule
MLIRIR
MLIRPass
MLIRArithDialect

View File

@ -1,4 +1,4 @@
//=== PipelineToCalyx.cpp - Pipeline to Calyx pass entry point ------*-----===//
//=== LoopScheduleToCalyx.cpp - LoopSchedule to Calyx pass entry point ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@ -6,18 +6,18 @@
//
//===----------------------------------------------------------------------===//
//
// This is the main Pipeline to Calyx conversion pass implementation.
// This is the main LoopSchedule to Calyx conversion pass implementation.
//
//===----------------------------------------------------------------------===//
#include "circt/Conversion/PipelineToCalyx.h"
#include "circt/Conversion/LoopScheduleToCalyx.h"
#include "../PassDetail.h"
#include "circt/Dialect/Calyx/CalyxHelpers.h"
#include "circt/Dialect/Calyx/CalyxLoweringUtils.h"
#include "circt/Dialect/Calyx/CalyxOps.h"
#include "circt/Dialect/Comb/CombOps.h"
#include "circt/Dialect/HW/HWOps.h"
#include "circt/Dialect/Pipeline/Pipeline.h"
#include "circt/Dialect/LoopSchedule/LoopScheduleOps.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
@ -36,6 +36,7 @@ using namespace mlir;
using namespace mlir::arith;
using namespace mlir::cf;
using namespace mlir::func;
using namespace circt::loopschedule;
namespace circt {
namespace pipelinetocalyx {
@ -44,11 +45,10 @@ namespace pipelinetocalyx {
// Utility types
//===----------------------------------------------------------------------===//
class PipelineWhileOp
: public calyx::WhileOpInterface<pipeline::PipelineWhileOp> {
class PipelineWhileOp : public calyx::WhileOpInterface<LoopSchedulePipelineOp> {
public:
explicit PipelineWhileOp(pipeline::PipelineWhileOp op)
: calyx::WhileOpInterface<pipeline::PipelineWhileOp>(op) {}
explicit PipelineWhileOp(LoopSchedulePipelineOp op)
: calyx::WhileOpInterface<LoopSchedulePipelineOp>(op) {}
Block::BlockArgListType getBodyArgs() override {
return getOperation().getStagesBlock().getArguments();
@ -221,11 +221,11 @@ class BuildOpGroups : public calyx::FuncOpPartialLoweringPattern {
AndIOp, XOrIOp, OrIOp, ExtUIOp, TruncIOp, MulIOp,
DivUIOp, RemUIOp, IndexCastOp,
/// static logic
pipeline::PipelineTerminatorOp>(
LoopScheduleTerminatorOp>(
[&](auto op) { return buildOp(rewriter, op).succeeded(); })
.template Case<FuncOp, pipeline::PipelineWhileOp,
pipeline::PipelineRegisterOp,
pipeline::PipelineWhileStageOp>([&](auto) {
.template Case<FuncOp, LoopSchedulePipelineOp,
LoopScheduleRegisterOp,
LoopSchedulePipelineStageOp>([&](auto) {
/// Skip: these special cases will be handled separately.
return true;
})
@ -268,7 +268,7 @@ private:
LogicalResult buildOp(PatternRewriter &rewriter, memref::LoadOp op) const;
LogicalResult buildOp(PatternRewriter &rewriter, memref::StoreOp op) const;
LogicalResult buildOp(PatternRewriter &rewriter,
pipeline::PipelineTerminatorOp op) const;
LoopScheduleTerminatorOp op) const;
/// buildLibraryOp will build a TCalyxLibOp inside a TGroupOp based on the
/// source operation TSrcOp.
@ -545,9 +545,8 @@ LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
return buildAllocOp(getState<ComponentLoweringState>(), rewriter, allocOp);
}
LogicalResult
BuildOpGroups::buildOp(PatternRewriter &rewriter,
pipeline::PipelineTerminatorOp term) const {
LogicalResult BuildOpGroups::buildOp(PatternRewriter &rewriter,
LoopScheduleTerminatorOp term) const {
if (term.getOperands().size() == 0)
return success();
@ -844,10 +843,10 @@ class BuildWhileGroups : public calyx::FuncOpPartialLoweringPattern {
PatternRewriter &rewriter) const override {
LogicalResult res = success();
funcOp.walk([&](Operation *op) {
if (!isa<pipeline::PipelineWhileOp>(op))
if (!isa<LoopSchedulePipelineOp>(op))
return WalkResult::advance();
PipelineWhileOp whileOp(cast<pipeline::PipelineWhileOp>(op));
PipelineWhileOp whileOp(cast<LoopSchedulePipelineOp>(op));
getState<ComponentLoweringState>().setUniqueName(whileOp.getOperation(),
"while");
@ -910,10 +909,10 @@ class BuildPipelineRegs : public calyx::FuncOpPartialLoweringPattern {
LogicalResult
partiallyLowerFuncToComp(FuncOp funcOp,
PatternRewriter &rewriter) const override {
funcOp.walk([&](pipeline::PipelineRegisterOp op) {
funcOp.walk([&](LoopScheduleRegisterOp op) {
// Condition registers are handled in BuildWhileGroups.
auto *parent = op->getParentOp();
auto stage = dyn_cast<pipeline::PipelineWhileStageOp>(parent);
auto stage = dyn_cast<LoopSchedulePipelineStageOp>(parent);
if (!stage)
return;
@ -925,11 +924,10 @@ class BuildPipelineRegs : public calyx::FuncOpPartialLoweringPattern {
Value stageResult = stage.getResult(i);
bool isIterArg = false;
for (auto &use : stageResult.getUses()) {
if (auto term =
dyn_cast<pipeline::PipelineTerminatorOp>(use.getOwner())) {
if (auto term = dyn_cast<LoopScheduleTerminatorOp>(use.getOwner())) {
if (use.getOperandNumber() < term.getIterArgs().size()) {
PipelineWhileOp whileOp(
dyn_cast<pipeline::PipelineWhileOp>(stage->getParentOp()));
dyn_cast<LoopSchedulePipelineOp>(stage->getParentOp()));
auto reg = getState<ComponentLoweringState>().getLoopIterReg(
whileOp, use.getOperandNumber());
getState<ComponentLoweringState>().addPipelineReg(stage, reg, i);
@ -971,17 +969,17 @@ class BuildPipelineGroups : public calyx::FuncOpPartialLoweringPattern {
LogicalResult
partiallyLowerFuncToComp(FuncOp funcOp,
PatternRewriter &rewriter) const override {
for (auto pipeline : funcOp.getOps<pipeline::PipelineWhileOp>())
for (auto pipeline : funcOp.getOps<LoopSchedulePipelineOp>())
for (auto stage :
pipeline.getStagesBlock().getOps<pipeline::PipelineWhileStageOp>())
pipeline.getStagesBlock().getOps<LoopSchedulePipelineStageOp>())
if (failed(buildStageGroups(pipeline, stage, rewriter)))
return failure();
return success();
}
LogicalResult buildStageGroups(pipeline::PipelineWhileOp whileOp,
pipeline::PipelineWhileStageOp stage,
LogicalResult buildStageGroups(LoopSchedulePipelineOp whileOp,
LoopSchedulePipelineStageOp stage,
PatternRewriter &rewriter) const {
// Collect pipeline registers for stage.
auto pipelineRegisters =
@ -1366,10 +1364,11 @@ class CleanupFuncOps : public calyx::FuncOpPartialLoweringPattern {
//===----------------------------------------------------------------------===//
// Pass driver
//===----------------------------------------------------------------------===//
class PipelineToCalyxPass : public PipelineToCalyxBase<PipelineToCalyxPass> {
class LoopScheduleToCalyxPass
: public LoopScheduleToCalyxBase<LoopScheduleToCalyxPass> {
public:
PipelineToCalyxPass()
: PipelineToCalyxBase<PipelineToCalyxPass>(),
LoopScheduleToCalyxPass()
: LoopScheduleToCalyxBase<LoopScheduleToCalyxPass>(),
partialPatternRes(success()) {}
void runOnOperation() override;
@ -1496,7 +1495,7 @@ private:
std::shared_ptr<calyx::CalyxLoweringState> loweringState = nullptr;
};
void PipelineToCalyxPass::runOnOperation() {
void LoopScheduleToCalyxPass::runOnOperation() {
// Clear internal state. See https://github.com/llvm/circt/issues/3235
loweringState.reset();
partialPatternRes = LogicalResult::failure();
@ -1641,8 +1640,8 @@ void PipelineToCalyxPass::runOnOperation() {
// Pass initialization
//===----------------------------------------------------------------------===//
std::unique_ptr<OperationPass<ModuleOp>> createPipelineToCalyxPass() {
return std::make_unique<pipelinetocalyx::PipelineToCalyxPass>();
std::unique_ptr<OperationPass<ModuleOp>> createLoopScheduleToCalyxPass() {
return std::make_unique<pipelinetocalyx::LoopScheduleToCalyxPass>();
}
} // namespace circt

View File

@ -84,6 +84,10 @@ namespace llhd {
class LLHDDialect;
} // namespace llhd
namespace loopschedule {
class LoopScheduleDialect;
} // namespace loopschedule
namespace comb {
class CombDialect;
} // namespace comb

View File

@ -21,6 +21,307 @@ using namespace mlir;
using namespace circt;
using namespace circt::loopschedule;
void LoopScheduleDialect::initialize() {}
//===----------------------------------------------------------------------===//
// LoopSchedulePipelineOp
//===----------------------------------------------------------------------===//
ParseResult LoopSchedulePipelineOp::parse(OpAsmParser &parser,
OperationState &result) {
// Parse initiation interval.
IntegerAttr ii;
if (parser.parseKeyword("II") || parser.parseEqual() ||
parser.parseAttribute(ii))
return failure();
result.addAttribute("II", ii);
// Parse optional trip count.
if (succeeded(parser.parseOptionalKeyword("trip_count"))) {
IntegerAttr tripCount;
if (parser.parseEqual() || parser.parseAttribute(tripCount))
return failure();
result.addAttribute("tripCount", tripCount);
}
// Parse iter_args assignment list.
SmallVector<OpAsmParser::Argument> regionArgs;
SmallVector<OpAsmParser::UnresolvedOperand> operands;
if (succeeded(parser.parseOptionalKeyword("iter_args"))) {
if (parser.parseAssignmentList(regionArgs, operands))
return failure();
}
// Parse function type from iter_args to results.
FunctionType type;
if (parser.parseColon() || parser.parseType(type))
return failure();
// Function result type is the pipeline result type.
result.addTypes(type.getResults());
// Resolve iter_args operands.
for (auto [regionArg, operand, type] :
llvm::zip(regionArgs, operands, type.getInputs())) {
regionArg.type = type;
if (parser.resolveOperand(operand, type, result.operands))
return failure();
}
// Parse condition region.
Region *condition = result.addRegion();
if (parser.parseRegion(*condition, regionArgs))
return failure();
// Parse stages region.
if (parser.parseKeyword("do"))
return failure();
Region *stages = result.addRegion();
if (parser.parseRegion(*stages, regionArgs))
return failure();
return success();
}
void LoopSchedulePipelineOp::print(OpAsmPrinter &p) {
// Print the initiation interval.
p << " II = " << ' ' << getII();
// Print the optional tripCount.
if (getTripCount())
p << " trip_count = " << ' ' << *getTripCount();
// Print iter_args assignment list.
p << " iter_args(";
llvm::interleaveComma(
llvm::zip(getStages().getArguments(), getIterArgs()), p,
[&](auto it) { p << std::get<0>(it) << " = " << std::get<1>(it); });
p << ") : ";
// Print function type from iter_args to results.
auto type = FunctionType::get(getContext(), getStages().getArgumentTypes(),
getResultTypes());
p.printType(type);
// Print condition region.
p << ' ';
p.printRegion(getCondition(), /*printEntryBlockArgs=*/false);
p << " do";
// Print stages region.
p << ' ';
p.printRegion(getStages(), /*printEntryBlockArgs=*/false);
}
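As a sketch of the concrete syntax this parser and printer round-trip, mirroring the example in the dialect rationale doc (the `trip_count` attribute is optional, and the stage bodies are elided here):

```mlir
%0 = loopschedule.pipeline II = 1 trip_count = 10 iter_args(%arg1 = %c0, %arg2 = %c0_i32) : (index, i32) -> i32 {
  // Condition region: must be combinational and register a single i1.
  %1 = arith.cmpi ult, %arg1, %c10 : index
  loopschedule.register %1 : i1
} do {
  // Stages region: only 'loopschedule.pipeline.stage' and
  // 'loopschedule.terminator' ops may appear here.
  ...
}
```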
LogicalResult LoopSchedulePipelineOp::verify() {
// Verify the condition block is "combinational" based on an allowlist of
// Arithmetic ops.
Block &conditionBlock = getCondition().front();
Operation *nonCombinational;
WalkResult conditionWalk = conditionBlock.walk([&](Operation *op) {
if (isa<LoopScheduleDialect>(op->getDialect()))
return WalkResult::advance();
if (!isa<arith::AddIOp, arith::AndIOp, arith::BitcastOp, arith::CmpIOp,
arith::ConstantOp, arith::IndexCastOp, arith::MulIOp, arith::OrIOp,
arith::SelectOp, arith::ShLIOp, arith::ExtSIOp, arith::CeilDivSIOp,
arith::DivSIOp, arith::FloorDivSIOp, arith::RemSIOp,
arith::ShRSIOp, arith::SubIOp, arith::TruncIOp, arith::DivUIOp,
arith::RemUIOp, arith::ShRUIOp, arith::XOrIOp, arith::ExtUIOp>(
op)) {
nonCombinational = op;
return WalkResult::interrupt();
}
return WalkResult::advance();
});
if (conditionWalk.wasInterrupted())
return emitOpError("condition must have a combinational body, found ")
<< *nonCombinational;
// Verify the condition block terminates with a value of type i1.
TypeRange conditionResults =
conditionBlock.getTerminator()->getOperandTypes();
if (conditionResults.size() != 1)
return emitOpError("condition must terminate with a single result, found ")
<< conditionResults;
if (conditionResults.front() != IntegerType::get(getContext(), 1))
return emitOpError("condition must terminate with an i1 result, found ")
<< conditionResults.front();
// Verify the stages block contains at least one stage and a terminator.
Block &stagesBlock = getStages().front();
if (stagesBlock.getOperations().size() < 2)
return emitOpError("stages must contain at least one stage");
int64_t lastStartTime = -1;
for (Operation &inner : stagesBlock) {
// Verify the stages block contains only `loopschedule.pipeline.stage` and
// `loopschedule.terminator` ops.
if (!isa<LoopSchedulePipelineStageOp, LoopScheduleTerminatorOp>(inner))
return emitOpError(
"stages may only contain 'loopschedule.pipeline.stage' or "
"'loopschedule.terminator' ops, found ")
<< inner;
// Verify the stage start times are monotonically increasing.
if (auto stage = dyn_cast<LoopSchedulePipelineStageOp>(inner)) {
if (lastStartTime == -1) {
lastStartTime = stage.getStart();
continue;
}
if (lastStartTime >= stage.getStart())
return stage.emitOpError("'start' must be after previous 'start' (")
<< lastStartTime << ')';
lastStartTime = stage.getStart();
}
}
return success();
}
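For illustration, a sketch of IR that would trip the monotonic start-time check above (stage bodies are abbreviated, and the `start` attribute spelling is assumed from the op definitions):

```mlir
loopschedule.pipeline II = 1 ... do {
  %0 = loopschedule.pipeline.stage start = 2 { ... }
  // error: 'start' must be after previous 'start' (2)
  %1 = loopschedule.pipeline.stage start = 1 { ... }
  loopschedule.terminator ...
}
```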
void LoopSchedulePipelineOp::build(OpBuilder &builder, OperationState &state,
TypeRange resultTypes, IntegerAttr ii,
std::optional<IntegerAttr> tripCount,
ValueRange iterArgs) {
OpBuilder::InsertionGuard g(builder);
state.addTypes(resultTypes);
state.addAttribute("II", ii);
if (tripCount)
state.addAttribute("tripCount", *tripCount);
state.addOperands(iterArgs);
Region *condRegion = state.addRegion();
Block &condBlock = condRegion->emplaceBlock();
SmallVector<Location, 4> argLocs;
for (auto arg : iterArgs)
argLocs.push_back(arg.getLoc());
condBlock.addArguments(iterArgs.getTypes(), argLocs);
builder.setInsertionPointToEnd(&condBlock);
builder.create<LoopScheduleRegisterOp>(builder.getUnknownLoc(), ValueRange());
Region *stagesRegion = state.addRegion();
Block &stagesBlock = stagesRegion->emplaceBlock();
stagesBlock.addArguments(iterArgs.getTypes(), argLocs);
builder.setInsertionPointToEnd(&stagesBlock);
builder.create<LoopScheduleTerminatorOp>(builder.getUnknownLoc(),
ValueRange(), ValueRange());
}
//===----------------------------------------------------------------------===//
// LoopSchedulePipelineStageOp
//===----------------------------------------------------------------------===//
LogicalResult LoopSchedulePipelineStageOp::verify() {
if (getStart() < 0)
return emitOpError("'start' must be non-negative");
return success();
}
void LoopSchedulePipelineStageOp::build(OpBuilder &builder,
OperationState &state,
TypeRange resultTypes,
IntegerAttr start) {
OpBuilder::InsertionGuard g(builder);
state.addTypes(resultTypes);
state.addAttribute("start", start);
Region *region = state.addRegion();
Block &block = region->emplaceBlock();
builder.setInsertionPointToEnd(&block);
builder.create<LoopScheduleRegisterOp>(builder.getUnknownLoc(), ValueRange());
}
unsigned LoopSchedulePipelineStageOp::getStageNumber() {
unsigned number = 0;
auto *op = getOperation();
auto parent = op->getParentOfType<LoopSchedulePipelineOp>();
Operation *stage = &parent.getStagesBlock().front();
while (stage != op && stage->getNextNode()) {
++number;
stage = stage->getNextNode();
}
return number;
}
//===----------------------------------------------------------------------===//
// LoopScheduleRegisterOp
//===----------------------------------------------------------------------===//
LogicalResult LoopScheduleRegisterOp::verify() {
LoopSchedulePipelineStageOp stage =
(*this)->getParentOfType<LoopSchedulePipelineStageOp>();
// If this doesn't terminate a stage, it is terminating the condition.
if (stage == nullptr)
return success();
// Verify stage terminates with the same types as the result types.
TypeRange registerTypes = getOperandTypes();
TypeRange resultTypes = stage.getResultTypes();
if (registerTypes != resultTypes)
return emitOpError("operand types (")
<< registerTypes << ") must match result types (" << resultTypes
<< ")";
return success();
}
//===----------------------------------------------------------------------===//
// LoopScheduleTerminatorOp
//===----------------------------------------------------------------------===//
LogicalResult LoopScheduleTerminatorOp::verify() {
LoopSchedulePipelineOp pipeline =
(*this)->getParentOfType<LoopSchedulePipelineOp>();
// Verify pipeline terminates with the same `iter_args` types as the pipeline.
auto iterArgs = getIterArgs();
TypeRange terminatorArgTypes = iterArgs.getTypes();
TypeRange pipelineArgTypes = pipeline.getIterArgs().getTypes();
if (terminatorArgTypes != pipelineArgTypes)
return emitOpError("'iter_args' types (")
<< terminatorArgTypes << ") must match pipeline 'iter_args' types ("
<< pipelineArgTypes << ")";
// Verify `iter_args` are defined by a pipeline stage.
for (auto iterArg : iterArgs)
if (iterArg.getDefiningOp<LoopSchedulePipelineStageOp>() == nullptr)
return emitOpError(
"'iter_args' must be defined by a 'loopschedule.pipeline.stage'");
// Verify pipeline terminates with the same result types as the pipeline.
auto opResults = getResults();
TypeRange terminatorResultTypes = opResults.getTypes();
TypeRange pipelineResultTypes = pipeline.getResultTypes();
if (terminatorResultTypes != pipelineResultTypes)
return emitOpError("'results' types (")
<< terminatorResultTypes << ") must match pipeline result types ("
<< pipelineResultTypes << ")";
// Verify `results` are defined by a pipeline stage.
for (auto result : opResults)
if (result.getDefiningOp<LoopSchedulePipelineStageOp>() == nullptr)
return emitOpError(
"'results' must be defined by a 'loopschedule.pipeline.stage'");
return success();
}
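A sketch of a well-formed terminator satisfying the checks above: both `iter_args` and `results` are defined by a pipeline stage, and their types line up with the enclosing pipeline's `iter_args` and result types (operand names and the two-result stage are illustrative):

```mlir
%0 = loopschedule.pipeline II = 1 iter_args(%i = %c0) : (index) -> i32 {
  ...
} do {
  %s:2 = loopschedule.pipeline.stage ...
  // %s#0 : index feeds the next iteration; %s#1 : i32 is the pipeline result.
  loopschedule.terminator iter_args(%s#0), results(%s#1)
}
```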
#define GET_OP_CLASSES
#include "circt/Dialect/LoopSchedule/LoopSchedule.cpp.inc"
void LoopScheduleDialect::initialize() {
addOperations<
#define GET_OP_LIST
#include "circt/Dialect/LoopSchedule/LoopSchedule.cpp.inc"
>();
}
#include "circt/Dialect/LoopSchedule/LoopScheduleDialect.cpp.inc"

View File

@ -106,196 +106,6 @@ LogicalResult ReturnOp::verify() {
return success();
}
//===----------------------------------------------------------------------===//
// PipelineWhileOp
//===----------------------------------------------------------------------===//
ParseResult PipelineWhileOp::parse(OpAsmParser &parser,
OperationState &result) {
// Parse initiation interval.
IntegerAttr ii;
if (parser.parseKeyword("II") || parser.parseEqual() ||
parser.parseAttribute(ii))
return failure();
result.addAttribute("II", ii);
// Parse optional trip count.
if (succeeded(parser.parseOptionalKeyword("trip_count"))) {
IntegerAttr tripCount;
if (parser.parseEqual() || parser.parseAttribute(tripCount))
return failure();
result.addAttribute("tripCount", tripCount);
}
// Parse iter_args assignment list.
SmallVector<OpAsmParser::Argument> regionArgs;
SmallVector<OpAsmParser::UnresolvedOperand> operands;
if (succeeded(parser.parseOptionalKeyword("iter_args"))) {
if (parser.parseAssignmentList(regionArgs, operands))
return failure();
}
// Parse function type from iter_args to results.
FunctionType type;
if (parser.parseColon() || parser.parseType(type))
return failure();
// Function result type is the pipeline result type.
result.addTypes(type.getResults());
// Resolve iter_args operands.
for (auto [regionArg, operand, type] :
llvm::zip(regionArgs, operands, type.getInputs())) {
regionArg.type = type;
if (parser.resolveOperand(operand, type, result.operands))
return failure();
}
// Parse condition region.
Region *condition = result.addRegion();
if (parser.parseRegion(*condition, regionArgs))
return failure();
// Parse stages region.
if (parser.parseKeyword("do"))
return failure();
Region *stages = result.addRegion();
if (parser.parseRegion(*stages, regionArgs))
return failure();
return success();
}
void PipelineWhileOp::print(OpAsmPrinter &p) {
// Print the initiation interval.
p << " II = " << ' ' << getII();
// Print the optional tripCount.
if (getTripCount())
p << " trip_count = " << ' ' << *getTripCount();
// Print iter_args assignment list.
p << " iter_args(";
llvm::interleaveComma(
llvm::zip(getStages().getArguments(), getIterArgs()), p,
[&](auto it) { p << std::get<0>(it) << " = " << std::get<1>(it); });
p << ") : ";
// Print function type from iter_args to results.
auto type = FunctionType::get(getContext(), getStages().getArgumentTypes(),
getResultTypes());
p.printType(type);
// Print condition region.
p << ' ';
p.printRegion(getCondition(), /*printEntryBlockArgs=*/false);
p << " do";
// Print stages region.
p << ' ';
p.printRegion(getStages(), /*printEntryBlockArgs=*/false);
}
LogicalResult PipelineWhileOp::verify() {
// Verify the condition block is "combinational" based on an allowlist of
// Arithmetic ops.
Block &conditionBlock = getCondition().front();
Operation *nonCombinational;
WalkResult conditionWalk = conditionBlock.walk([&](Operation *op) {
if (isa<PipelineDialect>(op->getDialect()))
return WalkResult::advance();
if (!isa<arith::AddIOp, arith::AndIOp, arith::BitcastOp, arith::CmpIOp,
arith::ConstantOp, arith::IndexCastOp, arith::MulIOp, arith::OrIOp,
arith::SelectOp, arith::ShLIOp, arith::ExtSIOp, arith::CeilDivSIOp,
arith::DivSIOp, arith::FloorDivSIOp, arith::RemSIOp,
arith::ShRSIOp, arith::SubIOp, arith::TruncIOp, arith::DivUIOp,
arith::RemUIOp, arith::ShRUIOp, arith::XOrIOp, arith::ExtUIOp>(
op)) {
nonCombinational = op;
return WalkResult::interrupt();
}
return WalkResult::advance();
});
if (conditionWalk.wasInterrupted())
return emitOpError("condition must have a combinational body, found ")
<< *nonCombinational;
// Verify the condition block terminates with a value of type i1.
TypeRange conditionResults =
conditionBlock.getTerminator()->getOperandTypes();
if (conditionResults.size() != 1)
return emitOpError("condition must terminate with a single result, found ")
<< conditionResults;
if (conditionResults.front() != IntegerType::get(getContext(), 1))
return emitOpError("condition must terminate with an i1 result, found ")
<< conditionResults.front();
// Verify the stages block contains at least one stage and a terminator.
Block &stagesBlock = getStages().front();
if (stagesBlock.getOperations().size() < 2)
return emitOpError("stages must contain at least one stage");
int64_t lastStartTime = -1;
for (Operation &inner : stagesBlock) {
// Verify the stages block contains only `pipeline.while.stage` and
// `pipeline.terminator` ops.
if (!isa<PipelineWhileStageOp, PipelineTerminatorOp>(inner))
return emitOpError("stages may only contain 'pipeline.while.stage' or "
"'pipeline.terminator' ops, found ")
<< inner;
// Verify the stage start times are monotonically increasing.
if (auto stage = dyn_cast<PipelineWhileStageOp>(inner)) {
if (lastStartTime == -1) {
lastStartTime = stage.getStart();
continue;
}
if (lastStartTime >= stage.getStart())
return stage.emitOpError("'start' must be after previous 'start' (")
<< lastStartTime << ')';
lastStartTime = stage.getStart();
}
}
return success();
}
void PipelineWhileOp::build(OpBuilder &builder, OperationState &state,
TypeRange resultTypes, IntegerAttr ii,
std::optional<IntegerAttr> tripCount,
ValueRange iterArgs) {
OpBuilder::InsertionGuard g(builder);
state.addTypes(resultTypes);
state.addAttribute("II", ii);
if (tripCount)
state.addAttribute("tripCount", *tripCount);
state.addOperands(iterArgs);
Region *condRegion = state.addRegion();
Block &condBlock = condRegion->emplaceBlock();
SmallVector<Location, 4> argLocs;
for (auto arg : iterArgs)
argLocs.push_back(arg.getLoc());
condBlock.addArguments(iterArgs.getTypes(), argLocs);
builder.setInsertionPointToEnd(&condBlock);
builder.create<PipelineRegisterOp>(builder.getUnknownLoc(), ValueRange());
Region *stagesRegion = state.addRegion();
Block &stagesBlock = stagesRegion->emplaceBlock();
stagesBlock.addArguments(iterArgs.getTypes(), argLocs);
builder.setInsertionPointToEnd(&stagesBlock);
builder.create<PipelineTerminatorOp>(builder.getUnknownLoc(), ValueRange(),
ValueRange());
}
//===----------------------------------------------------------------------===//
// PipelineStageRegisterOp
//===----------------------------------------------------------------------===//
@ -307,104 +117,6 @@ void PipelineStageRegisterOp::build(OpBuilder &builder, OperationState &state,
state.addTypes({when.getType()});
}
//===----------------------------------------------------------------------===//
// PipelineWhileStageOp
//===----------------------------------------------------------------------===//
LogicalResult PipelineWhileStageOp::verify() {
if (getStart() < 0)
return emitOpError("'start' must be non-negative");
return success();
}
void PipelineWhileStageOp::build(OpBuilder &builder, OperationState &state,
TypeRange resultTypes, IntegerAttr start) {
OpBuilder::InsertionGuard g(builder);
state.addTypes(resultTypes);
state.addAttribute("start", start);
Region *region = state.addRegion();
Block &block = region->emplaceBlock();
builder.setInsertionPointToEnd(&block);
builder.create<PipelineRegisterOp>(builder.getUnknownLoc(), ValueRange());
}
unsigned PipelineWhileStageOp::getStageNumber() {
unsigned number = 0;
auto *op = getOperation();
auto parent = op->getParentOfType<PipelineWhileOp>();
Operation *stage = &parent.getStagesBlock().front();
while (stage != op && stage->getNextNode()) {
++number;
stage = stage->getNextNode();
}
return number;
}
//===----------------------------------------------------------------------===//
// PipelineRegisterOp
//===----------------------------------------------------------------------===//
LogicalResult PipelineRegisterOp::verify() {
PipelineWhileStageOp stage = (*this)->getParentOfType<PipelineWhileStageOp>();
// If this doesn't terminate a stage, it is terminating the condition.
if (stage == nullptr)
return success();
// Verify stage terminates with the same types as the result types.
TypeRange registerTypes = getOperandTypes();
TypeRange resultTypes = stage.getResultTypes();
if (registerTypes != resultTypes)
return emitOpError("operand types (")
<< registerTypes << ") must match result types (" << resultTypes
<< ")";
return success();
}
//===----------------------------------------------------------------------===//
// PipelineTerminatorOp
//===----------------------------------------------------------------------===//
LogicalResult PipelineTerminatorOp::verify() {
PipelineWhileOp pipeline = (*this)->getParentOfType<PipelineWhileOp>();
// Verify pipeline terminates with the same `iter_args` types as the pipeline.
auto iterArgs = getIterArgs();
TypeRange terminatorArgTypes = iterArgs.getTypes();
TypeRange pipelineArgTypes = pipeline.getIterArgs().getTypes();
if (terminatorArgTypes != pipelineArgTypes)
return emitOpError("'iter_args' types (")
<< terminatorArgTypes << ") must match pipeline 'iter_args' types ("
<< pipelineArgTypes << ")";
// Verify `iter_args` are defined by a pipeline stage.
for (auto iterArg : iterArgs)
if (iterArg.getDefiningOp<PipelineWhileStageOp>() == nullptr)
return emitOpError(
"'iter_args' must be defined by a 'pipeline.while.stage'");
// Verify pipeline terminates with the same result types as the pipeline.
auto opResults = getResults();
TypeRange terminatorResultTypes = opResults.getTypes();
TypeRange pipelineResultTypes = pipeline.getResultTypes();
if (terminatorResultTypes != pipelineResultTypes)
return emitOpError("'results' types (")
<< terminatorResultTypes << ") must match pipeline result types ("
<< pipelineResultTypes << ")";
// Verify `results` are defined by a pipeline stage.
for (auto result : opResults)
if (result.getDefiningOp<PipelineWhileStageOp>() == nullptr)
return emitOpError(
"'results' must be defined by a 'pipeline.while.stage'");
return success();
}
#define GET_OP_CLASSES
#include "circt/Dialect/Pipeline/Pipeline.cpp.inc"

View File

@ -1,4 +1,4 @@
// RUN: circt-opt -convert-affine-to-pipeline %s | FileCheck %s
// RUN: circt-opt -convert-affine-to-loopschedule %s | FileCheck %s
// CHECK-LABEL: func @minimal
func.func @minimal(%arg0 : memref<10xindex>) {
@ -7,20 +7,20 @@ func.func @minimal(%arg0 : memref<10xindex>) {
// CHECK: %[[UB:.+]] = arith.constant [[TRIP_COUNT:.+]] : [[ITER_TYPE]]
// CHECK: %[[STEP:.+]] = arith.constant 1 : [[ITER_TYPE]]
// Pipeline header.
// CHECK: pipeline.while II = 1 trip_count = [[TRIP_COUNT]] iter_args(%[[ITER_ARG:.+]] = %[[LB]]) : ([[ITER_TYPE]]) -> ()
// LoopSchedule Pipeline header.
// CHECK: loopschedule.pipeline II = 1 trip_count = [[TRIP_COUNT]] iter_args(%[[ITER_ARG:.+]] = %[[LB]]) : ([[ITER_TYPE]]) -> ()
// Condition block.
// CHECK: %[[COND_RESULT:.+]] = arith.cmpi ult, %[[ITER_ARG]]
// CHECK: pipeline.register %[[COND_RESULT]]
// CHECK: loopschedule.register %[[COND_RESULT]]
// First stage.
// CHECK: %[[STAGE0:.+]] = pipeline.while.stage
// CHECK: %[[STAGE0:.+]] = loopschedule.pipeline.stage
// CHECK: %[[ITER_INC:.+]] = arith.addi %[[ITER_ARG]], %[[STEP]]
// CHECK: pipeline.register %[[ITER_INC]]
// CHECK: loopschedule.register %[[ITER_INC]]
// Pipeline terminator.
// CHECK: pipeline.terminator iter_args(%[[STAGE0]]), results()
// LoopSchedule Pipeline terminator.
// CHECK: loopschedule.terminator iter_args(%[[STAGE0]]), results()
affine.for %arg1 = 0 to 10 {
affine.store %arg1, %arg0[%arg1] : memref<10xindex>
@ -31,27 +31,27 @@ func.func @minimal(%arg0 : memref<10xindex>) {
// CHECK-LABEL: func @dot
func.func @dot(%arg0: memref<64xi32>, %arg1: memref<64xi32>) -> i32 {
// Pipeline boilerplate checked above, just check the stages computations.
// LoopSchedule Pipeline boilerplate checked above, just check the stages computations.
// First stage.
// CHECK: %[[STAGE0:.+]]:3 = pipeline.while.stage
// CHECK: %[[STAGE0:.+]]:3 = loopschedule.pipeline.stage
// CHECK-DAG: %[[STAGE0_0:.+]] = memref.load %arg0[%arg2]
// CHECK-DAG: %[[STAGE0_1:.+]] = memref.load %arg1[%arg2]
// CHECK-DAG: %[[STAGE0_2:.+]] = arith.addi %arg2, %c1
// CHECK: pipeline.register %[[STAGE0_0]], %[[STAGE0_1]], %[[STAGE0_2]]
// CHECK: loopschedule.register %[[STAGE0_0]], %[[STAGE0_1]], %[[STAGE0_2]]
// Second stage.
// CHECK: %[[STAGE1:.+]] = pipeline.while.stage
// CHECK: %[[STAGE1:.+]] = loopschedule.pipeline.stage
// CHECK-DAG: %[[STAGE1_0:.+]] = arith.muli %[[STAGE0]]#0, %[[STAGE0]]#1 : i32
// CHECK: pipeline.register %[[STAGE1_0]]
// CHECK: loopschedule.register %[[STAGE1_0]]
// Third stage.
// CHECK: %[[STAGE2:.+]] = pipeline.while.stage
// CHECK: %[[STAGE2:.+]] = loopschedule.pipeline.stage
// CHECK-DAG: %[[STAGE2_0:.+]] = arith.addi %arg3, %2
// CHECK: pipeline.register %[[STAGE2_0]]
// CHECK: loopschedule.register %[[STAGE2_0]]
// Pipeline terminator.
// CHECK: pipeline.terminator iter_args(%[[STAGE0]]#2, %[[STAGE2]]), results(%[[STAGE2]])
// LoopSchedule Pipeline terminator.
// CHECK: loopschedule.terminator iter_args(%[[STAGE0]]#2, %[[STAGE2]]), results(%[[STAGE2]])
%c0_i32 = arith.constant 0 : i32
%0 = affine.for %arg2 = 0 to 64 iter_args(%arg3 = %c0_i32) -> (i32) {
@ -101,28 +101,28 @@ func.func @affine_dimension(%arg0: i32) -> i32 {
// CHECK-LABEL: func @dot_mul_accumulate
func.func @dot_mul_accumulate(%arg0: memref<64xi32>, %arg1: memref<64xi32>) -> i32 {
// Pipeline boilerplate checked above, just check the stages computations.
// LoopSchedule Pipeline boilerplate checked above, just check the stages computations.
// CHECK: pipeline.while II = 3
// CHECK: loopschedule.pipeline II = 3
// First stage.
// CHECK: %[[STAGE0:.+]]:3 = pipeline.while.stage
// CHECK: %[[STAGE0:.+]]:3 = loopschedule.pipeline.stage
// CHECK-DAG: %[[STAGE0_0:.+]] = memref.load %arg0[%arg2]
// CHECK-DAG: %[[STAGE0_1:.+]] = memref.load %arg1[%arg2]
// CHECK-DAG: %[[STAGE0_2:.+]] = arith.addi %arg2, %c1
// CHECK: pipeline.register %[[STAGE0_0]], %[[STAGE0_1]], %[[STAGE0_2]]
// CHECK: loopschedule.register %[[STAGE0_0]], %[[STAGE0_1]], %[[STAGE0_2]]
// Second stage.
// CHECK: %[[STAGE1:.+]] = pipeline.while.stage
// CHECK: %[[STAGE1:.+]] = loopschedule.pipeline.stage
// CHECK: %[[STAGE1_0:.+]] = arith.muli %[[STAGE0]]#0, %[[STAGE0]]#1 : i32
// CHECK: pipeline.register %[[STAGE1_0]]
// CHECK: loopschedule.register %[[STAGE1_0]]
// Third stage.
// CHECK: %[[STAGE2:.+]] = pipeline.while.stage
// CHECK: %[[STAGE2:.+]] = loopschedule.pipeline.stage
// CHECK: %[[STAGE2_0:.+]] = arith.muli %arg3, %[[STAGE1]]
// CHECK: pipeline.register %[[STAGE2_0]]
// CHECK: loopschedule.register %[[STAGE2_0]]
// Pipeline terminator.
// CHECK: pipeline.terminator iter_args(%[[STAGE0]]#2, %[[STAGE2]]), results(%[[STAGE2]])
// LoopSchedule Pipeline terminator.
// CHECK: loopschedule.terminator iter_args(%[[STAGE0]]#2, %[[STAGE2]]), results(%[[STAGE2]])
%c0_i32 = arith.constant 0 : i32
%0 = affine.for %arg2 = 0 to 64 iter_args(%arg3 = %c0_i32) -> (i32) {
@ -138,33 +138,33 @@ func.func @dot_mul_accumulate(%arg0: memref<64xi32>, %arg1: memref<64xi32>) -> i
// CHECK-LABEL: func @dot_shared_mem
func.func @dot_shared_mem(%arg0: memref<128xi32>) -> i32 {
// LoopSchedule Pipeline boilerplate checked above, just check the stages computations.
// CHECK: loopschedule.pipeline II = 2
// First stage.
// CHECK: %[[STAGE0:.+]]:3 = loopschedule.pipeline.stage
// CHECK-DAG: %[[STAGE0_0:.+]] = memref.load %arg0[%arg1] : memref<128xi32>
// CHECK-DAG: %[[STAGE0_1:.+]] = arith.addi %arg1, %c64 : index
// CHECK-DAG: %[[STAGE0_2:.+]] = arith.addi %arg1, %c1 : index
// CHECK: loopschedule.register %[[STAGE0_0]], %[[STAGE0_1]], %[[STAGE0_2]]
// Second stage.
// CHECK: %[[STAGE1:.+]]:2 = loopschedule.pipeline.stage
// CHECK: %[[STAGE1_0:.+]] = memref.load %arg0[%[[STAGE0]]#1] : memref<128xi32>
// CHECK: loopschedule.register %[[STAGE0]]#0, %[[STAGE1_0]]
// Third stage.
// CHECK: %[[STAGE2:.+]] = loopschedule.pipeline.stage
// CHECK: %[[STAGE2_0:.+]] = arith.muli %[[STAGE1]]#0, %[[STAGE1]]#1 : i32
// CHECK: loopschedule.register %[[STAGE2_0]]
// Fourth stage.
// CHECK: %[[STAGE3:.+]] = loopschedule.pipeline.stage
// CHECK: %[[STAGE3_0:.+]] = arith.addi %arg2, %[[STAGE2]] : i32
// CHECK: loopschedule.register %[[STAGE3_0]]
// LoopSchedule Pipeline terminator.
// CHECK: loopschedule.terminator iter_args(%[[STAGE0]]#2, %[[STAGE3]]), results(%[[STAGE3]])
%c0_i32 = arith.constant 0 : i32
%c64_index = arith.constant 64 : index


@ -1,4 +1,4 @@
// RUN: circt-opt %s -lower-loopschedule-to-calyx -split-input-file | FileCheck %s
// CHECK: module attributes {calyx.entrypoint = "minimal"} {
// CHECK: calyx.component @minimal
@ -38,15 +38,15 @@ func.func @minimal() {
%c0_i64 = arith.constant 0 : i64
%c10_i64 = arith.constant 10 : i64
%c1_i64 = arith.constant 1 : i64
loopschedule.pipeline II = 1 trip_count = 10 iter_args(%arg0 = %c0_i64) : (i64) -> () {
%0 = arith.cmpi ult, %arg0, %c10_i64 : i64
loopschedule.register %0 : i1
} do {
%0 = loopschedule.pipeline.stage start = 0 {
%1 = arith.addi %arg0, %c1_i64 : i64
loopschedule.register %1 : i64
} : i64
loopschedule.terminator iter_args(%0), results() : (i64) -> ()
}
return
}
@ -163,25 +163,25 @@ func.func @dot(%arg0: memref<64xi32>, %arg1: memref<64xi32>) -> i32 {
%c0 = arith.constant 0 : index
%c64 = arith.constant 64 : index
%c1 = arith.constant 1 : index
%0 = loopschedule.pipeline II = 1 trip_count = 5 iter_args(%arg2 = %c0, %arg3 = %c0_i32) : (index, i32) -> i32 {
%1 = arith.cmpi ult, %arg2, %c64 : index
loopschedule.register %1 : i1
} do {
%1:3 = loopschedule.pipeline.stage start = 0 {
%4 = memref.load %arg0[%arg2] : memref<64xi32>
%5 = memref.load %arg1[%arg2] : memref<64xi32>
%6 = arith.addi %arg2, %c1 : index
loopschedule.register %4, %5, %6 : i32, i32, index
} : i32, i32, index
%2 = loopschedule.pipeline.stage start = 1 {
%4 = arith.muli %1#0, %1#1 : i32
loopschedule.register %4 : i32
} : i32
%3 = loopschedule.pipeline.stage start = 4 {
%4 = arith.addi %arg3, %2 : i32
loopschedule.register %4 : i32
} : i32
loopschedule.terminator iter_args(%1#2, %3), results(%3) : (index, i32) -> i32
}
return %0 : i32
}
@ -224,21 +224,21 @@ module {
%c0 = arith.constant 0 : index
%c4 = arith.constant 4 : index
%c1 = arith.constant 1 : index
loopschedule.pipeline II = 1 trip_count = 4 iter_args(%arg2 = %c0) : (index) -> () {
%0 = arith.cmpi ult, %arg2, %c4 : index
loopschedule.register %0 : i1
} do {
%0:2 = loopschedule.pipeline.stage start = 0 {
%1 = memref.load %arg0[%arg2] : memref<4xi32>
%2 = arith.addi %arg2, %c1 : index
loopschedule.register %1, %2 : i32, index
} : i32, index
loopschedule.pipeline.stage start = 1 {
memref.store %0#0, %arg1[%arg2] : memref<4xi32>
loopschedule.register
}
loopschedule.terminator iter_args(%0#1), results() : (index) -> ()
}
return
}


@ -0,0 +1,215 @@
// RUN: circt-opt %s -split-input-file -verify-diagnostics
func.func @combinational_condition() {
%c0_i32 = arith.constant 0 : i32
%0 = memref.alloc() : memref<8xi32>
// expected-error @+1 {{'loopschedule.pipeline' op condition must have a combinational body, found %3 = "memref.load"(%1, %2) : (memref<8xi32>, index) -> i32}}
loopschedule.pipeline II = 1 iter_args(%arg0 = %c0_i32) : (i32) -> () {
%c0 = arith.constant 0 : index
%1 = memref.load %0[%c0] : memref<8xi32>
%2 = arith.cmpi ult, %1, %arg0 : i32
loopschedule.register %2 : i1
} do {
loopschedule.pipeline.stage start = 0 {
loopschedule.register
}
loopschedule.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @single_condition() {
%false = arith.constant 0 : i1
// expected-error @+1 {{'loopschedule.pipeline' op condition must terminate with a single result, found 'i1', 'i1'}}
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0, %arg0 : i1, i1
} do {
loopschedule.pipeline.stage start = 0 {
loopschedule.register
}
loopschedule.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @boolean_condition() {
%c0_i32 = arith.constant 0 : i32
// expected-error @+1 {{'loopschedule.pipeline' op condition must terminate with an i1 result, found 'i32'}}
loopschedule.pipeline II = 1 iter_args(%arg0 = %c0_i32) : (i32) -> () {
loopschedule.register %arg0 : i32
} do {
loopschedule.pipeline.stage start = 0 {
loopschedule.register
}
loopschedule.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @only_stages() {
%false = arith.constant 0 : i1
// expected-error @+1 {{'loopschedule.pipeline' op stages must contain at least one stage}}
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0 : i1
} do {
loopschedule.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @only_stages() {
%false = arith.constant 0 : i1
// expected-error @+1 {{'loopschedule.pipeline' op stages may only contain 'loopschedule.pipeline.stage' or 'loopschedule.terminator' ops, found %1 = "arith.addi"(%arg0, %arg0) : (i1, i1) -> i1}}
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0 : i1
} do {
%0 = arith.addi %arg0, %arg0 : i1
loopschedule.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @mismatched_register_types() {
%false = arith.constant 0 : i1
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0 : i1
} do {
%0 = loopschedule.pipeline.stage start = 0 {
// expected-error @+1 {{'loopschedule.register' op operand types ('i1') must match result types ('i2')}}
loopschedule.register %arg0 : i1
} : i2
loopschedule.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @mismatched_iter_args_types() {
%false = arith.constant 0 : i1
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0 : i1
} do {
loopschedule.pipeline.stage start = 0 {
loopschedule.register
}
// expected-error @+1 {{'loopschedule.terminator' op 'iter_args' types () must match pipeline 'iter_args' types ('i1')}}
loopschedule.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @invalid_iter_args() {
%false = arith.constant 0 : i1
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> (i1) {
loopschedule.register %arg0 : i1
} do {
loopschedule.pipeline.stage start = 0 {
loopschedule.register
}
// expected-error @+1 {{'loopschedule.terminator' op 'iter_args' must be defined by a 'loopschedule.pipeline.stage'}}
loopschedule.terminator iter_args(%false), results() : (i1) -> ()
}
return
}
// -----
func.func @mismatched_result_types() {
%false = arith.constant 0 : i1
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> (i1) {
loopschedule.register %arg0 : i1
} do {
%0 = loopschedule.pipeline.stage start = 0 {
loopschedule.register %arg0 : i1
} : i1
// expected-error @+1 {{'loopschedule.terminator' op 'results' types () must match pipeline result types ('i1')}}
loopschedule.terminator iter_args(%0), results() : (i1) -> ()
}
return
}
// -----
func.func @invalid_results() {
%false = arith.constant 0 : i1
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> (i1) {
loopschedule.register %arg0 : i1
} do {
%0 = loopschedule.pipeline.stage start = 0 {
loopschedule.register %arg0 : i1
} : i1
// expected-error @+1 {{'loopschedule.terminator' op 'results' must be defined by a 'loopschedule.pipeline.stage'}}
loopschedule.terminator iter_args(%0), results(%false) : (i1) -> (i1)
}
return
}
// -----
func.func @negative_start() {
%false = arith.constant 0 : i1
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0 : i1
} do {
// expected-error @+1 {{'loopschedule.pipeline.stage' op 'start' must be non-negative}}
%0 = loopschedule.pipeline.stage start = -1 {
loopschedule.register %arg0 : i1
} : i1
loopschedule.terminator iter_args(%0), results() : (i1) -> ()
}
return
}
// -----
func.func @non_monotonic_start0() {
%false = arith.constant 0 : i1
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0 : i1
} do {
%0 = loopschedule.pipeline.stage start = 0 {
loopschedule.register %arg0 : i1
} : i1
// expected-error @+1 {{'loopschedule.pipeline.stage' op 'start' must be after previous 'start' (0)}}
%1 = loopschedule.pipeline.stage start = 0 {
loopschedule.register %0 : i1
} : i1
loopschedule.terminator iter_args(%1), results() : (i1) -> ()
}
return
}
// -----
func.func @non_monotonic_start1() {
%false = arith.constant 0 : i1
loopschedule.pipeline II = 1 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0 : i1
} do {
%0 = loopschedule.pipeline.stage start = 0 {
loopschedule.register %arg0 : i1
} : i1
%1 = loopschedule.pipeline.stage start = 1 {
loopschedule.register %0 : i1
} : i1
// expected-error @+1 {{'loopschedule.pipeline.stage' op 'start' must be after previous 'start' (1)}}
%2 = loopschedule.pipeline.stage start = 0 {
loopschedule.register %1 : i1
} : i1
loopschedule.terminator iter_args(%2), results() : (i1) -> ()
}
return
}


@ -0,0 +1,199 @@
// RUN: circt-opt %s -verify-diagnostics | circt-opt -verify-diagnostics | FileCheck %s
func.func @test1(%arg0: memref<?xi32>) -> i32 {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%c0_i32 = arith.constant 0 : i32
// CHECK: loopschedule.pipeline
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg1 = %c0, %arg2 = %c0_i32)
// CHECK-SAME: (index, i32) -> i32
// CHECK-SAME: {
%0 = loopschedule.pipeline II = 1 iter_args(%arg1 = %c0, %arg2 = %c0_i32) : (index, i32) -> i32 {
%1 = arith.cmpi ult, %arg1, %c10 : index
loopschedule.register %1 : i1
// CHECK: } do {
} do {
// CHECK: loopschedule.pipeline.stage start = 0 {
%1:2 = loopschedule.pipeline.stage start = 0 {
%3 = arith.addi %arg1, %c1 : index
%4 = memref.load %arg0[%arg1] : memref<?xi32>
// CHECK: loopschedule.register {{.+}} : {{.+}}
// CHECK-NEXT: } : index, i32
loopschedule.register %3, %4 : index, i32
} : index, i32
%2 = loopschedule.pipeline.stage start = 1 {
%3 = arith.addi %1#1, %arg2 : i32
loopschedule.register %3 : i32
} : i32
// CHECK: loopschedule.terminator iter_args({{.+}}), results({{.+}}) : {{.+}}
loopschedule.terminator iter_args(%1#0, %2), results(%2) : (index, i32) -> i32
}
return %0 : i32
}
func.func @test2(%arg0: memref<?xi32>, %arg1: memref<?xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c10 = arith.constant 10 : index
// CHECK: loopschedule.pipeline
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg2 = %c0)
// CHECK-SAME: (index) -> ()
loopschedule.pipeline II = 1 iter_args(%arg2 = %c0) : (index) -> () {
%0 = arith.cmpi ult, %arg2, %c10 : index
loopschedule.register %0 : i1
} do {
%0:4 = loopschedule.pipeline.stage start = 0 {
%4 = arith.addi %arg2, %c1 : index
%5 = memref.load %arg0[%arg2] : memref<?xi32>
%6 = arith.cmpi uge, %arg2, %c3 : index
loopschedule.register %arg2, %4, %5, %6 : index, index, i32, i1
} : index, index, i32, i1
// CHECK: loopschedule.pipeline.stage start = 1 when %0#3
%1:3 = loopschedule.pipeline.stage start = 1 when %0#3 {
%4 = arith.subi %0#0, %c3 : index
loopschedule.register %0#2, %0#3, %4 : i32, i1, index
} : i32, i1, index
%2:4 = loopschedule.pipeline.stage start = 2 when %1#1 {
%4 = memref.load %arg0[%1#2] : memref<?xi32>
loopschedule.register %1#0, %1#1, %1#2, %4 : i32, i1, index, i32
} : i32, i1, index, i32
%3:3 = loopschedule.pipeline.stage start = 3 when %2#1 {
%4 = arith.addi %2#0, %2#3 : i32
loopschedule.register %2#1, %2#2, %4 : i1, index, i32
} : i1, index, i32
loopschedule.pipeline.stage start = 5 when %3#0 {
memref.store %3#2, %arg1[%3#1] : memref<?xi32>
loopschedule.register
}
loopschedule.terminator iter_args(%0#0), results() : (index) -> ()
}
return
}
// CHECK-LABEL: func.func @test3
func.func @test3(%arg0: memref<?xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%0 = memref.alloca() : memref<1xi32>
%1 = memref.alloca() : memref<1xi32>
%2 = memref.alloca() : memref<1xi32>
// CHECK: loopschedule.pipeline
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg1 = %c0)
// CHECK-SAME: (index) -> ()
loopschedule.pipeline II = 1 iter_args(%arg1 = %c0) : (index) -> () {
%3 = arith.cmpi ult, %arg1, %c10 : index
loopschedule.register %3 : i1
} do {
%3:5 = loopschedule.pipeline.stage start = 0 {
%5 = arith.addi %arg1, %c1 : index
%6 = memref.load %2[%c0] : memref<1xi32>
%7 = memref.load %1[%c0] : memref<1xi32>
%8 = memref.load %0[%c0] : memref<1xi32>
%9 = memref.load %arg0[%arg1] : memref<?xi32>
loopschedule.register %5, %6, %7, %8, %9 : index, i32, i32, i32, i32
} : index, i32, i32, i32, i32
%4 = loopschedule.pipeline.stage start = 1 {
memref.store %3#2, %2[%c0] : memref<1xi32>
memref.store %3#3, %1[%c0] : memref<1xi32>
%5 = arith.addi %3#1, %3#4 : i32
loopschedule.register %5 : i32
} : i32
loopschedule.pipeline.stage start = 2 {
memref.store %4, %0[%c0] : memref<1xi32>
loopschedule.register
}
loopschedule.terminator iter_args(%3#0), results() : (index) -> ()
}
return
}
// CHECK-LABEL: func.func @test4
func.func @test4(%arg0: memref<?xi32>, %arg1: memref<?xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%c1_i32 = arith.constant 1 : i32
// CHECK: loopschedule.pipeline
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg2 = %c0)
// CHECK-SAME: (index) -> ()
loopschedule.pipeline II = 1 iter_args(%arg2 = %c0) : (index) -> () {
%0 = arith.cmpi ult, %arg2, %c10 : index
loopschedule.register %0 : i1
} do {
%0:2 = loopschedule.pipeline.stage start = 0 {
%3 = arith.addi %arg2, %c1 : index
%4 = memref.load %arg1[%arg2] : memref<?xi32>
%5 = arith.index_cast %4 : i32 to index
loopschedule.register %3, %5 : index, index
} : index, index
%1:2 = loopschedule.pipeline.stage start = 1 {
%3 = memref.load %arg0[%0#1] : memref<?xi32>
loopschedule.register %0#1, %3 : index, i32
} : index, i32
%2:2 = loopschedule.pipeline.stage start = 2 {
%3 = arith.addi %1#1, %c1_i32 : i32
loopschedule.register %1#0, %3 : index, i32
} : index, i32
loopschedule.pipeline.stage start = 4 {
memref.store %2#1, %arg0[%2#0] : memref<?xi32>
loopschedule.register
}
loopschedule.terminator iter_args(%0#0), results() : (index) -> ()
}
return
}
// CHECK-LABEL: func.func @test5
func.func @test5(%arg0: memref<?xi32>) {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
// CHECK: loopschedule.pipeline
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg1 = %c2)
// CHECK-SAME: (index) -> ()
loopschedule.pipeline II = 1 iter_args(%arg1 = %c2) : (index) -> () {
%0 = arith.cmpi ult, %arg1, %c10 : index
loopschedule.register %0 : i1
} do {
%0 = loopschedule.pipeline.stage start = 0 {
%2 = arith.subi %arg1, %c2 : index
%3 = memref.load %arg0[%2] : memref<?xi32>
loopschedule.register %3 : i32
} : i32
%1:2 = loopschedule.pipeline.stage start = 1 {
%2 = arith.subi %arg1, %c1 : index
%3 = memref.load %arg0[%2] : memref<?xi32>
%4 = arith.addi %arg1, %c1 : index
loopschedule.register %3, %4 : i32, index
} : i32, index
loopschedule.pipeline.stage start = 2 {
%2 = arith.addi %0, %1#0 : i32
memref.store %2, %arg0[%arg1] : memref<?xi32>
loopschedule.register
}
loopschedule.terminator iter_args(%1#1), results() : (index) -> ()
}
return
}
func.func @trip_count_attr() {
%false = arith.constant 0 : i1
// CHECK: loopschedule.pipeline II = 1 trip_count = 3
loopschedule.pipeline II = 1 trip_count = 3 iter_args(%arg0 = %false) : (i1) -> () {
loopschedule.register %arg0 : i1
} do {
%0 = loopschedule.pipeline.stage start = 0 {
loopschedule.register %arg0 : i1
} : i1
loopschedule.terminator iter_args(%0), results() : (i1) -> ()
}
return
}


@ -1,221 +1,5 @@
// RUN: circt-opt %s -split-input-file -verify-diagnostics
func.func @combinational_condition() {
%c0_i32 = arith.constant 0 : i32
%0 = memref.alloc() : memref<8xi32>
// expected-error @+1 {{'pipeline.while' op condition must have a combinational body, found %3 = "memref.load"(%1, %2) : (memref<8xi32>, index) -> i32}}
pipeline.while II = 1 iter_args(%arg0 = %c0_i32) : (i32) -> () {
%c0 = arith.constant 0 : index
%1 = memref.load %0[%c0] : memref<8xi32>
%2 = arith.cmpi ult, %1, %arg0 : i32
pipeline.register %2 : i1
} do {
pipeline.while.stage start = 0 {
pipeline.register
}
pipeline.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @single_condition() {
%false = arith.constant 0 : i1
// expected-error @+1 {{'pipeline.while' op condition must terminate with a single result, found 'i1', 'i1'}}
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0, %arg0 : i1, i1
} do {
pipeline.while.stage start = 0 {
pipeline.register
}
pipeline.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @boolean_condition() {
%c0_i32 = arith.constant 0 : i32
// expected-error @+1 {{'pipeline.while' op condition must terminate with an i1 result, found 'i32'}}
pipeline.while II = 1 iter_args(%arg0 = %c0_i32) : (i32) -> () {
pipeline.register %arg0 : i32
} do {
pipeline.while.stage start = 0 {
pipeline.register
}
pipeline.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @only_stages() {
%false = arith.constant 0 : i1
// expected-error @+1 {{'pipeline.while' op stages must contain at least one stage}}
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0 : i1
} do {
pipeline.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @only_stages() {
%false = arith.constant 0 : i1
// expected-error @+1 {{'pipeline.while' op stages may only contain 'pipeline.while.stage' or 'pipeline.terminator' ops, found %1 = "arith.addi"(%arg0, %arg0) : (i1, i1) -> i1}}
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0 : i1
} do {
%0 = arith.addi %arg0, %arg0 : i1
pipeline.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @mismatched_register_types() {
%false = arith.constant 0 : i1
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0 : i1
} do {
%0 = pipeline.while.stage start = 0 {
// expected-error @+1 {{'pipeline.register' op operand types ('i1') must match result types ('i2')}}
pipeline.register %arg0 : i1
} : i2
pipeline.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @mismatched_iter_args_types() {
%false = arith.constant 0 : i1
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0 : i1
} do {
pipeline.while.stage start = 0 {
pipeline.register
}
// expected-error @+1 {{'pipeline.terminator' op 'iter_args' types () must match pipeline 'iter_args' types ('i1')}}
pipeline.terminator iter_args(), results() : () -> ()
}
return
}
// -----
func.func @invalid_iter_args() {
%false = arith.constant 0 : i1
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> (i1) {
pipeline.register %arg0 : i1
} do {
pipeline.while.stage start = 0 {
pipeline.register
}
// expected-error @+1 {{'pipeline.terminator' op 'iter_args' must be defined by a 'pipeline.while.stage'}}
pipeline.terminator iter_args(%false), results() : (i1) -> ()
}
return
}
// -----
func.func @mismatched_result_types() {
%false = arith.constant 0 : i1
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> (i1) {
pipeline.register %arg0 : i1
} do {
%0 = pipeline.while.stage start = 0 {
pipeline.register %arg0 : i1
} : i1
// expected-error @+1 {{'pipeline.terminator' op 'results' types () must match pipeline result types ('i1')}}
pipeline.terminator iter_args(%0), results() : (i1) -> ()
}
return
}
// -----
func.func @invalid_results() {
%false = arith.constant 0 : i1
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> (i1) {
pipeline.register %arg0 : i1
} do {
%0 = pipeline.while.stage start = 0 {
pipeline.register %arg0 : i1
} : i1
// expected-error @+1 {{'pipeline.terminator' op 'results' must be defined by a 'pipeline.while.stage'}}
pipeline.terminator iter_args(%0), results(%false) : (i1) -> (i1)
}
return
}
// -----
func.func @negative_start() {
%false = arith.constant 0 : i1
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0 : i1
} do {
// expected-error @+1 {{'pipeline.while.stage' op 'start' must be non-negative}}
%0 = pipeline.while.stage start = -1 {
pipeline.register %arg0 : i1
} : i1
pipeline.terminator iter_args(%0), results() : (i1) -> ()
}
return
}
// -----
func.func @non_monotonic_start0() {
%false = arith.constant 0 : i1
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0 : i1
} do {
%0 = pipeline.while.stage start = 0 {
pipeline.register %arg0 : i1
} : i1
// expected-error @+1 {{'pipeline.while.stage' op 'start' must be after previous 'start' (0)}}
%1 = pipeline.while.stage start = 0 {
pipeline.register %0 : i1
} : i1
pipeline.terminator iter_args(%1), results() : (i1) -> ()
}
return
}
// -----
func.func @non_monotonic_start1() {
%false = arith.constant 0 : i1
pipeline.while II = 1 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0 : i1
} do {
%0 = pipeline.while.stage start = 0 {
pipeline.register %arg0 : i1
} : i1
%1 = pipeline.while.stage start = 1 {
pipeline.register %0 : i1
} : i1
// expected-error @+1 {{'pipeline.while.stage' op 'start' must be after previous 'start' (1)}}
%2 = pipeline.while.stage start = 0 {
pipeline.register %1 : i1
} : i1
pipeline.terminator iter_args(%2), results() : (i1) -> ()
}
return
}
// -----
hw.module @mixed_ports(%arg0 : !esi.channel<i32>, %arg1 : i32, %clk : i1, %rst : i1) -> (out: i32) {
// expected-error @+1 {{'pipeline.pipeline' op if any port of this pipeline is an ESI channel, all ports must be ESI channels.}}
%0 = pipeline.pipeline(%arg0, %arg1) clock %clk reset %rst : (!esi.channel<i32>, i32) -> (i32) {


@ -1,203 +1,6 @@
// RUN: circt-opt %s -verify-diagnostics | circt-opt -verify-diagnostics | FileCheck %s
func.func @test1(%arg0: memref<?xi32>) -> i32 {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%c0_i32 = arith.constant 0 : i32
// CHECK: pipeline.while
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg1 = %c0, %arg2 = %c0_i32)
// CHECK-SAME: (index, i32) -> i32
// CHECK-SAME: {
%0 = pipeline.while II = 1 iter_args(%arg1 = %c0, %arg2 = %c0_i32) : (index, i32) -> i32 {
%1 = arith.cmpi ult, %arg1, %c10 : index
pipeline.register %1 : i1
// CHECK: } do {
} do {
// CHECK: pipeline.while.stage start = 0 {
%1:2 = pipeline.while.stage start = 0 {
%3 = arith.addi %arg1, %c1 : index
%4 = memref.load %arg0[%arg1] : memref<?xi32>
// CHECK: pipeline.register {{.+}} : {{.+}}
// CHECK-NEXT: } : index, i32
pipeline.register %3, %4 : index, i32
} : index, i32
%2 = pipeline.while.stage start = 1 {
%3 = arith.addi %1#1, %arg2 : i32
pipeline.register %3 : i32
} : i32
// CHECK: pipeline.terminator iter_args({{.+}}), results({{.+}}) : {{.+}}
pipeline.terminator iter_args(%1#0, %2), results(%2) : (index, i32) -> i32
}
return %0 : i32
}
func.func @test2(%arg0: memref<?xi32>, %arg1: memref<?xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c10 = arith.constant 10 : index
// CHECK: pipeline.while
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg2 = %c0)
// CHECK-SAME: (index) -> ()
pipeline.while II = 1 iter_args(%arg2 = %c0) : (index) -> () {
%0 = arith.cmpi ult, %arg2, %c10 : index
pipeline.register %0 : i1
} do {
%0:4 = pipeline.while.stage start = 0 {
%4 = arith.addi %arg2, %c1 : index
%5 = memref.load %arg0[%arg2] : memref<?xi32>
%6 = arith.cmpi uge, %arg2, %c3 : index
pipeline.register %arg2, %4, %5, %6 : index, index, i32, i1
} : index, index, i32, i1
// CHECK: pipeline.while.stage start = 1 when %0#3
%1:3 = pipeline.while.stage start = 1 when %0#3 {
%4 = arith.subi %0#0, %c3 : index
pipeline.register %0#2, %0#3, %4 : i32, i1, index
} : i32, i1, index
%2:4 = pipeline.while.stage start = 2 when %1#1 {
%4 = memref.load %arg0[%1#2] : memref<?xi32>
pipeline.register %1#0, %1#1, %1#2, %4 : i32, i1, index, i32
} : i32, i1, index, i32
%3:3 = pipeline.while.stage start = 3 when %2#1 {
%4 = arith.addi %2#0, %2#3 : i32
pipeline.register %2#1, %2#2, %4 : i1, index, i32
} : i1, index, i32
pipeline.while.stage start = 5 when %3#0 {
memref.store %3#2, %arg1[%3#1] : memref<?xi32>
pipeline.register
}
pipeline.terminator iter_args(%0#0), results() : (index) -> ()
}
return
}
// CHECK-LABEL: func.func @test3
func.func @test3(%arg0: memref<?xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%0 = memref.alloca() : memref<1xi32>
%1 = memref.alloca() : memref<1xi32>
%2 = memref.alloca() : memref<1xi32>
// CHECK: pipeline.while
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg1 = %c0)
// CHECK-SAME: (index) -> ()
pipeline.while II = 1 iter_args(%arg1 = %c0) : (index) -> () {
%3 = arith.cmpi ult, %arg1, %c10 : index
pipeline.register %3 : i1
} do {
%3:5 = pipeline.while.stage start = 0 {
%5 = arith.addi %arg1, %c1 : index
%6 = memref.load %2[%c0] : memref<1xi32>
%7 = memref.load %1[%c0] : memref<1xi32>
%8 = memref.load %0[%c0] : memref<1xi32>
%9 = memref.load %arg0[%arg1] : memref<?xi32>
pipeline.register %5, %6, %7, %8, %9 : index, i32, i32, i32, i32
} : index, i32, i32, i32, i32
%4 = pipeline.while.stage start = 1 {
memref.store %3#2, %2[%c0] : memref<1xi32>
memref.store %3#3, %1[%c0] : memref<1xi32>
%5 = arith.addi %3#1, %3#4 : i32
pipeline.register %5 : i32
} : i32
pipeline.while.stage start = 2 {
memref.store %4, %0[%c0] : memref<1xi32>
pipeline.register
}
pipeline.terminator iter_args(%3#0), results() : (index) -> ()
}
return
}
// CHECK-LABEL: func.func @test4
func.func @test4(%arg0: memref<?xi32>, %arg1: memref<?xi32>) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%c1_i32 = arith.constant 1 : i32
// CHECK: pipeline.while
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg2 = %c0)
// CHECK-SAME: (index) -> ()
pipeline.while II = 1 iter_args(%arg2 = %c0) : (index) -> () {
%0 = arith.cmpi ult, %arg2, %c10 : index
pipeline.register %0 : i1
} do {
%0:2 = pipeline.while.stage start = 0 {
%3 = arith.addi %arg2, %c1 : index
%4 = memref.load %arg1[%arg2] : memref<?xi32>
%5 = arith.index_cast %4 : i32 to index
pipeline.register %3, %5 : index, index
} : index, index
%1:2 = pipeline.while.stage start = 1 {
%3 = memref.load %arg0[%0#1] : memref<?xi32>
pipeline.register %0#1, %3 : index, i32
} : index, i32
%2:2 = pipeline.while.stage start = 2 {
%3 = arith.addi %1#1, %c1_i32 : i32
pipeline.register %1#0, %3 : index, i32
} : index, i32
pipeline.while.stage start = 4 {
memref.store %2#1, %arg0[%2#0] : memref<?xi32>
pipeline.register
}
pipeline.terminator iter_args(%0#0), results() : (index) -> ()
}
return
}
// CHECK-LABEL: func.func @test5
func.func @test5(%arg0: memref<?xi32>) {
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c10 = arith.constant 10 : index
// CHECK: pipeline.while
// CHECK-SAME: II = 1
// CHECK-SAME: iter_args(%arg1 = %c2)
// CHECK-SAME: (index) -> ()
pipeline.while II = 1 iter_args(%arg1 = %c2) : (index) -> () {
%0 = arith.cmpi ult, %arg1, %c10 : index
pipeline.register %0 : i1
} do {
%0 = pipeline.while.stage start = 0 {
%2 = arith.subi %arg1, %c2 : index
%3 = memref.load %arg0[%2] : memref<?xi32>
pipeline.register %3 : i32
} : i32
%1:2 = pipeline.while.stage start = 1 {
%2 = arith.subi %arg1, %c1 : index
%3 = memref.load %arg0[%2] : memref<?xi32>
%4 = arith.addi %arg1, %c1 : index
pipeline.register %3, %4 : i32, index
} : i32, index
pipeline.while.stage start = 2 {
%2 = arith.addi %0, %1#0 : i32
memref.store %2, %arg0[%arg1] : memref<?xi32>
pipeline.register
}
pipeline.terminator iter_args(%1#1), results() : (index) -> ()
}
return
}
func.func @trip_count_attr() {
%false = arith.constant 0 : i1
// CHECK: pipeline.while II = 1 trip_count = 3
pipeline.while II = 1 trip_count = 3 iter_args(%arg0 = %false) : (i1) -> () {
pipeline.register %arg0 : i1
} do {
%0 = pipeline.while.stage start = 0 {
pipeline.register %arg0 : i1
} : i1
pipeline.terminator iter_args(%0), results() : (i1) -> ()
}
return
}
// CHECK-LABEL: hw.module @retimeable1
hw.module @retimeable1(%arg0 : i32, %arg1 : i32, %clk : i1, %rst : i1) -> (out: i32) {
%0 = pipeline.pipeline(%arg0, %arg1) clock %clk reset %rst : (i32, i32) -> (i32) {
^bb0(%a0 : i32, %a1: i32):
@ -209,6 +12,7 @@ hw.module @retimeable1(%arg0 : i32, %arg1 : i32, %clk : i1, %rst : i1) -> (out:
hw.output %0 : i32
}
// CHECK-LABEL: hw.module @retimeable2
hw.module @retimeable2(%arg0 : i32, %arg1 : i32, %clk : i1, %rst : i1) -> (out: i32) {
%0 = pipeline.pipeline(%arg0, %arg1) clock %clk reset %rst : (i32, i32) -> (i32) {
^bb0(%a0 : i32, %a1: i32):
@ -220,6 +24,7 @@ hw.module @retimeable2(%arg0 : i32, %arg1 : i32, %clk : i1, %rst : i1) -> (out:
hw.output %0 : i32
}
// CHECK-LABEL: hw.module @retimeable3
hw.module @retimeable3(%arg0 : !esi.channel<i32>, %arg1 : !esi.channel<i32>, %clk : i1, %rst: i1) -> (out: !esi.channel<i32>) {
%0 = pipeline.pipeline(%arg0, %arg1) clock %clk reset %rst : (!esi.channel<i32>, !esi.channel<i32>) -> (!esi.channel<i32>) {
^bb0(%a0 : i32, %a1: i32):


@ -8,7 +8,7 @@ add_llvm_tool(circt-opt
llvm_update_compile_flags(circt-opt)
target_link_libraries(circt-opt
PRIVATE
CIRCTAffineToPipeline
CIRCTAffineToLoopSchedule
CIRCTAnalysisTestPasses
CIRCTArc
CIRCTArcToLLVM
@ -49,6 +49,7 @@ target_link_libraries(circt-opt
CIRCTHWToSystemC
CIRCTHWTransforms
CIRCTLoopSchedule
CIRCTLoopScheduleToCalyx
CIRCTSCFToCalyx
CIRCTScheduling
CIRCTSeq
@ -60,7 +61,6 @@ target_link_libraries(circt-opt
CIRCTPipelineOps
CIRCTPipelineToHW
CIRCTPipelineTransforms
CIRCTPipelineToCalyx
CIRCTSV
CIRCTSVTransforms
CIRCTHWArith