NVIDIA · ssam18 · May 8, 2026 · May 8, 2026
@@ -0,0 +1,48 @@
+# Folding GroupNorm-via-InstanceNorm
+
+## Introduction
+
+PyTorch versions prior to 2.5 export `torch.nn.GroupNorm` as a sequence of
+five operators when targeting ONNX opset versions below 18. The pattern is
+
+```
+    x  ->  Reshape  ->  InstanceNormalization  ->  Reshape  ->  Mul  ->  Add  ->  y
+```
+
+where the `InstanceNormalization` runs with constant scale `1` and bias `0`,
+and the trailing `Mul`/`Add` apply the learned per-channel `gamma`/`beta`. The
+second `Reshape` reads its target shape from a `Shape` op fed by `x`.
+
+This example demonstrates how to detect that pattern and rewrite it as a
+single native `GroupNormalization` node (opset 21+). Doing so avoids the
+fused-norm code path inside TensorRT, which has been observed to drift from
+ONNX Runtime when reduction extents are very large or when `num_groups`
+equals `num_channels`.
+
+## Running The Example
+
+1. Generate a small model containing the legacy pattern.
+
+   ```bash
+   python3 generate.py
+   ```
+
+2. Fold the pattern into a `GroupNormalization` op.
+
+   ```bash
+   python3 fold.py model.onnx folded.onnx
+   ```
+
+3. Inspect the resulting graph in [Netron](https://netron.app) to confirm the
+   five-op subgraph collapsed to a single `GroupNormalization`.
+
+## How It Works
+
+`fold.py` walks every `InstanceNormalization` node in the graph and verifies
+that its surrounding nodes match the legacy template. When the match is good
+it pulls `num_groups` from the upstream `Reshape` constant, lifts the
+per-channel weights out of the trailing `Mul`/`Add`, flattens them to 1D, and
+wires a single `GroupNormalization` node that consumes the original input
+tensor and produces the original output tensor. `graph.cleanup()` then
+removes the now-orphaned reshapes, the `Shape` op, and the dangling
+constants.
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+
+import numpy as np
+import onnx
+import onnx_graphsurgeon as gs
+
+
+def to_1d(arr):
+    return np.asarray(arr).reshape(-1)
+
+
+def fold_groupnorm(graph):
+    folded = 0
+
+    for inst in [n for n in graph.nodes if n.op == "InstanceNormalization"]:
+        pre_input = inst.inputs[0]
+        if not pre_input.inputs or pre_input.inputs[0].op != "Reshape":
+            continue
+        pre = pre_input.inputs[0]
+
+        post_consumers = inst.outputs[0].outputs
+        if len(post_consumers) != 1 or post_consumers[0].op != "Reshape":
+            continue
+        post = post_consumers[0]
+
+        if not isinstance(pre.inputs[1], gs.Constant):
+            continue
+        target_shape = pre.inputs[1].values.tolist()
+        if len(target_shape) < 2 or int(target_shape[1]) <= 0:
+            continue
+        num_groups = int(target_shape[1])
+
+        mul_consumers = post.outputs[0].outputs
+        if len(mul_consumers) != 1 or mul_consumers[0].op != "Mul":
+            continue
+        mul = mul_consumers[0]
+
+        add_consumers = mul.outputs[0].outputs
+        if len(add_consumers) != 1 or add_consumers[0].op != "Add":
+            continue
+        addn = add_consumers[0]
+
+        gamma_t = mul.inputs[0] if mul.inputs[1] is post.outputs[0] else mul.inputs[1]
+        beta_t = addn.inputs[0] if addn.inputs[1] is mul.outputs[0] else addn.inputs[1]
+        if not isinstance(gamma_t, gs.Constant) or not isinstance(beta_t, gs.Constant):
+            continue
+
+        gamma = to_1d(gamma_t.values).astype(np.float32)
+        beta = to_1d(beta_t.values).astype(np.float32)
+        if gamma.shape != beta.shape:
+            continue
+
+        epsilon = inst.attrs.get("epsilon", 1e-5)
+
+        gn_scale = gs.Constant(name=inst.name + "_gn_scale", values=gamma)
+        gn_bias = gs.Constant(name=inst.name + "_gn_bias", values=beta)
+        x_tensor = pre.inputs[0]
+        gn_out = addn.outputs[0]
+
+        gn_out.inputs.clear()
+        gn_node = gs.Node(
+            op="GroupNormalization",
+            name=inst.name + "_folded",
+            attrs={"num_groups": num_groups, "epsilon": float(epsilon)},
+            inputs=[x_tensor, gn_scale, gn_bias],
+            outputs=[gn_out],
+        )
+        graph.nodes.append(gn_node)
+        folded += 1
+
+    if folded:
+        graph.cleanup().toposort()
+    return folded
+
+
+def bump_opset(model, min_version=21):
+    for op in model.opset_import:
+        if op.domain in ("", "ai.onnx") and op.version < min_version:
+            op.version = min_version
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("input", nargs="?", default="model.onnx")
+    ap.add_argument("output", nargs="?", default="folded.onnx")
+    args = ap.parse_args()
+
+    graph = gs.import_onnx(onnx.load(args.input))
+    n = fold_groupnorm(graph)
+    print(f"folded {n} pattern(s)")
+
+    model = gs.export_onnx(graph)
+    bump_opset(model, 21)
+    onnx.save(model, args.output)
+    print(f"wrote {args.output}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import numpy as np
+import onnx
+import onnx_graphsurgeon as gs
+
+
+N, C, H, W = 1, 8, 4, 4
+NUM_GROUPS = 4
+
+graph = gs.Graph(ir_version=10, opset=17)
+
+x = gs.Variable("input", dtype=np.float32, shape=(N, C, H, W))
+graph.inputs = [x]
+
+target_shape = gs.Constant("reshape_target", values=np.array([N, NUM_GROUPS, -1], dtype=np.int64))
+reshape_in = gs.Variable("reshape_in_out", dtype=np.float32)
+graph.nodes.append(gs.Node(op="Reshape", inputs=[x, target_shape], outputs=[reshape_in]))
+
+inst_scale = gs.Constant("inst_scale", values=np.ones((NUM_GROUPS,), dtype=np.float32))
+inst_bias = gs.Constant("inst_bias", values=np.zeros((NUM_GROUPS,), dtype=np.float32))
+inst_out = gs.Variable("inst_out", dtype=np.float32)
+graph.nodes.append(
+    gs.Node(
+        op="InstanceNormalization",
+        attrs={"epsilon": 1e-5},
+        inputs=[reshape_in, inst_scale, inst_bias],
+        outputs=[inst_out],
+    )
+)
+
+shape_out = gs.Variable("shape_out", dtype=np.int64)
+graph.nodes.append(gs.Node(op="Shape", inputs=[x], outputs=[shape_out]))
+
+reshape_back_out = gs.Variable("reshape_back_out", dtype=np.float32)
+graph.nodes.append(gs.Node(op="Reshape", inputs=[inst_out, shape_out], outputs=[reshape_back_out]))
+
+gamma = gs.Constant("gamma", values=np.random.rand(C).astype(np.float32).reshape(1, C, 1, 1))
+beta = gs.Constant("beta", values=np.random.rand(C).astype(np.float32).reshape(1, C, 1, 1))
+mul_out = gs.Variable("mul_out", dtype=np.float32)
+add_out = gs.Variable("output", dtype=np.float32, shape=(N, C, H, W))
+graph.nodes.append(gs.Node(op="Mul", inputs=[reshape_back_out, gamma], outputs=[mul_out]))
+graph.nodes.append(gs.Node(op="Add", inputs=[mul_out, beta], outputs=[add_out]))
+
+graph.outputs = [add_out]
+onnx.save(gs.export_onnx(graph), "model.onnx")