-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Expand file tree
/
Copy pathDockerfile
More file actions
892 lines (862 loc) · 50.8 KB
/
Dockerfile
File metadata and controls
892 lines (862 loc) · 50.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
# NemoClaw sandbox image — OpenClaw + NemoClaw plugin inside OpenShell
#
# Layers PR-specific code (plugin, blueprint, config, startup script) on top
# of the pre-built base image from GHCR. The base image contains all the
# expensive, rarely-changing layers (apt, gosu, users, openclaw CLI).
#
# For local builds without GHCR access, build the base first:
# docker build -f Dockerfile.base -t ghcr.io/nvidia/nemoclaw/sandbox-base:latest .
# Global ARG — must be declared before the first FROM to be visible
# to all FROM directives. Can be overridden via --build-arg.
ARG BASE_IMAGE=ghcr.io/nvidia/nemoclaw/sandbox-base:latest

# Stage 1: Build TypeScript plugin from source.
# The base is pinned by digest so the build toolchain is reproducible.
FROM node:22-trixie-slim@sha256:2d9f5c76c8f4dd36e8f253bee5d828a83a6c09f36188f0b0414325232e0b175d AS builder

# Silence npm's audit/fund/update-notifier output and raise the registry
# fetch retry count and timeouts for the installs in this stage.
ENV NPM_CONFIG_AUDIT=false \
    NPM_CONFIG_FUND=false \
    NPM_CONFIG_UPDATE_NOTIFIER=false \
    NPM_CONFIG_FETCH_RETRIES=5 \
    NPM_CONFIG_FETCH_RETRY_MINTIMEOUT=20000 \
    NPM_CONFIG_FETCH_RETRY_MAXTIMEOUT=120000 \
    NPM_CONFIG_FETCH_TIMEOUT=300000

WORKDIR /opt/nemoclaw

# Install dependencies from the manifests alone, before the sources are copied,
# so this layer stays cached when only nemoclaw/src/ changes.
# NOTE(review): assumes no install-time lifecycle script (e.g. `prepare`) needs
# src/ — the runtime stage already runs `npm ci` without it; confirm.
COPY nemoclaw/package.json nemoclaw/package-lock.json nemoclaw/tsconfig.json /opt/nemoclaw/
RUN npm ci

# Sources change most often, so they are copied last; only the build re-runs.
COPY nemoclaw/src/ /opt/nemoclaw/src/
RUN npm run build
# Stage 2: Runtime image — pull cached base from GHCR
# BASE_IMAGE is the global ARG declared before the first FROM; DL3006 is
# suppressed because the tag comes from that variable rather than a literal.
# hadolint ignore=DL3006
FROM ${BASE_IMAGE}
# OpenClaw version this image targets. The upgrade step below validates its
# format, checks it against the blueprint minimum, and installs it when the
# base image ships an older release.
ARG OPENCLAW_VERSION=2026.5.22
# Expected npm dist.integrity for openclaw@2026.5.22. It is compared against
# `npm view` output, and only when OPENCLAW_VERSION is exactly 2026.5.22.
ARG OPENCLAW_2026_5_22_INTEGRITY=sha512-m+zgBELGbCHjWB1IWF5WSWNPr480cMKOMff2OF72c8A0AMD4hC/9+qwYtzjYmGkETcffnB711JymlVsQnh2Tow==
# Harden: remove unnecessary build tools and network probes from base image (#830)
# Protect runtime tools before autoremove — the GHCR base may predate the
# procps/e2fsprogs additions, leaving ps/chattr absent or auto-marked. The
# conditional install keeps stale bases usable while fresh bases skip apt.
# Refs: #2343, shields-up chattr hardening
#
# Packages are marked and purged one at a time. A single `apt-get remove`
# naming a package apt cannot locate (not installed and no package lists
# present) aborts the whole transaction, and the `|| true` would then hide
# that nothing was removed.
# hadolint ignore=DL3001
RUN set -eu; \
    for pkg in procps e2fsprogs; do \
        apt-mark manual "$pkg" 2>/dev/null || true; \
    done; \
    for pkg in gcc gcc-12 g++ g++-12 cpp cpp-12 make \
               netcat-openbsd netcat-traditional ncat; do \
        # dpkg -s succeeds only for packages present in the dpkg status
        # database, so names unknown to this base are skipped quietly.
        if dpkg -s "$pkg" >/dev/null 2>&1; then \
            apt-get remove --purge -y "$pkg" 2>/dev/null || true; \
        fi; \
    done; \
    apt-get autoremove --purge -y; \
    needs_ps=0; \
    needs_chattr=0; \
    if ! command -v ps >/dev/null 2>&1; then needs_ps=1; fi; \
    if ! command -v chattr >/dev/null 2>&1; then needs_chattr=1; fi; \
    # Touch the network only when a runtime tool is actually missing.
    if [ "$needs_ps" = "1" ] || [ "$needs_chattr" = "1" ]; then \
        apt-get update; \
        if [ "$needs_ps" = "1" ]; then \
            apt-get install -y --no-install-recommends procps=2:4.0.4-9; \
        fi; \
        if [ "$needs_chattr" = "1" ]; then \
            apt-get install -y --no-install-recommends e2fsprogs=1.47.2-3+b11; \
        fi; \
    fi; \
    rm -rf /var/lib/apt/lists/*; \
    # Fail the build if either runtime tool is still unavailable.
    ps --version; \
    command -v chattr >/dev/null
# Bring the compiled plugin (from the builder stage), its manifests, and the
# blueprint tree into the sandbox image.
COPY --from=builder /opt/nemoclaw/dist/ /opt/nemoclaw/dist/
COPY nemoclaw/openclaw.plugin.json \
     nemoclaw/package.json \
     nemoclaw/package-lock.json \
     /opt/nemoclaw/
COPY nemoclaw-blueprint/ /opt/nemoclaw-blueprint/
# Make both trees world-readable; X adds execute only to directories and to
# files that already carry an execute bit.
RUN chmod -R a+rX \
      /opt/nemoclaw \
      /opt/nemoclaw-blueprint/
# Install runtime dependencies only (no devDependencies, no build step)
WORKDIR /opt/nemoclaw
# Silence npm's audit/fund/update-notifier output and raise the registry
# fetch retry count and timeouts for the installs in this stage.
ENV NPM_CONFIG_AUDIT=false \
    NPM_CONFIG_FUND=false \
    NPM_CONFIG_UPDATE_NOTIFIER=false \
    NPM_CONFIG_FETCH_RETRIES=5 \
    NPM_CONFIG_FETCH_RETRY_MINTIMEOUT=20000 \
    NPM_CONFIG_FETCH_RETRY_MAXTIMEOUT=120000 \
    NPM_CONFIG_FETCH_TIMEOUT=300000
# Drop npm's download cache in the same layer that created it; removing it in
# a later layer would not shrink the image.
RUN npm ci --omit=dev \
    && npm cache clean --force
# Stage the two OpenClaw dist patch scripts that later RUN steps invoke with node.
COPY scripts/patch-openclaw-tool-catalog.js \
     scripts/patch-openclaw-chat-send.js \
     /usr/local/lib/nemoclaw/
RUN chmod 755 \
      /usr/local/lib/nemoclaw/patch-openclaw-tool-catalog.js \
      /usr/local/lib/nemoclaw/patch-openclaw-chat-send.js
# Upgrade OpenClaw if the base image is stale.
#
# The GHCR base image (sandbox-base:latest) may lag behind the version pinned
# in Dockerfile.base. When that happens the fetch-guard patches below fail
# because the target functions don't exist in the older OpenClaw. Rather than
# silently skipping patches (leaving the sandbox unpatched), upgrade OpenClaw
# in-place so every build gets the version the patches expect.
#
# OPENCLAW_VERSION is the NemoClaw runtime build target. It must be at least the
# blueprint minimum, which also supports the legacy direct-blueprint image path.
#
# Steps, in order: validate the OPENCLAW_VERSION format, check it against
# min_openclaw_version from blueprint.yaml, compare the registry's
# dist.integrity with the pinned value (only for versions that have one),
# upgrade when the installed CLI is older, then install codex-acp.
# hadolint ignore=DL3059,DL4006
RUN set -eu; \
    echo "$OPENCLAW_VERSION" | grep -qxE '[0-9]+(\.[0-9]+)*' \
        || { echo "ERROR: OPENCLAW_VERSION='$OPENCLAW_VERSION' is invalid (expected e.g. 2026.3.11)" >&2; exit 1; }; \
    MIN_VER=$(grep -m 1 'min_openclaw_version' /opt/nemoclaw-blueprint/blueprint.yaml | awk '{print $2}' | tr -d '"'); \
    [ -n "$MIN_VER" ] || { echo "ERROR: Could not parse min_openclaw_version from blueprint.yaml" >&2; exit 1; }; \
    # sort -V puts the lower version first; the target must not sort below the minimum.
    if [ "$(printf '%s\n%s' "$MIN_VER" "$OPENCLAW_VERSION" | sort -V | head -n1)" != "$MIN_VER" ]; then \
        echo "ERROR: OpenClaw build target ${OPENCLAW_VERSION} is below blueprint minimum ${MIN_VER}" >&2; exit 1; \
    fi; \
    EXPECTED_INTEGRITY=""; \
    if [ "$OPENCLAW_VERSION" = "2026.5.22" ]; then EXPECTED_INTEGRITY="$OPENCLAW_2026_5_22_INTEGRITY"; fi; \
    if [ -n "$EXPECTED_INTEGRITY" ]; then \
        REGISTRY_INTEGRITY=$(npm view "openclaw@${OPENCLAW_VERSION}" dist.integrity); \
        if [ "$REGISTRY_INTEGRITY" != "$EXPECTED_INTEGRITY" ]; then \
            echo "ERROR: OpenClaw ${OPENCLAW_VERSION} npm integrity mismatch" >&2; \
            echo "Expected: ${EXPECTED_INTEGRITY}" >&2; \
            echo "Actual: ${REGISTRY_INTEGRITY}" >&2; exit 1; \
        fi; \
    fi; \
    # Without pipefail the pipeline's status is awk's, so an `|| echo` fallback
    # after it never fires when openclaw is missing. Default the empty result
    # explicitly so the comparison and the log line see a real version string.
    CUR_VER=$(openclaw --version 2>/dev/null | awk '{print $2}' || true); \
    CUR_VER="${CUR_VER:-0.0.0}"; \
    if [ "$(printf '%s\n%s' "$OPENCLAW_VERSION" "$CUR_VER" | sort -V | head -n1)" = "$OPENCLAW_VERSION" ]; then \
        echo "INFO: OpenClaw $CUR_VER is current (>= $OPENCLAW_VERSION), no upgrade needed"; \
    else \
        echo "INFO: Base image has OpenClaw $CUR_VER, upgrading to $OPENCLAW_VERSION"; \
        # npm 10's atomic-move install can hit EROFS on overlayfs when the
        # prior install spans multiple image layers (e.g. openclaw was
        # baked into sandbox-base, then we upgrade on top here). Clearing
        # at the shell level first gives npm a clean slate and avoids the
        # rmdir failure inside npm's own install path.
        rm -rf /usr/local/lib/node_modules/openclaw /usr/local/bin/openclaw; \
        npm install -g --no-audit --no-fund --no-progress "openclaw@${OPENCLAW_VERSION}"; \
    fi; \
    # Pre-install the codex-acp package so the embedded ACPx runtime can
    # call the local binary instead of `npx @zed-industries/codex-acp`.
    # The sandbox's L7 proxy denies @zed-industries/* package URLs
    # (403 policy_denied), and npm still refreshes registry metadata for
    # versioned npx package specs even when the package is globally installed.
    # Installing the binary at build time and configuring ACPx to use it
    # directly keeps TC-SBX-02 off the runtime npm path.
    npm install -g --no-audit --no-fund --no-progress \
        '@zed-industries/codex-acp@0.11.1'; \
    command -v codex-acp >/dev/null
# Patch OpenClaw media fetch for proxy-only sandbox (NVIDIA/NemoClaw#1755).
#
# NemoClaw forces all sandbox egress through the OpenShell L7 proxy
# (default 10.200.0.1:3128). Two layers of OpenClaw must be patched for
# Telegram/Discord/Slack media downloads to work in this environment:
#
# === Patch 1: redirect strict-mode export to trusted-env-proxy ===
# OpenClaw's media fetch path (fetch-ClF-ZgDC.js → fetchRemoteMedia) calls
# fetchWithSsrFGuard(withStrictGuardedFetchMode({...})) unconditionally.
# Strict mode does DNS-pinning + direct connect, which fails in the sandbox
# netns where only the proxy is reachable. Rewriting the fetch-guard module
# export so the strict alias maps to withTrustedEnvProxyGuardedFetchMode
# makes the existing callsite request proxy mode without touching callers.
# The export pattern `withStrictGuardedFetchMode as <letter>` is stable
# across versions while alias letters drift between minified bundles.
# Files that define withStrictGuardedFetchMode locally without an export
# (e.g. mattermost.js) keep their original strict behavior.
#
# === Patch 2: env-gated bypass for assertExplicitProxyAllowed ===
# OpenClaw 2026.4.2 added assertExplicitProxyAllowed() in fetch-guard,
# which validates the explicit proxy URL by passing the proxy hostname
# through resolvePinnedHostnameWithPolicy() with the *target's* SsrfPolicy.
# When the target uses hostnameAllowlist (Telegram media policy:
# `["api.telegram.org"]`), the proxy hostname (e.g. 10.200.0.1) gets
# rejected with "Blocked hostname (not in allowlist)". This is an upstream
# OpenClaw design flaw: a proxy is infrastructure, not a fetch target, and
# should not be filtered through the target's allowlist.
#
# Inject an early-return guarded by `process.env.OPENSHELL_SANDBOX === "1"`
# so the bypass only activates inside an OpenShell sandbox runtime, which
# is what NemoClaw deploys into. OpenShell injects this env var when it
# starts a sandbox pod; any consumer running the same openclaw bundle
# outside an OpenShell sandbox (bare-metal, another wrapper) does not have
# OPENSHELL_SANDBOX set and keeps the full upstream SSRF check. The L7
# proxy itself enforces per-endpoint network policy inside the sandbox,
# so the trust boundary for SSRF protection is unchanged.
#
# Image-level `ENV` does NOT work here: OpenShell controls the pod env at
# runtime and image ENV vars set by Dockerfile are stripped. OPENSHELL_SANDBOX
# is the only marker reliably present in the runtime.
#
# === Patch 2b: allow OpenShell host gateway through web_fetch guard ===
# OpenClaw's web_fetch SSRF guard blocks *.internal hostnames before the
# OpenShell L7 proxy sees the request. NemoClaw users legitimately reach
# host-local approved services through host.openshell.internal after the
# OpenShell policy explicitly allows that host:port. Add this exact hostname
# only to the web_fetch trusted-env-proxy policy, only inside an OpenShell
# sandbox. The generic SSRF helper and strict/direct DNS-pinned paths remain
# unmodified, so metadata/link-local/private IP literals are unchanged.
#
# === Removal criteria ===
# Patch 1: drop when OpenClaw deprecates withStrictGuardedFetchMode or
# when all media-fetch callsites unconditionally pass useEnvProxy.
# Patch 2: drop when OpenClaw fixes assertExplicitProxyAllowed to skip the
# target hostname allowlist for the proxy hostname check (or exposes config
# to disable the check).
# Patch 2b: drop when OpenClaw ships a reviewed web_fetch trusted-proxy SSRF
# policy surface that can allow host.openshell.internal without allowing
# broader private/special-use hostnames.
#
# SYNC WITH OPENCLAW: these patches classify the compiled OpenClaw dist at
# build time. They apply the legacy patch when the old target exists, skip
# only when the dist shape proves OpenClaw no longer needs that patch, and
# fail with the OpenClaw version plus dist path for mixed or unknown shapes.
# When bumping OPENCLAW_VERSION, verify the new dist
# takes the expected branch and update the regex / sed replacement if needed.
# hadolint ignore=SC2016,DL3059,DL4006
RUN set -eu; \
# Patches the compiled OpenClaw dist in place with grep/sed. Each patch either \
# applies and verifies its edit, skips after checking the dist no longer has \
# the patch target, or aborts the build with exit 1. \
OC_DIST=/usr/local/lib/node_modules/openclaw/dist; \
# The second whitespace-separated field of `openclaw --version` is taken as \
# the version; an empty result falls back to "unknown". \
OC_VERSION="$(openclaw --version 2>/dev/null | awk '{print $2}' || true)"; \
OC_VERSION="${OC_VERSION:-unknown}"; \
# patch_fail MESSAGE...: report the version, the reason, and the dist path on \
# stderr, then exit 1. \
patch_fail() { \
echo "ERROR: OpenClaw ${OC_VERSION} fetch-guard patch cannot classify this dist shape: $*" >&2; \
echo " Inspect ${OC_DIST} and update the Dockerfile patch rules for this OpenClaw layout." >&2; \
exit 1; \
}; \
# --- Patch 1: rewrite fetch-guard export --- \
fg_export="$(grep -RIlE --include='*.js' 'export \{[^}]*withStrictGuardedFetchMode as [a-z]' "$OC_DIST" || true)"; \
if [ -n "$fg_export" ]; then \
# Every file exporting the strict alias must also contain the trusted-proxy \
# function name, otherwise the rewritten alias would point at nothing. \
for f in $fg_export; do \
grep -q 'withTrustedEnvProxyGuardedFetchMode' "$f" || patch_fail "Patch 1 target $f is missing withTrustedEnvProxyGuardedFetchMode"; \
done; \
printf '%s\n' "$fg_export" | xargs sed -i -E 's|withStrictGuardedFetchMode as ([a-z])|withTrustedEnvProxyGuardedFetchMode as \1|g'; \
if grep -REq --include='*.js' 'withStrictGuardedFetchMode as [a-z]' "$OC_DIST"; then echo "ERROR: Patch 1 left strict-mode export alias" >&2; exit 1; fi; \
echo "INFO: Patch 1 applied to OpenClaw ${OC_VERSION} strict fetch export"; \
else \
# No exported strict alias. Skipping is allowed only when the version is \
# known, no strict references remain, trusted-proxy references exist, and \
# every fetchGuardedMediaResponse file that calls fetchWithSsrFGuard does so \
# only on lines that also mention withTrustedEnvProxyGuardedFetchMode. \
strict_refs="$(grep -RIl --include='*.js' 'withStrictGuardedFetchMode' "$OC_DIST" || true)"; \
trusted_refs="$(grep -RIl --include='*.js' 'withTrustedEnvProxyGuardedFetchMode' "$OC_DIST" || true)"; \
media_fetch_files="$(grep -RIl --include='*.js' 'fetchGuardedMediaResponse' "$OC_DIST" || true)"; \
trusted_media_fetch=0; \
untrusted_media_fetch=0; \
for f in $media_fetch_files; do \
if ! grep -q 'fetchWithSsrFGuard' "$f"; then \
continue; \
elif grep -E 'fetchWithSsrFGuard' "$f" | grep -q 'withTrustedEnvProxyGuardedFetchMode' \
&& ! grep -E 'fetchWithSsrFGuard' "$f" | grep -vq 'withTrustedEnvProxyGuardedFetchMode'; then \
trusted_media_fetch=1; \
else \
echo "ERROR: Patch 1 unreviewed media fetch shape in $f" >&2; \
untrusted_media_fetch=1; \
fi; \
done; \
if [ "$OC_VERSION" != "unknown" ] && [ -z "$strict_refs" ] && [ -n "$trusted_refs" ] && [ "$trusted_media_fetch" = "1" ] && [ "$untrusted_media_fetch" = "0" ]; then \
echo "INFO: OpenClaw ${OC_VERSION} has no withStrictGuardedFetchMode references; Patch 1 not needed"; \
elif [ -z "$trusted_refs" ]; then \
patch_fail "Patch 1 target missing and withTrustedEnvProxyGuardedFetchMode is also absent"; \
else \
echo "ERROR: Patch 1 target missing but the fetch-guard shape is not a reviewed trusted-proxy-only layout:" >&2; \
if [ -n "$strict_refs" ]; then printf '%s\n' "$strict_refs" | head -n 5 >&2; fi; \
patch_fail "Patch 1 cannot safely skip"; \
fi; \
fi; \
# --- Patch 2: neutralize assertExplicitProxyAllowed --- \
fg_assert="$(grep -RIlE --include='*.js' 'async function assertExplicitProxyAllowed' "$OC_DIST" || true)"; \
if [ -n "$fg_assert" ]; then \
patched_assert=0; \
for f in $fg_assert; do \
# Files that already contain the OPENSHELL_SANDBOX guard are left untouched. \
if grep -q 'process.env.OPENSHELL_SANDBOX === "1"' "$f"; then \
echo "INFO: Patch 2 already present in $f"; \
else \
sed -i -E 's|(async function assertExplicitProxyAllowed\([^)]*\) \{)|\1 if (process.env.OPENSHELL_SANDBOX === "1") return; /* nemoclaw: env-gated bypass, see Dockerfile */ |' "$f"; \
grep -Eq 'assertExplicitProxyAllowed\([^)]*\) \{ if \(process\.env\.OPENSHELL_SANDBOX === "1"\) return; /\* nemoclaw' "$f" \
|| patch_fail "Patch 2 verification failed for $f"; \
patched_assert=1; \
fi; \
done; \
if [ "$patched_assert" = "1" ]; then \
echo "INFO: Patch 2 applied to OpenClaw ${OC_VERSION} explicit proxy validator"; \
fi; \
else \
# Validator function absent. Skip only if no file that references \
# resolvePinnedHostnameWithPolicy also mentions a proxy URL/hostname. \
proxy_hostname_checks="$(grep -RIlE --include='*.js' 'resolvePinnedHostnameWithPolicy' "$OC_DIST" | while IFS= read -r f; do \
if grep -Eq 'parsedProxyUrl|proxyUrl|proxyHostname|proxy.*[Hh]ostname|[Hh]ostname.*proxy|allowPrivateProxy' "$f"; then \
printf '%s\n' "$f"; \
fi; \
done || true)"; \
if [ -z "$proxy_hostname_checks" ]; then \
echo "INFO: OpenClaw ${OC_VERSION} has no assertExplicitProxyAllowed proxy hostname validator; Patch 2 not needed"; \
else \
echo "ERROR: Patch 2 target missing but proxy hostname validation references remain:" >&2; \
printf '%s\n' "$proxy_hostname_checks" | head -n 5 >&2; \
patch_fail "Patch 2 cannot safely skip"; \
fi; \
fi; \
# --- Patch 2b: allow OpenShell host gateway only through web_fetch trusted env proxy --- \
# Reviewed against openclaw@2026.5.22 dist: fetchWithWebToolsNetworkGuard \
# passes useEnvProxy into withTrustedEnvProxyGuardedFetchMode(resolved), and \
# the SSRF guard consumes policy.allowedHostnames to skip private-network \
# checks for an exact normalized hostname. hostnameAllowlist only gates \
# hostname pattern matching and does not bypass .internal/private blocking. \
web_guard_files="$(grep -RIlE --include='*.js' 'function fetchWithWebToolsNetworkGuard\(params\)' "$OC_DIST" || true)"; \
if [ -n "$web_guard_files" ]; then \
patched_host_gateway=0; \
for f in $web_guard_files; do \
# The marker comment written by the sed below doubles as the already-patched check. \
if grep -q 'nemoclaw: OpenShell host gateway for web_fetch trusted env proxy' "$f"; then \
echo "INFO: Patch 2b already present in $f"; \
else \
grep -q 'withTrustedEnvProxyGuardedFetchMode(resolved)' "$f" \
|| patch_fail "Patch 2b target $f is missing reviewed trusted env-proxy web_fetch call"; \
sed -i -E 's|return fetchWithSsrFGuard\(useEnvProxy \? withTrustedEnvProxyGuardedFetchMode\(resolved\) : withStrictGuardedFetchMode\(resolved\)\);|const hostGatewayPolicy = process.env.OPENSHELL_SANDBOX === "1" \&\& useEnvProxy \&\& new URL(resolved.url).hostname === "host.openshell.internal" ? { ...resolved.policy, allowedHostnames: [...resolved.policy?.allowedHostnames ?? [], "host.openshell.internal"] } : resolved.policy; return fetchWithSsrFGuard(useEnvProxy ? withTrustedEnvProxyGuardedFetchMode({ ...resolved, policy: hostGatewayPolicy }) : withStrictGuardedFetchMode(resolved)); /* nemoclaw: OpenShell host gateway for web_fetch trusted env proxy, see Dockerfile */|' "$f"; \
grep -Fq 'process.env.OPENSHELL_SANDBOX === "1" && useEnvProxy && new URL(resolved.url).hostname === "host.openshell.internal"' "$f" \
|| patch_fail "Patch 2b verification failed for $f"; \
patched_host_gateway=1; \
fi; \
done; \
if [ "$patched_host_gateway" = "1" ]; then \
echo "INFO: Patch 2b applied to OpenClaw ${OC_VERSION} web_fetch trusted-proxy host-gateway policy"; \
fi; \
else \
# Guard function absent. Skip only if the dist has no web_fetch or \
# env-proxy references at all; otherwise fail for manual review. \
web_fetch_proxy_refs="$(grep -RIlE --include='*.js' 'web_fetch|useEnvProxy|useTrustedEnvProxy|withTrustedEnvProxyGuardedFetchMode\(resolved\)' "$OC_DIST" || true)"; \
if [ -z "$web_fetch_proxy_refs" ]; then \
echo "INFO: OpenClaw ${OC_VERSION} has no web_fetch trusted env-proxy callsite; Patch 2b not needed"; \
else \
echo "ERROR: Patch 2b target missing but web_fetch/trusted-proxy references remain:" >&2; \
printf '%s\n' "$web_fetch_proxy_refs" | head -n 5 >&2; \
patch_fail "Patch 2b cannot safely skip"; \
fi; \
fi; \
# --- Patch 3: follow symlinks in plugin-install path checks (#2203) --- \
# OpenClaw's install-safe-path and install-package-dir reject symlinked \
# directories via lstat. Changing lstat → stat in these two modules lets \
# symlinks resolve; the real security gates (realpath + isPathInside \
# containment) remain intact — a symlink escaping the base tree is still caught. \
# Scoped to install-safe-path + install-package-dir only. \
# The search accepts either lstat or stat, so an already-patched file is \
# still located and passes the checks that follow. \
isp_file="$(grep -RIlE --include='*.js' 'const baseLstat = await fs\.(lstat|stat)\(baseDir\)' "$OC_DIST/install-safe-path-"*.js || true)"; \
test -n "$isp_file" || { echo "ERROR: install-safe-path baseLstat pattern not found" >&2; exit 1; }; \
sed -i 's/const baseLstat = await fs\.lstat(baseDir)/const baseLstat = await fs.stat(baseDir)/' "$isp_file"; \
if grep -q 'const baseLstat = await fs\.lstat(baseDir)' "$isp_file"; then echo "ERROR: Patch 3a (install-safe-path) left baseLstat lstat call" >&2; exit 1; fi; \
if ! grep -q 'const baseLstat = await fs\.stat(baseDir)' "$isp_file"; then echo "ERROR: Patch 3a (install-safe-path) did not find patched baseLstat stat call" >&2; exit 1; fi; \
ipd_file="$(grep -RIlE --include='*.js' 'assertInstallBaseStable' "$OC_DIST/install-package-dir-"*.js || true)"; \
test -n "$ipd_file" || { echo "ERROR: install-package-dir assertInstallBaseStable not found" >&2; exit 1; }; \
if grep -q 'const baseLstat = await fs\.lstat(params\.installBaseDir)' "$ipd_file"; then \
sed -i 's/const baseLstat = await fs\.lstat(params\.installBaseDir)/const baseLstat = await fs.stat(params.installBaseDir)/' "$ipd_file"; \
sed -i 's/baseLstat\.isSymbolicLink()/false \/* nemoclaw: symlink check disabled, realpath guards containment *\//' "$ipd_file"; \
if grep -q 'fs\.lstat(params\.installBaseDir)' "$ipd_file"; then echo "ERROR: Patch 3b (install-package-dir) left lstat in assertInstallBaseStable" >&2; exit 1; fi; \
if ! grep -q 'const baseLstat = await fs\.stat(params\.installBaseDir)' "$ipd_file" && ! grep -q 'await fs\.stat(params\.installBaseDir)).isDirectory()' "$ipd_file"; then echo "ERROR: Patch 3b (install-package-dir) did not find patched/safe installBaseDir stat call" >&2; exit 1; fi; \
if grep -q 'baseLstat\.isSymbolicLink()' "$ipd_file"; then echo "ERROR: Patch 3b (install-package-dir) left baseLstat symlink check" >&2; exit 1; fi; \
else \
# lstat form absent: require the realpath-vs-expectedRealPath comparison to \
# be present instead, and fail the build if it is not. \
grep -q 'await fs\.realpath(params\.installBaseDir) !== params\.expectedRealPath' "$ipd_file" || { echo "ERROR: install-package-dir lacks expected realpath stability guard" >&2; exit 1; }; \
fi; \
# --- Patch 5: bump default WS handshake timeout 10s -> 60s (#2484) --- \
# OpenClaw's WS connect handshake has a hard-coded 10s timeout on both \
# client and server. Server-side connect-handler processing can exceed \
# that limit under load (multiple concurrent connects on slow CI infra), \
# causing `openclaw agent --json` to fail with "gateway timeout after \
# <timeout>ms" and TC-SBX-02 to hit its 90s SSH timeout. \
# \
# Both env vars (OPENCLAW_HANDSHAKE_TIMEOUT_MS, \
# OPENCLAW_CONNECT_CHALLENGE_TIMEOUT_MS) are clamped at the same \
# DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS constant, so we patch the \
# constant itself. Affects both client.js (used by openclaw CLI) and \
# server.impl.js (gateway side). \
# \
# Removal criteria: drop when openclaw fixes the underlying connect \
# latency, or exposes the timeout as an unbounded env override. \
# The search also matches 6e4 so an already-patched dist is accepted; only \
# the 1e4 and 15e3 values are rewritten. \
hto_files="$(grep -RIlE --include='*.js' 'DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS = (1e4|15e3|6e4)' "$OC_DIST" || true)"; \
test -n "$hto_files" || { echo "ERROR: handshake-timeout constant not found" >&2; exit 1; }; \
printf '%s\n' "$hto_files" | xargs sed -i -E 's#DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS = (1e4|15e3)#DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS = 6e4#g'; \
if grep -REq --include='*.js' 'DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS = (1e4|15e3)' "$OC_DIST"; then echo "ERROR: Patch 5 left a short handshake-timeout constant" >&2; exit 1; fi; \
if ! grep -REq --include='*.js' 'DEFAULT_PREAUTH_HANDSHAKE_TIMEOUT_MS = 6e4' "$OC_DIST"; then echo "ERROR: Patch 5 did not find patched 6e4 constant" >&2; exit 1; fi
# chat.send gateway shim for OpenClaw 2026.5.x.
#
# Problem: when TUI/WebChat clients fire chat.send requests in quick
# succession, OpenClaw may emit a terminal chat event (state="final") for the
# later run without any assistant message, so clients consider the turn done
# although nothing visible was delivered.
#
# The shim additionally:
#   - maps real agent run IDs back to the submitted chat.send run ID when
#     OpenClaw starts an internal run under a different ID,
#   - carries the submitted ID through queued follow-up turns, and
#   - uses the submitted run ID as the transcript idempotency key.
#
# Remove once upstream fixes openclaw/openclaw#70164 and
# openclaw/openclaw#50298, or once NemoClaw stops shipping OpenClaw 2026.5.x.
# hadolint ignore=DL3059
RUN node /usr/local/lib/nemoclaw/patch-openclaw-chat-send.js /usr/local/lib/node_modules/openclaw/dist
# Tool-catalog patch for the pinned OpenClaw 2026.5.22 compiled selection
# runtime: the model is shown a compact, searchable tool catalog, while the
# full effective tool set stays reachable behind tool_call.
#   - Emergency rollback: set NEMOCLAW_TOOL_CATALOG=0 to disable the wrapper.
#   - The script fails closed when the pinned selection-*.js shape changes.
# hadolint ignore=DL3059
RUN node /usr/local/lib/nemoclaw/patch-openclaw-tool-catalog.js /usr/local/lib/node_modules/openclaw/dist
# Stage the blueprint under /sandbox/.nemoclaw so it resolves locally.
# Blueprints never change at runtime. Root-ownership (DAC) protection is
# applied later, because /sandbox/.nemoclaw is Landlock read_write for
# plugin state (#804).
RUN set -e; \
    blueprint_dir=/sandbox/.nemoclaw/blueprints/0.1.0; \
    mkdir -p "$blueprint_dir"; \
    cp -r /opt/nemoclaw-blueprint/* "$blueprint_dir/"
# Startup entrypoint plus the shared sandbox initialisation library.
COPY scripts/lib/sandbox-init.sh /usr/local/lib/nemoclaw/sandbox-init.sh
COPY scripts/nemoclaw-start.sh /usr/local/bin/nemoclaw-start
# NODE_OPTIONS preload modules go to a Landlock-accessible path: OpenShell
# ≥0.0.36 blocks /opt/nemoclaw-blueprint/ for non-root users, yet the
# entrypoint has to read these files to install runtime preloads under /tmp.
COPY nemoclaw-blueprint/scripts/*.js /usr/local/lib/nemoclaw/preloads/
COPY scripts/codex-acp-wrapper.sh /usr/local/bin/nemoclaw-codex-acp
# Python helpers keep their file names under /usr/local/lib/nemoclaw/.
COPY scripts/generate-openclaw-config.py \
     scripts/openclaw-build-messaging-plugins.py \
     scripts/seed-wechat-accounts.py \
     /usr/local/lib/nemoclaw/
COPY nemoclaw-blueprint/openclaw-plugins/ /usr/local/share/nemoclaw/openclaw-plugins/
# Permissions: 755 for entrypoints, helper scripts, and plugin directories;
# 644 for preload modules and plugin files.
RUN set -eu; \
    chmod 755 \
        /usr/local/bin/nemoclaw-start \
        /usr/local/bin/nemoclaw-codex-acp \
        /usr/local/lib/nemoclaw/sandbox-init.sh \
        /usr/local/lib/nemoclaw/generate-openclaw-config.py \
        /usr/local/lib/nemoclaw/openclaw-build-messaging-plugins.py \
        /usr/local/lib/nemoclaw/seed-wechat-accounts.py; \
    preload_dir=/usr/local/lib/nemoclaw/preloads; \
    if [ -d "$preload_dir" ]; then \
        find "$preload_dir" -type f -name '*.js' -exec chmod 644 {} +; \
    fi; \
    plugin_root=/usr/local/share/nemoclaw/openclaw-plugins; \
    chmod 755 /usr/local/share/nemoclaw "$plugin_root"; \
    find "$plugin_root" -type d -exec chmod 755 {} +; \
    find "$plugin_root" -type f -exec chmod 644 {} +
# Build args for config that varies per deployment.
# nemoclaw onboard passes these at image build time.
# Model id, provider key, and the primary model reference. The default
# reference is the default provider key and model id joined with "/".
ARG NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b
ARG NEMOCLAW_PROVIDER_KEY=inference
ARG NEMOCLAW_PRIMARY_MODEL_REF=inference/nvidia/nemotron-3-super-120b-a12b
# Default dashboard port 18789 — override at runtime via NEMOCLAW_DASHBOARD_PORT.
ARG CHAT_UI_URL=http://127.0.0.1:18789
# Inference endpoint, API flavour, and model limits.
# NOTE(review): presumably consumed by generate-openclaw-config.py when it
# writes openclaw.json — confirm against that script.
ARG NEMOCLAW_INFERENCE_BASE_URL=https://inference.local/v1
ARG NEMOCLAW_INFERENCE_API=openai-completions
ARG NEMOCLAW_CONTEXT_WINDOW=131072
ARG NEMOCLAW_MAX_TOKENS=4096
ARG NEMOCLAW_REASONING=false
# Comma-separated list of input modalities accepted by the primary model
# (e.g. "text" or "text,image" for vision-capable models). OpenClaw's
# model schema currently accepts "text" and "image". See #2421.
ARG NEMOCLAW_INFERENCE_INPUTS=text
# Per-request inference timeout (seconds) baked into agents.defaults.timeoutSeconds.
# Increase for slow local inference (e.g., CPU Ollama). openclaw.json is
# immutable at runtime (Landlock read-only), so this can only be changed by
# rebuilding via `nemoclaw onboard`. Ref: issue #2281
ARG NEMOCLAW_AGENT_TIMEOUT=600
# Cadence for OpenClaw's periodic heartbeat
# (agents.defaults.heartbeat.every). Accepts Go-style durations like "30m",
# "5m", "1h"; "0m" disables heartbeat. Empty default preserves the OpenClaw
# built-in cadence. openclaw.json is immutable at runtime, so this can only
# change at image build time. Ref: issue #2880
ARG NEMOCLAW_AGENT_HEARTBEAT_EVERY=
# The default e30= is base64 for "{}" (an empty JSON object).
# NOTE(review): presumably per-provider inference compatibility overrides —
# confirm the expected schema against the config generator.
ARG NEMOCLAW_INFERENCE_COMPAT_B64=e30=
# Base64-encoded JSON list of messaging channel names to pre-configure
# (e.g. ["discord","telegram"]). Channels are added with placeholder tokens
# so the L7 proxy can rewrite them at egress. Default: empty list
# (W10= is base64 for "[]").
ARG NEMOCLAW_MESSAGING_CHANNELS_B64=W10=
# Base64-encoded JSON map of channel→allowed sender IDs for DM allowlisting
# (e.g. {"telegram":["123456789"]}). Channels with IDs get dmPolicy=allowlist.
# Slack also uses those IDs for channel @mention allowlisting. Channels without
# IDs keep the OpenClaw default (pairing). Default: empty map.
ARG NEMOCLAW_MESSAGING_ALLOWED_IDS_B64=e30=
# Base64-encoded JSON map of Discord guild configs keyed by server ID
# (e.g. {"1234567890":{"requireMention":true,"users":["555"]}}).
# Used to enable guild-channel responses for native Discord. Default: empty map.
ARG NEMOCLAW_DISCORD_GUILDS_B64=e30=
# Base64-encoded JSON Telegram config (e.g. {"requireMention":true}).
# When requireMention is true, Telegram groups get groups: {"*": {"requireMention": true}}
# with groupPolicy: open. See #1737, #3022. Default: empty map.
ARG NEMOCLAW_TELEGRAM_CONFIG_B64=e30=
# Base64-encoded JSON WeChat config (e.g.
# {"accountId":"…","baseUrl":"https://…","userId":"…"}).
# Captured by the host-side iLink QR login during onboard. Non-secret per-account
# metadata only — the bot token flows through the OpenShell provider, never
# baked into the image. Default: empty map.
ARG NEMOCLAW_WECHAT_CONFIG_B64=e30=
# Base64-encoded JSON Slack config (e.g.
# {"allowedChannels":["C012AB3CD","C987ZY6XW"]}).
# Channel IDs scope Slack channel @mention handling. User allowlists still come
# from NEMOCLAW_MESSAGING_ALLOWED_IDS_B64. Default: empty map.
ARG NEMOCLAW_SLACK_CONFIG_B64=e30=
# Set to "1" to force-disable device-pairing auth. Also auto-disabled when
# CHAT_UI_URL is a non-loopback address (Brev Launchable, remote deployments)
# since terminal-based pairing is impossible in those contexts.
# Default: "0" (device auth enabled for local deployments — secure by default).
ARG NEMOCLAW_DISABLE_DEVICE_AUTH=0
# Unique per build — busts the Docker cache for the token-injection layer
# so each image gets a fresh gateway auth token.
# Pass --build-arg NEMOCLAW_BUILD_ID=$(date +%s) to bust the cache.
# With the literal default below, the value does not change between builds
# unless the caller overrides it.
ARG NEMOCLAW_BUILD_ID=default
# macOS OpenShell VM backend imports the Docker image into a virtiofs rootfs
# where image uid/gid ownership is presented as the host user. The VM also
# starts NemoClaw as the non-root sandbox user, so uid-owned 770/660 paths
# become unreadable unless this Darwin-only compatibility mode is enabled.
ARG NEMOCLAW_DARWIN_VM_COMPAT=0
# Sandbox egress proxy host/port. Defaults match the OpenShell-injected
# gateway (10.200.0.1:3128). Operators on non-default networks can override
# at sandbox creation time by exporting NEMOCLAW_PROXY_HOST / NEMOCLAW_PROXY_PORT
# before running `nemoclaw onboard`. See #1409.
ARG NEMOCLAW_PROXY_HOST=10.200.0.1
ARG NEMOCLAW_PROXY_PORT=3128
# Non-secret flag: set to "1" when the user configured Brave Search during
# onboard. Controls whether the web search block is written to openclaw.json.
# The actual API key is injected at runtime via openshell:resolve:env, never
# baked into the image.
ARG NEMOCLAW_WEB_SEARCH_ENABLED=0
# SECURITY (C-2): every build-arg the config generator needs is exported as an
# environment variable here, and the Python side reads it through os.environ.
# Build-arg values must never be spliced into the text of a `python3 -c`
# program: direct ARG interpolation into Python source is a code-injection
# vector. NEMOCLAW_OPENCLAW_WECHAT_PLUGIN_PREINSTALLED is a fixed flag rather
# than a promoted build-arg.
ENV NEMOCLAW_MODEL=${NEMOCLAW_MODEL} \
    NEMOCLAW_PROVIDER_KEY=${NEMOCLAW_PROVIDER_KEY} \
    NEMOCLAW_PRIMARY_MODEL_REF=${NEMOCLAW_PRIMARY_MODEL_REF} \
    CHAT_UI_URL=${CHAT_UI_URL} \
    NEMOCLAW_INFERENCE_BASE_URL=${NEMOCLAW_INFERENCE_BASE_URL} \
    NEMOCLAW_INFERENCE_API=${NEMOCLAW_INFERENCE_API} \
    NEMOCLAW_CONTEXT_WINDOW=${NEMOCLAW_CONTEXT_WINDOW} \
    NEMOCLAW_MAX_TOKENS=${NEMOCLAW_MAX_TOKENS} \
    NEMOCLAW_REASONING=${NEMOCLAW_REASONING} \
    NEMOCLAW_INFERENCE_INPUTS=${NEMOCLAW_INFERENCE_INPUTS} \
    NEMOCLAW_AGENT_TIMEOUT=${NEMOCLAW_AGENT_TIMEOUT} \
    NEMOCLAW_AGENT_HEARTBEAT_EVERY=${NEMOCLAW_AGENT_HEARTBEAT_EVERY} \
    NEMOCLAW_INFERENCE_COMPAT_B64=${NEMOCLAW_INFERENCE_COMPAT_B64} \
    NEMOCLAW_MESSAGING_CHANNELS_B64=${NEMOCLAW_MESSAGING_CHANNELS_B64} \
    NEMOCLAW_MESSAGING_ALLOWED_IDS_B64=${NEMOCLAW_MESSAGING_ALLOWED_IDS_B64} \
    NEMOCLAW_DISCORD_GUILDS_B64=${NEMOCLAW_DISCORD_GUILDS_B64} \
    NEMOCLAW_TELEGRAM_CONFIG_B64=${NEMOCLAW_TELEGRAM_CONFIG_B64} \
    NEMOCLAW_WECHAT_CONFIG_B64=${NEMOCLAW_WECHAT_CONFIG_B64} \
    NEMOCLAW_SLACK_CONFIG_B64=${NEMOCLAW_SLACK_CONFIG_B64} \
    NEMOCLAW_OPENCLAW_WECHAT_PLUGIN_PREINSTALLED=1 \
    NEMOCLAW_DISABLE_DEVICE_AUTH=${NEMOCLAW_DISABLE_DEVICE_AUTH} \
    NEMOCLAW_PROXY_HOST=${NEMOCLAW_PROXY_HOST} \
    NEMOCLAW_PROXY_PORT=${NEMOCLAW_PROXY_PORT} \
    NEMOCLAW_WEB_SEARCH_ENABLED=${NEMOCLAW_WEB_SEARCH_ENABLED}
WORKDIR /sandbox
# Run the config-generation and plugin steps below as the unprivileged sandbox
# user; the build switches back to root further down for ownership fixes.
USER sandbox
# Write openclaw.json with gateway config but WITHOUT the real auth token.
# The gateway auth token is generated at container startup by the entrypoint
# and passed via OPENCLAW_GATEWAY_TOKEN env var only to the gateway process
# (running as 'gateway' user). The token file location depends on startup mode:
# Root mode: /run/nemoclaw/gateway-token (gateway:gateway 0400)
# Non-root mode: $XDG_RUNTIME_DIR/nemoclaw/gateway-token (sandbox:sandbox 0400)
# See: scripts/nemoclaw-start.sh generate_gateway_token()
#
# Config is mutable by default (group-writable sandbox:sandbox). Immutability
# is opt-in via `shields up` (DAC 444 root:root + chattr +i).
# Build args (NEMOCLAW_MODEL, CHAT_UI_URL) customize per deployment.
#
# Generate openclaw.json from environment variables. Config generation logic
# lives in scripts/generate-openclaw-config.py — see that file for the full
# list of env vars and derivation rules.
#
# OpenClaw's managed proxy config activates process-wide HTTP_PROXY/HTTPS_PROXY
# for child npm processes. During image build the OpenShell gateway is not
# available at the runtime sandbox proxy address yet, so defer the final proxy
# block until after build-time OpenClaw doctor/plugin commands complete.
#
# The inline NEMOCLAW_OPENCLAW_MANAGED_PROXY=0 prefix applies to this single
# command only; it is not persisted in the image environment. The final proxy
# block is written by a later RUN once the build-time OpenClaw commands are done.
RUN NEMOCLAW_OPENCLAW_MANAGED_PROXY=0 python3 /usr/local/lib/nemoclaw/generate-openclaw-config.py
# Runs before NPM_CONFIG_OFFLINE is set, i.e. while npm registry traffic is
# still permitted in this build.
# NOTE(review): presumably installs/builds the messaging channel plugins
# selected via NEMOCLAW_MESSAGING_CHANNELS_B64 — confirm against
# openclaw-build-messaging-plugins.py.
# hadolint ignore=DL3059,DL4006
RUN python3 /usr/local/lib/nemoclaw/openclaw-build-messaging-plugins.py
# npm lock-down: from this instruction onward the image performs no registry
# traffic, so every later step has to resolve packages from local sources.
# Audit and funding lookups are switched off as well.
ENV NPM_CONFIG_OFFLINE=true \
    NPM_CONFIG_AUDIT=false \
    NPM_CONFIG_FUND=false
# Register the NemoClaw plugin with OpenClaw from the local /opt/nemoclaw copy
# (no network access needed). The install/enable/inspect chain is joined with
# `&&` so the image build fails if registration fails; otherwise the sandbox
# could boot with a discoverable plugin manifest but without the /nemoclaw
# runtime command registered in the active Gateway.
# WeChat account seeding is re-applied afterwards because OpenClaw
# doctor/plugin-install touches openclaw.json; the seed script no-ops unless
# WeChat is actively configured.
# Finally, non-runtime metadata (type declarations, source maps, build info,
# test/docs/example directories) is pruned from the staged bundled plugin
# dependencies inside this same layer: deleting it in a later layer would not
# reduce the OCI image imported by k3s.
# hadolint ignore=DL3059,DL4006
RUN openclaw plugins install /opt/nemoclaw \
    && openclaw plugins enable nemoclaw \
    && openclaw plugins inspect nemoclaw --json > /dev/null \
    && python3 /usr/local/lib/nemoclaw/seed-wechat-accounts.py \
    && deps_dir=/sandbox/.openclaw/plugin-runtime-deps \
    && if [ -d "$deps_dir" ]; then \
         find "$deps_dir" -type f \( \
           -name '*.d.ts' -o -name '*.d.mts' -o -name '*.d.cts' \
           -o -name '*.map' -o -name '*.tsbuildinfo' \
         \) -delete; \
         find "$deps_dir" -type d \( \
           -name __tests__ -o -name test -o -name tests \
           -o -name docs -o -name examples \
         \) -prune -exec rm -rf {} +; \
       fi
# SECURITY: Clear any gateway auth token that openclaw doctor/plugins may have
# auto-generated. The real token is created at container startup by the
# entrypoint (generate_gateway_token) and never stored in openclaw.json.
# Also add the final OpenClaw managed proxy config after build-time OpenClaw
# commands are done, so runtime Discord/WebSocket traffic uses the OpenShell
# gateway proxy without forcing image-build npm traffic through that proxy.
#
# Proxy host/port are read from os.environ (never interpolated into the Python
# source) and fall back to 10.200.0.1:3128 when unset or empty.
# The file is read and written through pathlib so both file handles are closed
# (and the JSON flushed to disk) deterministically before the final chmod,
# rather than relying on garbage collection of anonymous open() objects.
# The continuation lines stay at column 0 so the joined `python3 -c` program
# does not start with stray indentation.
RUN python3 -c "\
import json, os, pathlib; \
path = pathlib.Path(os.path.expanduser('~/.openclaw/openclaw.json')); \
cfg = json.loads(path.read_text()); \
cfg.setdefault('gateway', {}).setdefault('auth', {})['token'] = ''; \
proxy_host = os.environ.get('NEMOCLAW_PROXY_HOST') or '10.200.0.1'; \
proxy_port = os.environ.get('NEMOCLAW_PROXY_PORT') or '3128'; \
cfg['proxy'] = { \
'enabled': True, \
'proxyUrl': f'http://{proxy_host}:{proxy_port}', \
'loopbackMode': 'proxy', \
}; \
path.write_text(json.dumps(cfg, indent=2)); \
path.chmod(0o600)"
# Flatten stale published base images that still contain the old
# .openclaw-data symlink bridge. OpenShell starts the sandbox as the sandbox
# user, so runtime migration cannot rely on root privileges inside the pod.
# Doing this in the image build guarantees new PR images have only the unified
# .openclaw layout even when sandbox-base:latest has not been rebuilt yet.
# hadolint ignore=DL3002
USER root
# Overview of the RUN below (`set -eu` aborts the build on the first failure):
#   1. Refuse to continue if /sandbox/.openclaw-data itself, or any top-level
#      entry inside it, is a symlink.
#   2. Merge every top-level entry of .openclaw-data into .openclaw:
#      directories are copied over the target, plain files are copied only
#      when the target does not exist, and a symlink at the target path is
#      removed first.
#   3. Repeatedly replace symlinks under .openclaw that point into
#      .openclaw-data with real copies, until a full pass replaces nothing.
#   4. Delete .openclaw-data, verify that no symlink still references it, and
#      create /tmp/nemoclaw-legacy-openclaw-layout so the later ownership
#      repair RUN knows the migration path ran.
#   5. Ensure the standard .openclaw subdirectories (sandbox:sandbox, 2770)
#      and state files (sandbox:sandbox, 660) exist, then drop npm caches.
# hadolint ignore=DL4006
RUN set -eu; \
config_dir=/sandbox/.openclaw; \
data_dir=/sandbox/.openclaw-data; \
legacy_layout=0; \
legacy_marker=/tmp/nemoclaw-legacy-openclaw-layout; \
# Start clean: the marker may only exist if THIS build migrated a legacy layout.
rm -f "$legacy_marker"; \
mkdir -p "$config_dir"; \
if [ -L "$data_dir" ]; then \
echo "ERROR: refusing legacy layout cleanup because $data_dir is a symlink" >&2; \
exit 1; \
fi; \
if [ -d "$data_dir" ]; then \
legacy_layout=1; \
# Phase 1: merge top-level legacy entries into the unified config dir.
# NOTE(review): the `*` glob does not match dot-entries, so hidden entries
# directly under .openclaw-data are removed by the `rm -rf` further down
# without being migrated — confirm none are expected.
for entry in "$data_dir"/*; do \
# Skip the literal pattern when the glob matched nothing.
[ -e "$entry" ] || [ -L "$entry" ] || continue; \
if [ -L "$entry" ]; then \
echo "ERROR: refusing legacy layout cleanup because $entry is a symlink" >&2; \
exit 1; \
fi; \
name="$(basename "$entry")"; \
target="$config_dir/$name"; \
# A symlink at the target path is dropped so the copy lands in the unified
# tree rather than wherever the link points.
if [ -L "$target" ]; then \
rm -f "$target"; \
fi; \
if [ -d "$entry" ]; then \
mkdir -p "$target"; \
cp -a "$entry"/. "$target"/; \
elif [ ! -e "$target" ]; then \
cp -a "$entry" "$target"; \
fi; \
done; \
# Phase 2: replace symlinks that still point into the legacy dir.
# The `while read` loop runs on the receiving end of a pipeline, so a
# replacement is signalled through the marker file rather than a shell
# variable; the outer loop repeats until a pass leaves the marker absent.
data_real="$(readlink -f "$data_dir" 2>/dev/null || printf '%s' "$data_dir")"; \
while :; do \
# mktemp is used only to obtain a unique path; the file is removed right away
# and recreated by the inner loop when a link gets replaced.
replaced_marker="$(mktemp)"; \
rm -f "$replaced_marker"; \
find "$config_dir" -type l -print | while IFS= read -r link; do \
raw_target="$(readlink "$link" 2>/dev/null || true)"; \
resolved_target="$(readlink -f "$link" 2>/dev/null || true)"; \
legacy_target=0; \
# A link counts as legacy when either its literal or its fully resolved
# target lies below the legacy data dir.
case "$raw_target" in "$data_real"/* | "$data_dir"/*) legacy_target=1 ;; esac; \
case "$resolved_target" in "$data_real"/* | "$data_dir"/*) legacy_target=1 ;; esac; \
if [ "$legacy_target" -eq 1 ]; then \
# Prefer the resolved target; fall back to the literal one when the
# resolved path is empty or does not exist.
copy_target="$resolved_target"; \
if [ -z "$copy_target" ] || { [ ! -e "$copy_target" ] && [ ! -L "$copy_target" ]; }; then \
copy_target="$raw_target"; \
fi; \
if [ -d "$copy_target" ] && [ ! -L "$copy_target" ]; then \
rm -f "$link"; \
mkdir -p "$link"; \
cp -a "$copy_target"/. "$link"/; \
elif [ -e "$copy_target" ] || [ -L "$copy_target" ]; then \
rm -f "$link"; \
cp -a "$copy_target" "$link"; \
else \
echo "ERROR: legacy symlink target missing: $link -> ${raw_target:-$resolved_target}" >&2; \
exit 1; \
fi; \
: > "$replaced_marker"; \
fi; \
done; \
if [ ! -e "$replaced_marker" ]; then \
rm -f "$replaced_marker"; \
break; \
fi; \
rm -f "$replaced_marker"; \
done; \
rm -rf "$data_dir"; \
fi; \
# Post-condition: the legacy dir must be gone in every case.
if [ -e "$data_dir" ] || [ -L "$data_dir" ]; then \
echo "ERROR: legacy data dir still exists after cleanup: $data_dir" >&2; \
exit 1; \
fi; \
# Verification pass (only when a migration ran): fail the build if any symlink
# under the config dir still references the legacy location, then record the
# migration via the marker file.
if [ "$legacy_layout" = "1" ]; then \
data_real="$(readlink -f "$data_dir" 2>/dev/null || printf '%s' "$data_dir")"; \
find "$config_dir" -type l -print | while IFS= read -r link; do \
raw_target="$(readlink "$link" 2>/dev/null || true)"; \
resolved_target="$(readlink -f "$link" 2>/dev/null || true)"; \
case "$raw_target" in \
"$data_real"/* | "$data_dir"/*) \
echo "ERROR: legacy symlink remains after cleanup: $link -> $raw_target" >&2; \
exit 1; \
;; \
esac; \
case "$resolved_target" in \
"$data_real"/* | "$data_dir"/*) \
echo "ERROR: legacy symlink remains after cleanup: $link -> $resolved_target" >&2; \
exit 1; \
;; \
esac; \
done; \
: > "$legacy_marker"; \
fi; \
# Ensure the standard subdirectories exist as sandbox:sandbox with mode 2770
# (setgid + group rwx).
# NOTE(review): `install -d` gives missing PARENT directories default
# attributes, so agents/ and agents/main/ would not receive the sandbox owner
# or 2770 mode if they do not already exist — confirm the base image creates
# them.
for dir in \
"$config_dir/agents/main/agent" \
"$config_dir/extensions" \
"$config_dir/workspace" \
"$config_dir/skills" \
"$config_dir/hooks" \
"$config_dir/identity" \
"$config_dir/devices" \
"$config_dir/canvas" \
"$config_dir/cron" \
"$config_dir/memory" \
"$config_dir/logs" \
"$config_dir/credentials" \
"$config_dir/flows" \
"$config_dir/sandbox" \
"$config_dir/telegram" \
"$config_dir/wechat" \
"$config_dir/media" \
"$config_dir/plugin-runtime-deps"; do \
install -d -o sandbox -g sandbox -m 2770 "$dir"; \
done; \
# Make sure both state files exist (existing content is kept) and are
# group-writable by sandbox.
for file in "$config_dir/update-check.json" "$config_dir/exec-approvals.json"; do \
touch "$file"; \
chown sandbox:sandbox "$file"; \
chmod 660 "$file"; \
done; \
# Remove npm cache directories for both root and the sandbox home.
rm -rf /root/.npm /sandbox/.npm
# Fallback for stale base images (#2681): newer bases already put the gateway
# user into the sandbox group, but this derived image must still build cleanly
# against older sandbox-base:latest tags. Nothing happens unless both users
# exist, and the `id -nG` membership test keeps the step idempotent by
# skipping usermod when gateway is already a member.
# hadolint ignore=DL4006
RUN if ! id gateway >/dev/null 2>&1 || ! id sandbox >/dev/null 2>&1; then \
      exit 0; \
    fi; \
    if id -nG gateway | tr ' ' '\n' | grep -qx sandbox; then \
      exit 0; \
    fi; \
    usermod -aG sandbox gateway
# Keep the image readable to the root entrypoint after capabilities are dropped.
# Fast path (current base images, unified .openclaw tree): fix ownership and
# mode only on the config dir itself, openclaw.json and plugin-runtime-deps,
# which avoids walking plugin-runtime-deps on every build.
# Slow path (only when the stale .openclaw-data migration actually ran and left
# its marker in /tmp): recursive chown/chmod of the whole tree, setgid on every
# directory, then remove the marker.
RUN set -eu; \
    oc_root=/sandbox/.openclaw; \
    migrated_marker=/tmp/nemoclaw-legacy-openclaw-layout; \
    if [ ! -e "$migrated_marker" ]; then \
      chown sandbox:sandbox \
        "$oc_root" \
        "$oc_root/openclaw.json" \
        "$oc_root/plugin-runtime-deps"; \
      chmod 2770 "$oc_root" "$oc_root/plugin-runtime-deps"; \
      chmod 660 "$oc_root/openclaw.json"; \
    else \
      chown -R sandbox:sandbox "$oc_root"; \
      chmod -R g+rwX,o-rwx "$oc_root"; \
      find "$oc_root" -type d -exec chmod g+s {} +; \
      rm -f "$migrated_marker"; \
    fi
# System-wide proxy hooks for shells that never source ~/.bashrc / ~/.profile
# (e.g. `bash -ic` / `bash -lc` invoked under a different user or without
# HOME=/sandbox). The hooks are defined in Dockerfile.base and replayed here so
# the fix applies before the GHCR base image catches up.
# Idempotent: each half is guarded by a grep for the hook path, so when the
# base layer already ships the hook the corresponding file is left untouched.
# Otherwise /etc/profile.d/nemoclaw-proxy.sh is (re)written, and the hook is
# prepended to /etc/bash.bashrc via a temporary file that is then moved into
# place. Both files end up mode 444.
# Ref: https://github.com/NVIDIA/NemoClaw/issues/2704
# hadolint ignore=SC2028,DL4006
RUN hook_note='# NemoClaw runtime proxy config — see /tmp/nemoclaw-proxy-env.sh (#2704)'; \
    hook_source='[ -f /tmp/nemoclaw-proxy-env.sh ] && . /tmp/nemoclaw-proxy-env.sh'; \
    if ! grep -q "/tmp/nemoclaw-proxy-env.sh" /etc/profile.d/nemoclaw-proxy.sh 2>/dev/null; then \
      printf '%s\n' "$hook_note" "$hook_source" > /etc/profile.d/nemoclaw-proxy.sh \
        && chmod 444 /etc/profile.d/nemoclaw-proxy.sh; \
    fi \
    && if ! head -2 /etc/bash.bashrc | grep -q "/tmp/nemoclaw-proxy-env.sh"; then \
      chmod 644 /etc/bash.bashrc 2>/dev/null || true; \
      { printf '%s\n' "$hook_note" "$hook_source" ''; \
        cat /etc/bash.bashrc; \
      } > /etc/bash.bashrc.new \
        && mv /etc/bash.bashrc.new /etc/bash.bashrc \
        && chmod 444 /etc/bash.bashrc; \
    fi
# Record the build-time SHA-256 of openclaw.json in .config-hash so the
# entrypoint can verify the config's integrity. The hash file is owned by
# sandbox:sandbox with mode 660.
RUN hash_file=/sandbox/.openclaw/.config-hash; \
    sha256sum /sandbox/.openclaw/openclaw.json > "$hash_file" \
    && chown sandbox:sandbox "$hash_file" \
    && chmod 660 "$hash_file"
# DAC protection for /sandbox/.nemoclaw. The directory is Landlock read_write
# (plugin state/config lives there), but the parent directory and blueprints/
# must stay immutable at runtime. Keeping the parent root-owned stops the agent
# from renaming or replacing the root-owned blueprints directory; only state/,
# migration/, snapshots/, staging/ and config.json are handed to the sandbox
# user for runtime writes.
# Why mode 1755 (sticky bit): OpenShell's prepare_filesystem() chowns
# read_write paths to run_as_user at sandbox start, which flips this directory
# to sandbox:sandbox. The sticky bit survives that chown and still prevents the
# sandbox user from renaming or deleting root-owned entries (blueprints/).
# Ref: https://github.com/NVIDIA/NemoClaw/issues/804
# Ref: https://github.com/NVIDIA/NemoClaw/issues/1607
RUN set -e; \
    nc_root=/sandbox/.nemoclaw; \
    chown root:root "$nc_root"; \
    chmod 1755 "$nc_root"; \
    chown -R root:root "$nc_root/blueprints"; \
    chmod -R 755 "$nc_root/blueprints"; \
    for sub in state migration snapshots staging; do \
      mkdir -p "$nc_root/$sub"; \
      chown sandbox:sandbox "$nc_root/$sub"; \
    done; \
    printf '%s' '{}' > "$nc_root/config.json"; \
    chown sandbox:sandbox "$nc_root/config.json"
# Darwin VM compatibility mode. OpenShell 0.0.37's macOS VM backend currently
# remaps rootfs ownership to the host uid/gid inside the guest while the
# entrypoint runs as the non-root sandbox user, so the mutable paths are opened
# up to all users. This runs only when NEMOCLAW_DARWIN_VM_COMPAT=1; Linux
# Docker-driver sandboxes keep the tighter group-only mutable-default
# permissions.
RUN [ "$NEMOCLAW_DARWIN_VM_COMPAT" = "1" ] || exit 0; \
    chmod -R a+rwX /sandbox/.openclaw; \
    find /sandbox/.openclaw -type d -exec chmod a+rwx {} +; \
    chmod a+rw /sandbox/.openclaw/openclaw.json /sandbox/.openclaw/.config-hash; \
    for state_dir in state migration snapshots staging; do \
      chmod -R a+rwX "/sandbox/.nemoclaw/$state_dir"; \
      find "/sandbox/.nemoclaw/$state_dir" -type d -exec chmod a+rwx {} +; \
    done; \
    chmod a+rw /sandbox/.nemoclaw/config.json
# Health check: poll the gateway's /health endpoint so Docker (and Compose)
# can detect and restart unhealthy containers in standalone deployments.
# Ref: https://github.com/NVIDIA/NemoClaw/issues/1430
#
# Two-stage probe so Docker health does not contradict the NemoClaw delivery
# chain on runtimes where the dashboard port lives in a different network
# namespace (e.g. DGX Spark / aarch64 with OpenShell-managed forwarding).
# The reporter saw `nemoclaw status` Ready + the host forward succeed while
# Docker marked the container unhealthy because the in-container curl could
# not see the dashboard listener. See #3975.
#
# 1. Direct in-container probe (HTTP 200) — definitive when it works,
# preserves the original Compose/standalone health signal.
# 2. ONLY on curl exit 7 ("Couldn't connect"), fall back to verifying
# ALL of:
# - the OpenClaw gateway process started by nemoclaw-start is
# still alive (via pgrep --ignore-ancestors), AND
# - the gateway log file exists and is non-empty (proves the
# process started and emitted its banner; rules out cases
# where the gateway never came up).
# Exit 7 means the in-container TCP connect was refused by the
# kernel because nothing is bound to the dashboard port inside
# this network namespace — the namespace-mismatch shape reported
# in #3975. A connect timeout (curl exit 28) is treated as a real
# failure: a listener exists but is not responding (wedged HTTP
# server), and we want Docker to restart in that case.
#
# Tradeoff: this fallback also fires in a standalone deployment where the
# gateway process is alive but the configured dashboard port is wrong or
# the listener never came up. We accept that residual risk because it
# requires a misconfiguration the start-period (45s) already gives the
# wizard a chance to fix, and the existing host-side delivery chain
# probes (verify-deployment.ts, host port forward, sandbox status) still
# catch it from outside.
#
# The process pattern matches both `openclaw gateway run` (the launcher
# command nemoclaw-start runs) and `openclaw-gateway` (the re-execed
# binary form OpenClaw switches into after startup). This is the same
# variant set the host-side gateway-stop script in services.ts matches.
#
# pgrep uses --ignore-ancestors so it cannot self-match the healthcheck
# shell that Docker spawns to run this CMD — that shell's argv contains
# the literal 'openclaw gateway' string we're searching for, and
# without --ignore-ancestors `pgrep -f` would happily report it as the
# live gateway even after the real process exited (procps 4.0+ supports
# this flag; the base image pins procps to 2:4.0.4-9).
#
# We deliberately do not fall back on HTTP 4xx/5xx — those mean the gateway
# answered with a failure inside this container, which is a real bad signal.
HEALTHCHECK --interval=30s --timeout=5s --start-period=45s --retries=3 \
  CMD port="${NEMOCLAW_DASHBOARD_PORT:-${OPENCLAW_GATEWAY_PORT:-}}"; \
      [ -n "$port" ] || port="$(python3 -c 'import os; from urllib.parse import urlparse; raw = os.environ.get("CHAT_UI_URL") or "http://127.0.0.1:18789"; raw = raw if "://" in raw else "http://" + raw; u = urlparse(raw); print(u.port or 18789)' 2>/dev/null || printf '18789')"; \
      rc=0; \
      curl -sf --max-time 3 "http://127.0.0.1:${port}/health" > /dev/null 2>&1 || rc=$?; \
      case "$rc" in \
        0) exit 0 ;; \
        7) ;; \
        *) exit 1 ;; \
      esac; \
      pgrep --ignore-ancestors -f 'openclaw[ -]gateway' > /dev/null 2>&1 || exit 1; \
      [ -s /tmp/gateway.log ]
# Entrypoint runs as root to start the gateway as the gateway user,
# then drops to sandbox for agent commands. See nemoclaw-start.sh.
# Both instructions use exec (JSON-array) form, so nemoclaw-start is launched
# directly without a wrapping shell and receives CMD ("/bin/bash") as its
# default argument; arguments passed to `docker run` replace that default.
ENTRYPOINT ["/usr/local/bin/nemoclaw-start"]
CMD ["/bin/bash"]