diff --git a/safetytooling/apis/inference/openai/base.py b/safetytooling/apis/inference/openai/base.py
index e94dc4c..860ec0e 100644
--- a/safetytooling/apis/inference/openai/base.py
+++ b/safetytooling/apis/inference/openai/base.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import os
 import time
 from pathlib import Path
 from traceback import format_exc
@@ -69,7 +70,14 @@ def __init__(
         if openai_api_key:
             self.aclient = openai.AsyncClient(api_key=openai_api_key, base_url=self.base_url)
         else:
-            self.aclient = openai.AsyncClient(base_url=self.base_url)
+            # openai v1+ resolves the API key at construction time and raises OpenAIError
+            # when neither api_key nor OPENAI_API_KEY is set, even if only non-OpenAI models
+            # are used. Fall back to a sentinel so the client initialises cleanly; actual
+            # OpenAI calls will fail with a 401 if a real key is not set in the environment.
+            self.aclient = openai.AsyncClient(
+                api_key=os.environ.get("OPENAI_API_KEY", "not-configured"),
+                base_url=self.base_url,
+            )
         self.openai_api_key = openai_api_key
 
         self.token_capacity = dict()
diff --git a/safetytooling/apis/inference/openai/embedding.py b/safetytooling/apis/inference/openai/embedding.py
index 905956d..ddda1d5 100644
--- a/safetytooling/apis/inference/openai/embedding.py
+++ b/safetytooling/apis/inference/openai/embedding.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import os
 import time
 import traceback
 
@@ -20,7 +21,9 @@ def __init__(self, batch_size: int = 2048):
         self.num_threads = 1
         self.batch_size = batch_size  # Max batch size for embedding endpoint
 
-        self.aclient = openai.AsyncClient()
+        # openai v1+ raises OpenAIError at construction when no API key is available, so
+        # fall back to a sentinel when OPENAI_API_KEY is not set in the environment.
+        self.aclient = openai.AsyncClient(api_key=os.environ.get("OPENAI_API_KEY", "not-configured"))
         self.available_requests = asyncio.BoundedSemaphore(self.num_threads)
 
     async def embed(
diff --git a/safetytooling/apis/inference/openai/moderation.py b/safetytooling/apis/inference/openai/moderation.py
index b800464..7da9b72 100644
--- a/safetytooling/apis/inference/openai/moderation.py
+++ b/safetytooling/apis/inference/openai/moderation.py
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import os
 import time
 from traceback import format_exc
 from typing import Awaitable
@@ -27,7 +28,9 @@ def __init__(
         self.num_threads = num_threads
         self._batch_size = 32  # Max batch size for moderation endpoint
 
-        self.aclient = openai.AsyncClient()
+        # openai v1+ raises OpenAIError at construction when no API key is available, so
+        # fall back to a sentinel when OPENAI_API_KEY is not set in the environment.
+        self.aclient = openai.AsyncClient(api_key=os.environ.get("OPENAI_API_KEY", "not-configured"))
         self.available_requests = asyncio.BoundedSemaphore(self.num_threads)
 
     async def _single_moderation_request(
diff --git a/safetytooling/apis/inference/openai/s2s.py b/safetytooling/apis/inference/openai/s2s.py
index 22118f9..8707bef 100644
--- a/safetytooling/apis/inference/openai/s2s.py
+++ b/safetytooling/apis/inference/openai/s2s.py
@@ -46,7 +46,10 @@ async def acquire(self):
 
 class OpenAIS2SModel(InferenceAPIModel):
     def __init__(self):
-        self.api_key = os.environ["OPENAI_API_KEY"]
+        # Use .get() with a sentinel so the model initialises even when OPENAI_API_KEY
+        # is not set; actual S2S WebSocket connections will fail at call time if no
+        # real key is provided.
+        self.api_key = os.environ.get("OPENAI_API_KEY", "not-configured")
         self.base_url = "wss://api.openai.com/v1/realtime"
         self.model = "gpt-4o-realtime-preview-2024-10-01"
         self.max_size = 10 * 1024 * 1024  # 10MB