diff --git a/safetytooling/apis/inference/openai/base.py b/safetytooling/apis/inference/openai/base.py index e94dc4c..860ec0e 100644 --- a/safetytooling/apis/inference/openai/base.py +++ b/safetytooling/apis/inference/openai/base.py @@ -1,5 +1,6 @@ import asyncio import logging +import os import time from pathlib import Path from traceback import format_exc @@ -69,7 +70,14 @@ def __init__( if openai_api_key: self.aclient = openai.AsyncClient(api_key=openai_api_key, base_url=self.base_url) else: - self.aclient = openai.AsyncClient(base_url=self.base_url) + # openai v1+ requires api_key at construction time — it will raise OpenAIError + # if OPENAI_API_KEY is not set, even when only non-OpenAI models are used. + # Fall back to a sentinel so the client initialises cleanly; actual OpenAI + # calls will fail with a 401 if a real key is not set in the environment. + self.aclient = openai.AsyncClient( + api_key=os.environ.get("OPENAI_API_KEY", "not-configured"), + base_url=self.base_url, + ) self.openai_api_key = openai_api_key self.token_capacity = dict() diff --git a/safetytooling/apis/inference/openai/embedding.py b/safetytooling/apis/inference/openai/embedding.py index 905956d..ddda1d5 100644 --- a/safetytooling/apis/inference/openai/embedding.py +++ b/safetytooling/apis/inference/openai/embedding.py @@ -1,5 +1,6 @@ import asyncio import logging +import os import time import traceback @@ -20,7 +21,9 @@ def __init__(self, batch_size: int = 2048): self.num_threads = 1 self.batch_size = batch_size # Max batch size for embedding endpoint - self.aclient = openai.AsyncClient() + # openai v1+ requires api_key at construction; fall back to sentinel so + # the client initialises even when OPENAI_API_KEY is not set in the environment. + self.aclient = openai.AsyncClient(api_key=os.environ.get("OPENAI_API_KEY", "not-configured")) self.available_requests = asyncio.BoundedSemaphore(self.num_threads) async def embed( diff --git a/safetytooling/apis/inference/openai/moderation.py b/safetytooling/apis/inference/openai/moderation.py index b800464..7da9b72 100644 --- a/safetytooling/apis/inference/openai/moderation.py +++ b/safetytooling/apis/inference/openai/moderation.py @@ -1,5 +1,6 @@ import asyncio import logging +import os import time from traceback import format_exc from typing import Awaitable @@ -27,7 +28,9 @@ def __init__( self.num_threads = num_threads self._batch_size = 32 # Max batch size for moderation endpoint - self.aclient = openai.AsyncClient() + # openai v1+ requires api_key at construction; fall back to sentinel so + # the client initialises even when OPENAI_API_KEY is not set in the environment. + self.aclient = openai.AsyncClient(api_key=os.environ.get("OPENAI_API_KEY", "not-configured")) self.available_requests = asyncio.BoundedSemaphore(self.num_threads) async def _single_moderation_request( diff --git a/safetytooling/apis/inference/openai/s2s.py b/safetytooling/apis/inference/openai/s2s.py index 22118f9..8707bef 100644 --- a/safetytooling/apis/inference/openai/s2s.py +++ b/safetytooling/apis/inference/openai/s2s.py @@ -46,7 +46,10 @@ async def acquire(self): class OpenAIS2SModel(InferenceAPIModel): def __init__(self): - self.api_key = os.environ["OPENAI_API_KEY"] + # Use .get() with a sentinel so the model initialises even when OPENAI_API_KEY + # is not set; actual S2S WebSocket connections will fail at call time if no + # real key is provided. + self.api_key = os.environ.get("OPENAI_API_KEY", "not-configured") self.base_url = "wss://api.openai.com/v1/realtime" self.model = "gpt-4o-realtime-preview-2024-10-01" self.max_size = 10 * 1024 * 1024 # 10MB