Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions env.example
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ LLM_TOKENS_PER_MINUTE=40000 # 每分钟最大令牌数(包含输入+输
# # 推荐:不设置此项,让系统自动计算以确保不超过 TPM/RPM 限制
# # 计算示例:min(800, 40000/3500) = min(800, 11) = 11 并发

# --- LLM Token 估算配置(用于速率限制) ---
# 估算输出 tokens 数量,用于速率限制计算
# 如果估算过高,并发会受限;如果估算不足,可能触发 429 错误
# LLM_ESTIMATED_OUTPUT_TOKENS=3000 # LLM 输出估算(实体提取约 3000 tokens,默认 3000)
# LLM_VLM_ESTIMATED_OUTPUT_TOKENS=500 # VLM 输出估算(图片描述较短,默认 500)
# LLM_VLM_MAX_TOKENS=500 # VLM API 最大输出 tokens(默认 500)
# LLM_VLM_IMAGE_TOKENS_ESTIMATE=200 # VLM 图片输入估算 tokens(默认 200)

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这些估算值和 LLM_REQUESTS_PER_MINUTE=800 # 每分钟最大请求数(默认 800)
LLM_TOKENS_PER_MINUTE=40000 # 每分钟最大令牌数(包含输入+输出,默认 40000)
是不是有冲突?既然并发数是根据 RPM/TPM 自动计算的,为什么还需要单独配置这些估算值? @copilot @claude

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

确实有冲突。已在 e2cb6e7 中撤销这些配置项。

rate_limiter.py 中的 avg_tokens_map 已内置各服务的 token 估算值,通过 LLM_REQUESTS_PER_MINUTE / LLM_TOKENS_PER_MINUTE 自动计算并发数,不需要额外暴露这些内部值。

# ====== Embedding 配置 ======
# 用于向量化文本,支持语义检索
EMBEDDING_BASE_URL="https://api.siliconflow.cn/v1"
Expand Down Expand Up @@ -142,6 +150,9 @@ DS_OCR_REQUESTS_PER_MINUTE=800 # 每分钟最大请求数(默认 800)
DS_OCR_TOKENS_PER_MINUTE=40000 # 每分钟最大令牌数(默认 40000)
# DS_OCR_MAX_ASYNC=8 # 【可选】全局默认并发数(未设置时使用硬编码默认值 8)

# --- DeepSeek-OCR Token 估算配置(用于速率限制) ---
# DS_OCR_IMAGE_TOKENS_ESTIMATE=1000 # 图片输入估算 tokens(默认 1000)

# ====== 智能 Parser 选择器配置(v2.0) ======
# 基于文档复杂度自动选择最优 Parser 和模式

Expand Down
26 changes: 26 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,26 @@ class LLMConfig(BaseSettings):
tokens_per_minute: int = Field(default=40000, description="Maximum tokens per minute (input + output)")
max_async: Optional[int] = Field(default=None, description="Maximum concurrent requests (optional, auto-calculated if not set)")

# Token estimation for rate limiting (LLM)
estimated_output_tokens: int = Field(
default=3000,
description="Estimated output tokens for LLM calls (entity extraction typically outputs ~3000 tokens)"
)

# Token estimation for rate limiting (VLM)
vlm_estimated_output_tokens: int = Field(
default=500,
description="Estimated output tokens for VLM calls (image descriptions are typically shorter)"
)
vlm_max_tokens: int = Field(
default=500,
description="Maximum output tokens for VLM API calls"
)
vlm_image_tokens_estimate: int = Field(
default=200,
description="Estimated tokens for image input in VLM calls"
)

class Config:
env_prefix = "LLM_"
env_file = ".env"
Expand Down Expand Up @@ -149,6 +169,12 @@ class DeepSeekOCRConfig(BaseSettings):
tokens_per_minute: int = Field(default=40000, description="Maximum tokens per minute")
max_async: Optional[int] = Field(default=None, description="Maximum concurrent requests (optional, auto-calculated if not set)")

# Token estimation for rate limiting
image_tokens_estimate: int = Field(
default=1000,
description="Estimated tokens for image input in OCR calls"
)

class Config:
env_prefix = "DS_OCR_"
env_file = ".env"
Expand Down
11 changes: 7 additions & 4 deletions src/deepseek_ocr_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ class DSSeekConfig:
fallback_mode: str = field(default_factory=lambda: config.ds_ocr.fallback_mode)
min_output_threshold: int = field(default_factory=lambda: config.ds_ocr.min_output_threshold)

# Token 估算配置
image_tokens_estimate: int = field(default_factory=lambda: config.ds_ocr.image_tokens_estimate)

def __post_init__(self):
"""验证配置"""
if not self.api_key:
Expand Down Expand Up @@ -281,8 +284,8 @@ async def _call_api(self, img_base64: str, prompt: str) -> str:
Raises:
Exception: API 调用失败时抛出异常
"""
# 估算 tokens(提示词 + 图片约 1000 tokens + 输出约 2000 tokens
estimated_tokens = len(prompt) // 3 + 1000 + self.config.max_tokens
# 估算 tokens(提示词 + 图片 + 输出
estimated_tokens = len(prompt) // 3 + self.config.image_tokens_estimate + self.config.max_tokens

# 获取速率限制许可
await self.rate_limiter.rate_limiter.acquire(estimated_tokens)
Expand Down Expand Up @@ -348,8 +351,8 @@ def _call_api_sync(self, img_base64: str, prompt: str) -> str:
"""
import asyncio

# 估算 tokens(提示词 + 图片约 1000 tokens + 输出约 2000 tokens
estimated_tokens = len(prompt) // 3 + 1000 + self.config.max_tokens
# 估算 tokens(提示词 + 图片 + 输出
estimated_tokens = len(prompt) // 3 + self.config.image_tokens_estimate + self.config.max_tokens

# 在同步函数中调用异步速率限制器
try:
Expand Down
22 changes: 18 additions & 4 deletions src/multi_tenant.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ def __init__(
self.max_async = config.llm.max_async
self.vlm_timeout = config.llm.vlm_timeout

# Token 估算配置
self.llm_estimated_output_tokens = config.llm.estimated_output_tokens
self.vlm_estimated_output_tokens = config.llm.vlm_estimated_output_tokens
self.vlm_max_tokens = config.llm.vlm_max_tokens
self.vlm_image_tokens_estimate = config.llm.vlm_image_tokens_estimate

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

问题同.env.example

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

已在 e2cb6e7 中撤销相关修改。

# 存储配置
self.use_external_storage = config.storage.use_external
self.kv_storage = config.storage.kv_storage
Expand Down Expand Up @@ -106,11 +112,14 @@ def _create_llm_func(self, llm_config: Dict):
# 获取 rate_limiter 实际使用的并发数(将用于 LightRAG)
actual_max_concurrent = rate_limiter.max_concurrent

# 获取 token 估算配置(支持租户覆盖)
llm_estimated_output = llm_config.get("estimated_output_tokens", self.llm_estimated_output_tokens)

def llm_model_func(prompt, **kwargs):
# 精确计算输入 tokens(使用 tiktoken)
input_tokens = count_tokens(prompt, model="cl100k_base")
# 保守估算输出 tokens(实体提取通常输出较长)
estimated_output = 3000 # 50 entities + 46 relations ≈ 3000 tokens
estimated_output = llm_estimated_output # 从配置读取
estimated_tokens = input_tokens + estimated_output

# Debug: 输出 token 计数
Expand Down Expand Up @@ -295,6 +304,11 @@ def _create_vision_model_func(self, llm_config: Dict):
tokens_per_minute=tokens_per_minute
)

# 获取 VLM token 估算配置(支持租户覆盖)
vlm_image_tokens = llm_config.get("vlm_image_tokens_estimate", self.vlm_image_tokens_estimate)
vlm_estimated_output = llm_config.get("vlm_estimated_output_tokens", self.vlm_estimated_output_tokens)
vlm_max_tokens = llm_config.get("vlm_max_tokens", self.vlm_max_tokens)

async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: str) -> str:
"""
使用 VLM 理解图片内容(带速率限制)
Expand All @@ -309,8 +323,8 @@ async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: st
"""
# 精确计算 tokens(使用 tiktoken)
prompt_tokens = count_tokens(prompt, model="cl100k_base")
image_tokens = 200 # 图片约 200 tokens(固定估算)
estimated_output = 500 # VLM 输出通常较短
image_tokens = vlm_image_tokens # 从配置读取
estimated_output = vlm_estimated_output # 从配置读取
estimated_tokens = prompt_tokens + image_tokens + estimated_output

# Debug: 输出 token 计数
Expand All @@ -336,7 +350,7 @@ async def seed_vision_model_func(prompt: str, image_data: str, system_prompt: st
]
}
],
"max_tokens": 500,
"max_tokens": vlm_max_tokens, # 从配置读取
"temperature": 0.1
}

Expand Down