ModelEngine-Group · Dallas98 · May 27, 2026 · May 17, 2026 · May 17, 2026 · May 26, 2026
@@ -37,8 +37,6 @@ build/
 *.tgz
 
 # Backend
-backend/assets/*
-!backend/assets/test.wav
 backend/flower_db.sqlite
 uploads/
 test/
@@ -60,4 +58,4 @@ assets/
 .Spotlight-V100
 .Trashes
 ehthumbs.db
-Thumbs.db 
+Thumbs.db 
@@ -21,7 +21,7 @@
 
 from database.a2a_agent_db import PROTOCOL_JSONRPC
 from services.memory_config_service import build_memory_context
-from services.image_service import get_vlm_model
+from services.image_service import get_video_understanding_model, get_vlm_model
 from database.agent_db import search_agent_info_by_agent_id, query_sub_agents_id_list
 from database.agent_version_db import query_current_version_no
 from database.tool_db import search_tools_for_sub_agent
@@ -31,13 +31,36 @@
 from utils.model_name_utils import add_repo_to_name
 from utils.prompt_template_utils import get_agent_prompt_template
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
-from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE
+from consts.const import LOCAL_MCP_SERVER, MODEL_CONFIG_MAPPING, LANGUAGE, DATA_PROCESS_SERVICE, MINIO_DEFAULT_BUCKET
 from consts.exceptions import ValidationError
 
 logger = logging.getLogger("create_agent_info")
 logger.setLevel(logging.DEBUG)
 
 
+def _build_internal_s3_url(file: dict) -> str:
+    """Build a valid S3 URL for internal tools from uploaded file metadata."""
+    if not isinstance(file, dict):
+        return ""
+
+    object_name = str(file.get("object_name") or "").strip().lstrip("/")
+    if object_name:
+        bucket = MINIO_DEFAULT_BUCKET or "nexent"
+        return f"s3://{bucket}/{object_name}"
+
+    url = str(file.get("url") or "").strip()
+    if not url or url.startswith("blob:") or url.startswith("s3:/blob:"):
+        return ""
+
+    if url.startswith("s3://"):
+        return url
+
+    if url.startswith("s3:/"):
+        return "s3://" + url.replace("s3:/", "", 1).lstrip("/")
+
+    return "s3:/" + url
+
+
 def _get_skills_for_template(
     agent_id: int,
     tenant_id: str,
@@ -532,10 +555,17 @@
             }
         elif tool_config.class_name == "AnalyzeImageTool":
             tool_config.metadata = {
+                # get_vlm_model reads the first multimodal slot, now shown as image understanding.
                 "vlm_model": get_vlm_model(tenant_id=tenant_id),
                 "storage_client": minio_client,
                 "validate_url_access": lambda urls: validate_urls_access(urls, user_id)
             }
+        elif tool_config.class_name in ["AnalyzeAudioTool", "AnalyzeVideoTool"]:
+            tool_config.metadata = {
+                "vlm_model": get_video_understanding_model(tenant_id=tenant_id),
+                "storage_client": minio_client,
+                "validate_url_access": lambda urls: validate_urls_access(urls, user_id)
+            }
 
         tool_config_list.append(tool_config)
 
@@ -636,21 +666,25 @@
     # Collect files from current message first (higher priority)
     if minio_files and isinstance(minio_files, list):
         for file in minio_files:
-            if isinstance(file, dict) and file.get("url") and file.get("name"):
-                url = file["url"]
-                if url not in seen_urls:
-                    seen_urls.add(url)
+            if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")):
+                s3_url = _build_internal_s3_url(file)
+                if not s3_url:
+                    continue
+                if s3_url not in seen_urls:
+                    seen_urls.add(s3_url)
                     all_files.append(file)
 
     # Collect files from historical messages (lower priority, already-deduped)
     if history and isinstance(history, list):
         for msg in history:
             if isinstance(msg, dict) and msg.get("minio_files"):
                 for file in msg["minio_files"]:
-                    if isinstance(file, dict) and file.get("url") and file.get("name"):
-                        url = file["url"]
-                        if url not in seen_urls:
-                            seen_urls.add(url)
+                    if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")):
+                        s3_url = _build_internal_s3_url(file)
+                        if not s3_url:
+                            continue
+                        if s3_url not in seen_urls:
+                            seen_urls.add(s3_url)
                             all_files.append(file)
 
     # Enforce file count limit (keep most recent files by truncating from the end)
@@ -666,7 +700,7 @@
         fixed_overhead = len(prefix) + len(suffix)
 
         for i, file in enumerate(all_files):
-            s3_url = f"s3:/{file['url']}"
+            s3_url = _build_internal_s3_url(file)
             presigned_url = file.get("presigned_url", "")
 
             # Build description with both URLs
@@ -700,7 +734,7 @@
    return final_query


 def _format_minio_files_for_content(minio_files: Optional[List[dict]], max_files: int = 20) -> str:
    """Format minio_files into a string for embedding in history content.

    Args:
@@ -718,8 +752,10 @@
         if i >= max_files:
             file_lines.append(f"  - ... (and {len(minio_files) - max_files} more files)")
             break
-        if isinstance(file, dict) and file.get("url") and file.get("name"):
-            s3_url = f"s3:/{file['url']}"
+        if isinstance(file, dict) and file.get("name") and (file.get("url") or file.get("object_name")):
+            s3_url = _build_internal_s3_url(file)
+            if not s3_url:
+                continue
             presigned_url = file.get("presigned_url", "")
             if presigned_url:
                 file_lines.append(

@@ -308,6 +308,8 @@ class VectorDatabaseType(str, Enum):
     "multiEmbedding": "MULTI_EMBEDDING_ID",
     "rerank": "RERANK_ID",
     "vlm": "VLM_ID",
+    "vlm2": "VLM2_ID",
+    "vlm3": "VLM3_ID",
     "stt": "STT_ID",
     "tts": "TTS_ID"
 }

@@ -177,6 +177,14 @@ class STTModelConfig(BaseModel):
     accessToken: Optional[str] = None
 
 
+def _empty_model_config() -> SingleModelConfig:
+    return SingleModelConfig(
+        modelName="",
+        displayName="",
+        apiConfig=ModelApiConfig(apiKey="", modelUrl="")
+    )
+
+
 class TTSModelConfig(BaseModel):
     """TTS model specific configuration with factory, appid, and access token fields"""
     modelName: str
@@ -193,6 +201,8 @@ class ModelConfig(BaseModel):
     multiEmbedding: SingleModelConfig
     rerank: SingleModelConfig
     vlm: SingleModelConfig
+    vlm2: SingleModelConfig = Field(default_factory=_empty_model_config)
+    vlm3: SingleModelConfig = Field(default_factory=_empty_model_config)
     stt: STTModelConfig
     tts: TTSModelConfig
 

@@ -117,7 +117,7 @@ system_prompt: |-
      → Use **presigned_url** (already includes proxy prefix, format: `http://.../api/nb/v1/file/fetch?presigned_url=...`)
      Directly use the **presigned_url** field provided in the user's uploaded file info. No need to construct or append anything.
   2. **Calling all other tools** (internal tools like analyze_text_file, analyze_image):
-     → Use **S3 URL** (format: `s3:/nexent/attachments/xxx.pdf`)
+     → Use **S3 URL** (format: `s3://nexent/attachments/xxx.pdf`)
      Reason: Internal tools run inside Nexent and can directly access MinIO storage
 
   {%- else %}

@@ -119,7 +119,7 @@ system_prompt: |-
         → Use **Download URL** (format: `https://minio.example.com/...?token=xxx`)
         Reason: MCP tools run on external services and cannot access internal S3 storage
      2. **Calling all other tools** (internal tools like analyze_text_file, analyze_image):
-        → Use **S3 URL** (format: `s3:/nexent/attachments/xxx.pdf`)
+        → Use **S3 URL** (format: `s3://nexent/attachments/xxx.pdf`)
         Reason: Internal tools run inside Nexent and can directly access MinIO storage
      {%- else %}
      - No tools are currently available

@@ -6,6 +6,7 @@ dependencies = [
     "aiofiles>=0.8.0",
     "uvicorn>=0.34.0",
     "fastapi>=0.115.12",
+    "email-validator>=2.0.0",
     "aiohttp>=3.8.0",
     "authlib>=1.3.0",
     "cryptography>=42.0.0",
@@ -16,6 +17,7 @@ dependencies = [
     "supabase>=2.18.1",
     "websocket-client>=1.8.0",
     "pyyaml>=6.0.2",
+    "jsonref>=1.1.0",
     "ruamel-yaml==0.19.1",
     "redis>=5.0.0",
     "fastmcp==2.12.0",

@@ -1237,7 +1237,7 @@
    return json.dumps(export_data.model_dump())


 async def export_agent_by_agent_id(agent_id: int, tenant_id: str, user_id: str) -> ExportAndImportAgentInfo:
    """
    Export a single agent's information based on agent_id
    """
@@ -1265,7 +1265,7 @@
 
     # Check if any tool is KnowledgeBaseSearchTool and set its metadata to empty dict
     for tool in tool_list:
-        if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "DataMateSearchTool"]:
+        if tool.class_name in ["KnowledgeBaseSearchTool", "AnalyzeTextFileTool", "AnalyzeImageTool", "AnalyzeAudioTool", "AnalyzeVideoTool", "DataMateSearchTool"]:
             tool.metadata = {}
 
     # Get model_id and model display name from agent_info

@@ -20,7 +20,7 @@
     MODEL_ENGINE_ENABLED,
     TENANT_NAME
 )
-from database.model_management_db import get_model_id_by_display_name
+from database.model_management_db import get_model_id_by_display_name, get_model_records
 from utils.config_utils import (
     get_env_key,
     get_model_name_from_config,
@@ -31,6 +31,20 @@
 logger = logging.getLogger("config_sync_service")
 
 
+def get_model_id_for_config(model_type: str, display_name: str, tenant_id: str) -> Optional[int]:
+    if not display_name:
+        return None
+
+    records = get_model_records(
+        {"display_name": display_name, "model_type": model_type},
+        tenant_id
+    )
+    if records:
+        return records[0].get("model_id")
+
+    return get_model_id_by_display_name(display_name, tenant_id)
+
+
 def handle_model_config(tenant_id: str, user_id: str, config_key: str, model_id: Optional[int], tenant_config_dict: dict) -> None:
     """
     Handle model configuration updates, deletions, and settings operations
@@ -98,8 +112,8 @@ async def save_config_impl(config, tenant_id, user_id):
         model_display_name = model_config.get("displayName")
 
         config_key = get_env_key(model_type) + "_ID"
-        model_id = get_model_id_by_display_name(
-            model_display_name, tenant_id)
+        model_id = get_model_id_for_config(
+            model_type, model_display_name, tenant_id)
 
         handle_model_config(tenant_id, user_id, config_key,
                             model_id, tenant_config_dict)

@@ -31,7 +31,11 @@
 
 
 def get_vlm_model(tenant_id: str):
-    # Get the tenant config
+    """Return the configured image understanding model for AnalyzeImageTool.
+
+    The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"]
+    for compatibility, but it is the user-facing image understanding configuration.
+    """
     vlm_model_config = tenant_config_manager.get_model_config(
         key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
     if not vlm_model_config:
@@ -48,3 +52,27 @@
         max_tokens=512,
         ssl_verify=vlm_model_config.get("ssl_verify", True),
     )
+
+
+def get_image_understanding_model(tenant_id: str):
+    return get_vlm_model(tenant_id=tenant_id)
+
+
+def get_video_understanding_model(tenant_id: str):
+    """Return the configured video understanding model for multimodal tools."""
+    vlm_model_config = tenant_config_manager.get_model_config(
+        key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id)
+    if not vlm_model_config:
+        return None
+    return OpenAIVLModel(
+        observer=MessageObserver(),
+        model_id=get_model_name_from_config(
+            vlm_model_config) if vlm_model_config else "",
+        api_base=vlm_model_config.get("base_url", ""),
+        api_key=vlm_model_config.get("api_key", ""),
+        temperature=0.7,
+        top_p=0.7,
+        frequency_penalty=0.5,
+        max_tokens=512,
+        ssl_verify=vlm_model_config.get("ssl_verify", True),
+    )
@@ -64,7 +64,7 @@
        raise ValueError(f"Unsupported model type: {model_type}")


 async def _perform_connectivity_check(
    model_name: str,
    model_type: str,
    model_base_url: str,
@@ -134,7 +134,7 @@
             ssl_verify=ssl_verify,
         )
         connectivity = await rerank_model.connectivity_check()
-    elif model_type == "vlm":
+    elif model_type in ("vlm", "vlm2", "vlm3"):
         observer = MessageObserver()
         set_monitoring_operation("connectivity_check",
                                  display_name=display_name)

@@ -164,6 +164,13 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay
             tenant_id, provider, model_type)
         model_list_ids = {model.get("id")
                           for model in model_list} if model_list else set()
+        existing_model_map = {
+            add_repo_to_name(
+                model_repo=model["model_repo"],
+                model_name=model["model_name"],
+            ): model
+            for model in existing_model_list
+        }
 
         # Delete existing models not present
         for model in existing_model_list:
@@ -173,21 +180,20 @@ async def batch_create_models_for_tenant(user_id: str, tenant_id: str, batch_pay
 
         # Create or update new models
         for model in model_list:
+            model["model_type"] = model_type
             _, model_name = split_repo_name(
                 model["id"]) if model.get("id") else ("", "")
             model_repo, model_name_only = split_repo_name(
                 model.get("id", "")) if model.get("id") else ("", "")
             model_display_name = add_repo_to_name(model_repo, model_name_only)
             if model_name:
-                existing_model_by_display = get_model_by_display_name(
-                    model_display_name, tenant_id)
-                if existing_model_by_display:
+                existing_model = existing_model_map.get(model_display_name)
+                if existing_model:
                     # Check if max_tokens has changed
-                    existing_max_tokens = existing_model_by_display.get(
-                        "max_tokens")
+                    existing_max_tokens = existing_model.get("max_tokens")
                     new_max_tokens = model.get("max_tokens")
                     if new_max_tokens is not None and existing_max_tokens != new_max_tokens:
-                        update_model_record(existing_model_by_display["model_id"], {
+                        update_model_record(existing_model["model_id"], {
                                             "max_tokens": new_max_tokens}, user_id)
                     continue