Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,10 @@ def run(
key_dict[key] = val if pd.notna(val) else ""

prompt_text = self.prompt_template.build_prompt(need_fields, **key_dict)

# 统一组装为基类所需的消息格式
conversations_list.append([{"role": "user", "content": prompt_text}])
if use_api_mode:
conversations_list.append([{"role": "user", "content": prompt_text}])
else:
conversations_list.append([{"from": "human", "value": prompt_text}])

self.logger.info(
f"Built {len(conversations_list)} prompts using fields: {need_fields}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,11 @@ def run(self, storage: DataFlowStorage, input_prompts_key: str, input_image_key:
else:
# Local 模式(如 vLLM)通常需要手动在文本前拼接 <image> 占位符
img_tokens = "<image>" * len(image_path)
conversation = [{"role": "user", "content": img_tokens + q}]
# 根据 serving 模式选择对话格式
if use_api_mode:
conversation = [{"role": "user", "content": img_tokens + q}]
else:
conversation = [{"from": "human", "value": img_tokens + q}]

flat_conversations.append(conversation)
flat_images.append(image_path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,10 @@ def run(self,
else:
content = ("<image>" * valid_media_count) + self.user_prompt

conversations_list.append([{"role": "user", "content": content}])
if use_api_mode:
conversations_list.append([{"role": "user", "content": content}])
else:
conversations_list.append([{"from": "human", "value": content}])

elif has_videos:
video_inputs_list = video_column
Expand All @@ -108,7 +111,10 @@ def run(self,
else:
content = ("<video>" * valid_media_count) + self.user_prompt

conversations_list.append([{"role": "user", "content": content}])
if use_api_mode:
conversations_list.append([{"role": "user", "content": content}])
else:
conversations_list.append([{"from": "human", "value": content}])

# 统一调用基类的消息生成接口
outputs = self.serving.generate_from_input_messages(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def run(
else:
self.logger.info("Using local serving mode")

# 4. 构造多模态对话结构
# 4. 构造多模态对话结构(兼容 Local 和 API 模式)
conversations_list = []
image_inputs_list = None
video_inputs_list = None
Expand All @@ -139,25 +139,29 @@ def run(
valid_media_count = len([p for p in paths if p])

if use_api_mode:
# API 模式:使用 OpenAI 格式
content = prompt_text
conversations_list.append([{"role": "user", "content": content}])
else:
# Local 模式:使用 ShareGPT 格式,并注入 <image> tokens
img_tokens = "<image>" * valid_media_count
content = f"{img_tokens}\n{prompt_text}" if img_tokens else prompt_text
conversations_list.append([{"from": "human", "value": content}])

conversations_list.append([{"role": "user", "content": content}])

elif has_videos:
video_inputs_list = video_column
for prompt_text, paths in zip(prompt_column, video_column):
valid_media_count = len([p for p in paths if p])

if use_api_mode:
# API 模式:使用 OpenAI 格式
content = prompt_text
conversations_list.append([{"role": "user", "content": content}])
else:
# Local 模式:使用 ShareGPT 格式,并注入 <video> tokens
vid_tokens = "<video>" * valid_media_count
content = f"{vid_tokens}\n{prompt_text}" if vid_tokens else prompt_text

conversations_list.append([{"role": "user", "content": content}])
conversations_list.append([{"from": "human", "value": content}])

# 5. 统一调用基类接口
outputs = self.serving.generate_from_input_messages(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,10 @@ def run(
img_tokens = "<image>" * len(valid_img_paths)
content = f"{img_tokens}\n{q}" if img_tokens else q

flat_conversations.append([{"role": "user", "content": content}])
if use_api_mode:
flat_conversations.append([{"role": "user", "content": content}])
else:
flat_conversations.append([{"from": "human", "value": content}])
flat_images.append(valid_img_paths)
indices_to_generate.append(idx)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,11 @@ def run(self, storage: DataFlowStorage, input_image_key: str, input_kws_key: str
else:
img_tokens = "<image>" * len(img_path)
content = f"{img_tokens}\n{text_prompt}" if img_tokens else text_prompt
if use_api_mode:
flat_conversations.append([{"role": "user", "content": content}])
else:
flat_conversations.append([{"from": "human", "value": content}])

flat_conversations.append([{"role": "user", "content": content}])
flat_images.append(img_path)
row_mappings.append({"row_idx": idx, "keyword": kw})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,21 @@ def run(self, storage: DataFlowStorage, input_list_key: str, input_image_key: st
img_tokens = "<image>" * len(image_path)
content_v = f"{img_tokens}\n{prompt_v}" if img_tokens else prompt_v

vis_conversations.append([{"role": "user", "content": content_v}])
if use_api_mode:
vis_conversations.append([{"role": "user", "content": content_v}])
else:
vis_conversations.append([{"from": "human", "value": content_v}])
vis_images.append(image_path)
vis_mappings.append({"row_idx": row_idx, "qa_idx": qa_idx, "expected": ans_v})

# --- 2. Text-Only Case (纯文本无图分支) ---
q_t, ans_t = shuffle_options_logic(qa_item, add_none_option=False)
prompt_t = self.inst_template.format(q_t)

txt_conversations.append([{"role": "user", "content": prompt_t}])
if use_api_mode:
txt_conversations.append([{"role": "user", "content": prompt_t}])
else:
txt_conversations.append([{"from": "human", "value": prompt_t}])
txt_mappings.append({"row_idx": row_idx, "qa_idx": qa_idx, "expected": ans_t})

# =========================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ def run(self, storage: DataFlowStorage, input_list_key: str, input_image_key: st
img_tokens = "<image>" * len(image_path)
content = f"{img_tokens}\n{prompt_text}" if img_tokens else prompt_text

flat_conversations.append([{"role": "user", "content": content}])
if use_api_mode:
flat_conversations.append([{"role": "user", "content": content}])
else:
flat_conversations.append([{"from": "human", "value": content}])
flat_images.append(image_path)
row_mappings.append({"row_idx": idx, "item": item})

Expand Down