Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions llm_web_kit/main_html_parser/parser/layout_batch_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
# 确保模板数据的键是整数类型
self.template_data = template_data
self.dynamic_id_enable = False
+self.dynamic_classid_enable = False

def parse_tuple_key(self, key_str):
if key_str.startswith('(') and key_str.endswith(')'):
Expand All @@ -40,6 +41,7 @@
html_source = pre_data[PreDataJsonKey.HTML_SOURCE]
template_dict_html = pre_data.get(PreDataJsonKey.TYPICAL_DICT_HTML, '<html></html>')
self.dynamic_id_enable = pre_data.get(PreDataJsonKey.DYNAMIC_ID_ENABLE, False)
+self.dynamic_classid_enable = pre_data.get('dynamic_classid_enable', False)
template_data_str = pre_data[PreDataJsonKey.HTML_ELEMENT_DICT]
template_data = dict()
if isinstance(template_data_str, str):
Expand Down Expand Up @@ -174,12 +176,12 @@
elif self.dynamic_id_enable and current_layer_key[2]:
node_label, matched_ele_key = self.__match_tag_class(layer_nodes, current_layer_ori_key, parent_keyy,
node_html, template_doc)
-if node_label is None:
+if node_label is None and self.dynamic_classid_enable:
     node_label, matched_ele_key = self.__match_tag(layer_nodes, current_layer_ori_key, parent_keyy,
                                                    node_html,
                                                    template_doc, False, True)
-    if node_label is None:
-        continue
+if node_label is None:
+    continue

Check warning on line 184 in llm_web_kit/main_html_parser/parser/layout_batch_parser.py

View check run for this annotation

Codecov / codecov/patch

llm_web_kit/main_html_parser/parser/layout_batch_parser.py#L184

Added line #L184 was not covered by tests
# 采用element dict中的key来替换
if current_layer_key == keyy:
keyy = matched_ele_key
Expand All @@ -191,7 +193,7 @@

if node_label == 'red':
has_red = True
-elif self.dynamic_id_enable and current_layer_key[1]:
+elif self.dynamic_id_enable and self.dynamic_classid_enable and current_layer_key[1]:
node_label, matched_ele_key = self.__match_tag(layer_nodes, current_layer_ori_key, parent_keyy,
node_html,
template_doc, True, False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def test_dynamic_id(self):
# 推广
pre_data[PreDataJsonKey.HTML_SOURCE] = expand_source
pre_data[PreDataJsonKey.DYNAMIC_ID_ENABLE] = True
+pre_data['dynamic_classid_enable'] = True
parser = LayoutBatchParser(element_dict)
parts = parser.parse(pre_data)
main_html_body = parts[PreDataJsonKey.MAIN_HTML_BODY]
Expand Down Expand Up @@ -206,6 +207,7 @@ def test_dynamic_classid(self):
# 推广
pre_data[PreDataJsonKey.HTML_SOURCE] = expand_source
pre_data[PreDataJsonKey.DYNAMIC_ID_ENABLE] = True
+pre_data['dynamic_classid_enable'] = True
parser = LayoutBatchParser(element_dict)
parts = parser.parse(pre_data)
main_html_body = parts[PreDataJsonKey.MAIN_HTML_BODY]
Expand All @@ -228,6 +230,7 @@ def test_dynamic_classid(self):
# 推广
pre_data[PreDataJsonKey.HTML_SOURCE] = expand_source2
pre_data[PreDataJsonKey.DYNAMIC_ID_ENABLE] = True
+pre_data['dynamic_classid_enable'] = True
parser = LayoutBatchParser(element_dict)
parts = parser.parse(pre_data)
main_html_body = parts[PreDataJsonKey.MAIN_HTML_BODY]
Expand Down