
Commit 9671766

add support to qwen2_moe and qwen3_next (#21)
1 parent 77980e2 commit 9671766

File tree

3 files changed: +55 additions, −16 deletions


defuser/model_registry.py

Lines changed: 6 additions & 0 deletions
@@ -7,6 +7,9 @@
     "mixtral": {
         "min_transformers_version": "5.0.0",
     },
+    "qwen2_moe": {
+        "min_transformers_version": "5.0.0",
+    },
     "qwen3_moe": {
         "min_transformers_version": "5.0.0",
     },
@@ -16,4 +19,7 @@
     "qwen3_5_moe_text": {
         "min_transformers_version": "5.2.0",
     },
+    "qwen3_next": {
+        "min_transformers_version": "5.0.0",
+    },
 }
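
For context, a registry entry like the ones above would typically be used to gate model support on the installed transformers version. A minimal sketch of such a check follows; the helper is_model_supported is hypothetical and not part of Defuser's actual API, and the inlined registry only mirrors the entries visible in this diff:

# Hypothetical sketch only: is_model_supported is NOT Defuser's real API.
# The registry below mirrors the entries visible in this diff.
from packaging.version import Version

import transformers

MODEL_REGISTRY = {
    "mixtral": {"min_transformers_version": "5.0.0"},
    "qwen2_moe": {"min_transformers_version": "5.0.0"},
    "qwen3_moe": {"min_transformers_version": "5.0.0"},
    "qwen3_5_moe_text": {"min_transformers_version": "5.2.0"},
    "qwen3_next": {"min_transformers_version": "5.0.0"},
}


def is_model_supported(model_type: str) -> bool:
    entry = MODEL_REGISTRY.get(model_type)
    if entry is None:
        return False  # model family not registered
    # The installed transformers version must meet the registry's floor.
    return Version(transformers.__version__) >= Version(entry["min_transformers_version"])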

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "Defuser"
-version = "0.0.8"
+version = "0.0.9"
 description = "Model defuser helper for HF Transformers."
 readme = "README.md"
 requires-python = ">=3.9"

tests/test_convert_model.py

Lines changed: 48 additions & 15 deletions
@@ -4,11 +4,45 @@
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 import torch
 from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForImageTextToText
+from transformers.models.qwen2_moe.modeling_qwen2_moe import Qwen2MoeConfig, Qwen2MoeForCausalLM
+from transformers.models.qwen3_next.modeling_qwen3_next import Qwen3NextConfig, Qwen3NextForCausalLM
 
 from defuser import convert_model
 from defuser.modeling.replace_modules import materialize_model
 
 
+def _tiny_moe_config(config_cls):
+    return config_cls(
+        num_hidden_layers=1,
+        hidden_size=64,
+        intermediate_size=128,
+        moe_intermediate_size=32,
+        num_attention_heads=4,
+        num_key_value_heads=4,
+        num_experts=4,
+        num_experts_per_tok=2,
+        vocab_size=128,
+    )
+
+
+def _assert_unfused_expert_module(experts):
+    assert hasattr(experts, "0")
+    expert0 = getattr(experts, "0")
+    assert hasattr(expert0, "gate_proj")
+    assert hasattr(expert0, "up_proj")
+    assert hasattr(expert0, "down_proj")
+
+
+def test_qwen2_moe():
+    model = Qwen2MoeForCausalLM(_tiny_moe_config(Qwen2MoeConfig))
+    assert model.config.model_type == "qwen2_moe"
+
+    converted = convert_model(model, max_layers=1)
+    assert converted
+
+    _assert_unfused_expert_module(model.model.layers[0].mlp.experts)
+
+
 def test_qwen3_moe():
     model_id = "Qwen/Qwen3-30B-A3B"
     config = AutoConfig.from_pretrained(model_id)
@@ -24,12 +58,17 @@ def test_qwen3_moe():
     converted = convert_model(model, max_layers=1)
     assert converted
 
-    experts = model.model.layers[0].mlp.experts
-    assert hasattr(experts, "0")
-    expert0 = getattr(experts, "0")
-    assert hasattr(expert0, "gate_proj")
-    assert hasattr(expert0, "up_proj")
-    assert hasattr(expert0, "down_proj")
+    _assert_unfused_expert_module(model.model.layers[0].mlp.experts)
+
+
+def test_qwen3_next():
+    model = Qwen3NextForCausalLM(_tiny_moe_config(Qwen3NextConfig))
+    assert model.config.model_type == "qwen3_next"
+
+    converted = convert_model(model, max_layers=1)
+    assert converted
+
+    _assert_unfused_expert_module(model.model.layers[0].mlp.experts)
 
 
 def test_qwen3_5_moe():
@@ -60,11 +99,8 @@ def test_qwen3_5_moe():
     moe_block = model.model.language_model.layers[0].mlp
     experts = moe_block.experts
 
-    assert hasattr(experts, "0")
+    _assert_unfused_expert_module(experts)
     expert0 = getattr(experts, "0")
-    assert hasattr(expert0, "gate_proj")
-    assert hasattr(expert0, "up_proj")
-    assert hasattr(expert0, "down_proj")
 
     materialize_model(model.model.language_model.layers[0])
 
@@ -102,14 +138,11 @@ def test_mixtral():
     moe_block = model.model.layers[0].mlp
     experts = moe_block.experts
 
-    assert hasattr(experts, "0")
+    _assert_unfused_expert_module(experts)
     expert0 = getattr(experts, "0")
-    assert hasattr(expert0, "gate_proj")
-    assert hasattr(expert0, "up_proj")
-    assert hasattr(expert0, "down_proj")
 
     materialize_model(model.model.layers[0])
 
     torch.testing.assert_close(expert0.gate_proj.weight, expected_gate)
     torch.testing.assert_close(expert0.up_proj.weight, expected_up)
-    torch.testing.assert_close(expert0.down_proj.weight, expected_down)
+    torch.testing.assert_close(expert0.down_proj.weight, expected_down)
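
Taken together, the new tests double as usage documentation. Below is a minimal sketch assuming only what the tests themselves exercise: a tiny random-weight config (the same values as the test's _tiny_moe_config helper, not a recommended real configuration) and convert_model's unfused per-expert gate_proj/up_proj/down_proj layout:

# Minimal sketch based on the new test_qwen2_moe; config values mirror
# _tiny_moe_config and produce a tiny random-weight model for demonstration.
from transformers.models.qwen2_moe.modeling_qwen2_moe import Qwen2MoeConfig, Qwen2MoeForCausalLM

from defuser import convert_model

config = Qwen2MoeConfig(
    num_hidden_layers=1,
    hidden_size=64,
    intermediate_size=128,
    moe_intermediate_size=32,
    num_attention_heads=4,
    num_key_value_heads=4,
    num_experts=4,
    num_experts_per_tok=2,
    vocab_size=128,
)
model = Qwen2MoeForCausalLM(config)

# Convert only the first decoder layer, as the tests do.
convert_model(model, max_layers=1)

# After conversion, each expert is addressable as an unfused submodule.
expert0 = getattr(model.model.layers[0].mlp.experts, "0")
print(expert0.gate_proj, expert0.up_proj, expert0.down_proj)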
