Update README.md and config.json
Browse files
- README.md +12 -9
- config.json +53 -0
README.md
CHANGED
|
@@ -10,7 +10,8 @@ tags:
|
|
| 10 |
- mechanistic-interpretability
|
| 11 |
- interpretability
|
| 12 |
- qwen-scope
|
| 13 |
-
|
|
|
|
| 14 |
---
|
| 15 |
|
| 16 |
## Qwen-Scope: Decoding Intelligence, Unleashing Potential
|
|
@@ -23,7 +24,7 @@ We are excited to introduce Qwen-Scope, an interpretability module trained on th
|
|
| 23 |
|
| 24 |
| Property | Value |
|
| 25 |
|---|---|
|
| 26 |
-
| Base model | [Qwen3.5-35B-A3B](https://huggingface.co/Qwen/Qwen3.5-35B-A3B) |
|
| 27 |
| SAE width (`d_sae`) | 32768 |
|
| 28 |
| Hidden size (`d_model`) | 2048 |
|
| 29 |
| Expansion factor | 16× |
|
|
@@ -66,7 +67,7 @@ import torch
|
|
| 66 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 67 |
|
| 68 |
# ── 1. Load base model ────────────────────────────────────────────────────────
|
| 69 |
-
model_name = "Qwen/Qwen3.5-35B-A3B"
|
| 70 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 71 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
|
| 72 |
model.eval()
|
|
@@ -117,7 +118,7 @@ print(f"Feature values : {last_token_acts[active_idx].tolist()}")
|
|
| 117 |
We also provide a Gradio demo `app.py`. You can run it locally:
|
| 118 |
```
|
| 119 |
python app.py \
|
| 120 |
-
--model Qwen/Qwen3.5-35B-A3B \
|
| 121 |
--model-name-sae-trained-from qwen3.5-35b-a3b-base \
|
| 122 |
--model-name-analyzing-now qwen3.5-35b-a3b \
|
| 123 |
--sae-path Qwen/SAE-Res-Qwen3.5-35B-A3B-Base-W32K-L0_50 \
|
|
@@ -138,10 +139,12 @@ If you use these SAEs in your research, please cite:
|
|
| 138 |
|
| 139 |
```bibtex
|
| 140 |
@misc{qwen_scope,
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
| 146 |
}
|
| 147 |
```
|
|
|
|
| 10 |
- mechanistic-interpretability
|
| 11 |
- interpretability
|
| 12 |
- qwen-scope
|
| 13 |
+
- arxiv:2605.11887
|
| 14 |
+
base_model: Qwen/Qwen3.5-35B-A3B-Base
|
| 15 |
---
|
| 16 |
|
| 17 |
## Qwen-Scope: Decoding Intelligence, Unleashing Potential
|
|
|
|
| 24 |
|
| 25 |
| Property | Value |
|
| 26 |
|---|---|
|
| 27 |
+
| Base model | [Qwen3.5-35B-A3B-Base](https://huggingface.co/Qwen/Qwen3.5-35B-A3B-Base) |
|
| 28 |
| SAE width (`d_sae`) | 32768 |
|
| 29 |
| Hidden size (`d_model`) | 2048 |
|
| 30 |
| Expansion factor | 16× |
|
|
|
|
| 67 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 68 |
|
| 69 |
# ── 1. Load base model ────────────────────────────────────────────────────────
|
| 70 |
+
model_name = "Qwen/Qwen3.5-35B-A3B-Base"
|
| 71 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 72 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
|
| 73 |
model.eval()
|
|
|
|
| 118 |
We also provide a Gradio demo `app.py`. You can run it locally:
|
| 119 |
```
|
| 120 |
python app.py \
|
| 121 |
+
--model Qwen/Qwen3.5-35B-A3B-Base \
|
| 122 |
--model-name-sae-trained-from qwen3.5-35b-a3b-base \
|
| 123 |
--model-name-analyzing-now qwen3.5-35b-a3b \
|
| 124 |
--sae-path Qwen/SAE-Res-Qwen3.5-35B-A3B-Base-W32K-L0_50 \
|
|
|
|
| 139 |
|
| 140 |
```bibtex
|
| 141 |
@misc{qwen_scope,
|
| 142 |
+
title={{Qwen-Scope}: Turning Sparse Features into Development Tools for Large Language Models},
|
| 143 |
+
author={Boyi Deng and Xu Wang and Yaoning Wang and Yu Wan and Yubo Ma and Baosong Yang and Haoran Wei and Jialong Tang and Huan Lin and Ruize Gao and Tianhao Li and Qian Cao and Xuancheng Ren and Xiaodong Deng and An Yang and Fei Huang and Dayiheng Liu and Jingren Zhou},
|
| 144 |
+
year={2026},
|
| 145 |
+
eprint={2605.11887},
|
| 146 |
+
archivePrefix={arXiv},
|
| 147 |
+
primaryClass={cs.CL},
|
| 148 |
+
url={https://arxiv.org/abs/2605.11887},
|
| 149 |
}
|
| 150 |
```
|
config.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "topk_sae",
|
| 3 |
+
"hf_repo_id": "Qwen/SAE-Res-Qwen3.5-35B-A3B-Base-W32K-L0_50",
|
| 4 |
+
"base_model": "Qwen/Qwen3.5-35B-A3B-Base",
|
| 5 |
+
"d_model": 2048,
|
| 6 |
+
"d_sae": 32768,
|
| 7 |
+
"k": 50,
|
| 8 |
+
"num_layers": 40,
|
| 9 |
+
"layers": [
|
| 10 |
+
0,
|
| 11 |
+
1,
|
| 12 |
+
2,
|
| 13 |
+
3,
|
| 14 |
+
4,
|
| 15 |
+
5,
|
| 16 |
+
6,
|
| 17 |
+
7,
|
| 18 |
+
8,
|
| 19 |
+
9,
|
| 20 |
+
10,
|
| 21 |
+
11,
|
| 22 |
+
12,
|
| 23 |
+
13,
|
| 24 |
+
14,
|
| 25 |
+
15,
|
| 26 |
+
16,
|
| 27 |
+
17,
|
| 28 |
+
18,
|
| 29 |
+
19,
|
| 30 |
+
20,
|
| 31 |
+
21,
|
| 32 |
+
22,
|
| 33 |
+
23,
|
| 34 |
+
24,
|
| 35 |
+
25,
|
| 36 |
+
26,
|
| 37 |
+
27,
|
| 38 |
+
28,
|
| 39 |
+
29,
|
| 40 |
+
30,
|
| 41 |
+
31,
|
| 42 |
+
32,
|
| 43 |
+
33,
|
| 44 |
+
34,
|
| 45 |
+
35,
|
| 46 |
+
36,
|
| 47 |
+
37,
|
| 48 |
+
38,
|
| 49 |
+
39
|
| 50 |
+
],
|
| 51 |
+
"hook_point": "resid_post",
|
| 52 |
+
"dtype": "float32"
|
| 53 |
+
}
|