CyberCastle committed on
Commit
85b84b2
·
verified ·
1 Parent(s): a4c08f0

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: XGenerationLab/XiYanSQL-QwenCoder-7B-2504
4
+ library_name: transformers
5
+ pipeline_tag: text-generation
6
+ tags:
7
+ - text-generation
8
+ - quantized
9
+ - awq
10
+ - w4a16
11
+ - llmcompressor
12
+ datasets:
13
+ - birdsql/bird-critic-1.0-open
14
+ ---
15
+
16
+ # XiYanSQL-QwenCoder-7B-2504 W4A16 AWQ
17
+
18
+ Quantized model derived from `XGenerationLab/XiYanSQL-QwenCoder-7B-2504`.
19
+
20
+ This repository contains a locally generated quantized checkpoint ready to be uploaded to the Hugging Face Hub. The folder includes the quantized weights, tokenizer files, and the exact quantization settings used to produce this artifact.
21
+
22
+ ## Format
23
+
24
+ - Quantization type: AWQ
25
+ - Bits: 4-bit weights / 16-bit activations
26
+ - Calibration dataset: `birdsql/bird-critic-1.0-open`
27
+ - Tested backend: Transformers
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from transformers import AutoTokenizer, AutoModelForCausalLM
33
+
34
+ model_id = "your-hf-username/XiYanSQL-QwenCoder-7B-2504-W4A16-AWQ"
35
+
36
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
37
+ model = AutoModelForCausalLM.from_pretrained(
38
+ model_id,
39
+ device_map="auto",
40
+ trust_remote_code=False,
41
+ )
42
+ ```
43
+
44
+ ## About LLMToys
45
+
46
+ This quantizer is hosted in the LLMToys repository: https://github.com/CyberCastle/LLMToys.
47
+ LLMToys is a collection of practical LLM tools and experiments maintained in a single codebase. It groups reusable components for local model execution, quantization workflows, runtime tuning, and structured generation pipelines such as natural-language-to-SQL.
48
+
49
+ ## Quantization Configuration
50
+
51
+ | Setting | Value |
52
+ | --- | --- |
53
+ | Base model | XGenerationLab/XiYanSQL-QwenCoder-7B-2504 |
54
+ | Output folder | XiYanSQL-QwenCoder-7B-2504-W4A16-AWQ |
55
+ | Quantization scheme | AWQ |
56
+ | Weight / activation format | W4A16 |
57
+ | Model architecture | qwen2 |
58
+ | Calibration dataset | birdsql/bird-critic-1.0-open |
59
+ | Calibration split | open |
60
+ | Dataset configuration | n/a |
61
+ | Calibration samples used | 256 |
62
+ | Max sequence length | 2048 |
63
+ | Max GPU memory budget | 12.0 GiB |
64
+ | Sequential onloading | yes |
65
+ | Requested sequential targets | safe-auto |
66
+ | Effective sequential targets | Qwen2Attention, Qwen2MLP |
67
+ | Sequential targets per subgraph | 1 |
68
+ | trust_remote_code | no |
69
+ | Memory preflight mode | off |
70
+ | vLLM smoke test requested | no |
71
+
72
+ ## Toolchain
73
+
74
+ | Setting | Value |
75
+ | --- | --- |
76
+ | Generated at (UTC) | 2026-05-03T21:28:14Z |
77
+ | Runner entrypoint | uv run quantizer/run.py |
78
+ | llmcompressor | 0.10.1.dev127+g76b28ce7 |
79
+ | transformers | 5.6.2 |
80
+ | torch | 2.11.0+cu130 |
81
+ | compressed-tensors | 0.15.1a20260428 |
82
+
83
+ ## Notes
84
+
85
+ - This README is generated automatically by the quantizer so the artifact keeps its execution context.
86
+ - Review the original base model license and any upstream usage restrictions before publishing this checkpoint.
87
+ - If you rerun the quantizer with different settings, regenerate and upload the full output directory again.
chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "dtype": "bfloat16",
8
+ "eos_token_id": 151643,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 3584,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 18944,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention"
42
+ ],
43
+ "max_position_embeddings": 32768,
44
+ "max_window_layers": 28,
45
+ "model_type": "qwen2",
46
+ "num_attention_heads": 28,
47
+ "num_hidden_layers": 28,
48
+ "num_key_value_heads": 4,
49
+ "pad_token_id": null,
50
+ "quantization_config": {
51
+ "config_groups": {
52
+ "group_0": {
53
+ "format": "pack-quantized",
54
+ "input_activations": null,
55
+ "output_activations": null,
56
+ "targets": [
57
+ "Linear"
58
+ ],
59
+ "weights": {
60
+ "actorder": null,
61
+ "block_structure": null,
62
+ "dynamic": false,
63
+ "group_size": 128,
64
+ "num_bits": 4,
65
+ "observer": "memoryless_minmax",
66
+ "observer_kwargs": {},
67
+ "scale_dtype": null,
68
+ "strategy": "group",
69
+ "symmetric": false,
70
+ "type": "int",
71
+ "zp_dtype": "torch.int8"
72
+ }
73
+ }
74
+ },
75
+ "format": "pack-quantized",
76
+ "global_compression_ratio": null,
77
+ "ignore": [
78
+ "lm_head"
79
+ ],
80
+ "kv_cache_scheme": null,
81
+ "quant_method": "compressed-tensors",
82
+ "quantization_status": "compressed",
83
+ "sparsity_config": {},
84
+ "transform_config": {},
85
+ "version": "0.15.1.a20260428"
86
+ },
87
+ "rms_norm_eps": 1e-06,
88
+ "rope_parameters": {
89
+ "rope_theta": 1000000.0,
90
+ "rope_type": "default"
91
+ },
92
+ "sliding_window": null,
93
+ "tie_word_embeddings": false,
94
+ "transformers_version": "5.6.2",
95
+ "use_cache": false,
96
+ "use_sliding_window": false,
97
+ "vocab_size": 152064
98
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token_id": 151645,
3
+ "max_new_tokens": 512,
4
+ "pad_token_id": 151643,
5
+ "transformers_version": "5.6.2"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cccfbb7af34eb5360054144c9563df6674fce7da82422ac6a5652e87de59d027
3
+ size 5570857504
recipe.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_stage:
2
+ default_modifiers:
3
+ AWQModifier:
4
+ mappings:
5
+ - smooth_layer: re:.*input_layernorm$
6
+ balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
7
+ activation_hook_target: null
8
+ - smooth_layer: re:.*v_proj$
9
+ balance_layers: ['re:.*o_proj$']
10
+ activation_hook_target: null
11
+ - smooth_layer: re:.*post_attention_layernorm$
12
+ balance_layers: ['re:.*gate_proj$', 're:.*up_proj$']
13
+ activation_hook_target: null
14
+ - smooth_layer: re:.*up_proj$
15
+ balance_layers: ['re:.*down_proj$']
16
+ activation_hook_target: null
17
+ offload_device: !!python/object/apply:torch.device [cpu]
18
+ duo_scaling: both
19
+ n_grid: 20
20
+ QuantizationModifier:
21
+ targets: [Linear]
22
+ ignore: [lm_head]
23
+ scheme: W4A16_ASYM
24
+ bypass_divisibility_checks: false
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e2b7649a086fbb9771cd78ec5b7ffa7069e16a82424a382950fedfa1e057861
3
+ size 11421991
tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "is_local": false,
9
+ "local_files_only": false,
10
+ "max_length": null,
11
+ "model_max_length": 10240,
12
+ "pad_to_multiple_of": null,
13
+ "pad_token": "<|endoftext|>",
14
+ "pad_token_type_id": 0,
15
+ "padding_side": "left",
16
+ "split_special_tokens": false,
17
+ "tokenizer_class": "Qwen2Tokenizer",
18
+ "unk_token": null
19
+ }