Instructions to use mlx-community/GLM-OCR-4bit with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use mlx-community/GLM-OCR-4bit with Transformers:
# Use a pipeline as a high-level helper # Warning: Pipeline type "image-to-text" is no longer supported in transformers v5. # You must load the model directly (see below) or downgrade to v4.x with: # 'pip install "transformers<5.0.0"' from transformers import pipeline pipe = pipeline("image-to-text", model="mlx-community/GLM-OCR-4bit") # Load model directly from transformers import AutoTokenizer, AutoModelForImageTextToText tokenizer = AutoTokenizer.from_pretrained("mlx-community/GLM-OCR-4bit") model = AutoModelForImageTextToText.from_pretrained("mlx-community/GLM-OCR-4bit") - MLX
How to use mlx-community/GLM-OCR-4bit with MLX:
# Download the model from the Hub pip install huggingface_hub[hf_xet] huggingface-cli download --local-dir GLM-OCR-4bit mlx-community/GLM-OCR-4bit
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- LM Studio
File size: 4,606 Bytes
97f5875 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | [gMASK]<sop>
{#- When `tools` is provided, emit a <|system|> section that lists every tool as one JSON object inside <tools></tools> and then describes the <tool_call> / <arg_key> / <arg_value> XML layout the model is asked to produce for a function call. -#}
{%- if tools -%}
<|system|>
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{% for tool in tools %}
{{ tool | tojson(ensure_ascii=False) }}
{% endfor %}
</tools>
For each function call, output the function name and arguments within the following XML format:
<tool_call>{function-name}
<arg_key>{arg-key-1}</arg_key>
<arg_value>{arg-value-1}</arg_value>
<arg_key>{arg-key-2}</arg_key>
<arg_value>{arg-value-2}</arg_value>
...
</tool_call>{%- endif -%}
{#- visible_text(content): flatten a message's content into the text placed in the prompt.
    Content that is a string, a mapping, or not iterable is rendered as-is.
    Otherwise each element is rendered in order: plain strings verbatim, text parts
    as their text, image and video parts as placeholder tokens; any other element
    renders nothing. -#}
{%- macro visible_text(content) -%}
    {%- if content is string or content is mapping or content is not iterable -%}
        {{- content }}
    {%- else -%}
        {%- for part in content -%}
            {%- if part is string -%}
                {{- part }}
            {%- elif part is mapping -%}
                {#- The text check comes first, so a part typed 'text' wins even if it also carries an image or video key. -#}
                {%- if part.type == 'text' -%}
                    {{- part.text }}
                {%- elif part.type == 'image' or 'image' in part -%}
                    <|begin_of_image|><|image|><|end_of_image|>
                {%- elif part.type == 'video' or 'video' in part -%}
                    <|begin_of_video|><|video|><|end_of_video|>
                {%- endif -%}
            {%- endif -%}
        {%- endfor -%}
    {%- endif -%}
{%- endmacro -%}
{#- Scan the conversation once and remember the zero-based index of the last user message in ns.last_user_index (it stays -1 when there is no user message). A namespace is used so the assignment made inside the loop is still visible after it. The assistant branch of the message loop compares loop.index0 against this value. -#}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in messages %}
{%- if m.role == 'user' %}
{% set ns.last_user_index = loop.index0 -%}
{%- endif %}
{%- endfor %}
{% for m in messages %}
{#- Render each message according to its role: user, assistant, tool or system. Messages with any other role produce no output. -#}
{%- if m.role == 'user' -%}<|user|>
{% if m.content is string %}
{{ m.content }}
{%- else %}
{%- for item in m.content %}
{% if item.type == 'video' or 'video' in item %}
<|begin_of_video|><|video|><|end_of_video|>{% elif item.type == 'image' or 'image' in item %}
<|begin_of_image|><|image|><|end_of_image|>{% elif item.type == 'text' %}
{{ item.text }}
{%- endif %}
{%- endfor %}
{%- endif %}
{#- Append '/nothink' to the user turn when enable_thinking is defined and false, unless the visible text of the message already ends with it. -#}
{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
{%- elif m.role == 'assistant' -%}
<|assistant|>
{#- Separate reasoning from the visible answer. A string m.reasoning_content is used directly; otherwise, when the content contains '</think>', the reasoning is the text before the first '</think>' (after the last '<think>' within it) and the answer is the text after the last '</think>'. -#}
{%- set reasoning_content = '' %}
{%- set content = visible_text(m.content) %}
{%- if m.reasoning_content is string %}
{%- set reasoning_content = m.reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- Reasoning is rendered only for assistant messages that come after the last user message and have non-empty reasoning; every other assistant message gets an empty think block. -#}
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
{%- else -%}
{{ '\n<think></think>' }}
{%- endif -%}
{%- if content.strip() -%}
{{ '\n' + content.strip() }}
{%- endif -%}
{% if m.tool_calls %}
{% for tc in m.tool_calls %}
{#- A tool call may be wrapped in a `function` field or given bare; unwrap it so name and arguments are read from the same place. Arguments are iterated with .items(), so they must be a mapping; non-string values are JSON-encoded and strings are emitted verbatim. -#}
{%- if tc.function %}
{%- set tc = tc.function %}
{%- endif %}
{{ '\n<tool_call>' + tc.name }}
{% set _args = tc.arguments %}
{% for k, v in _args.items() %}
<arg_key>{{ k }}</arg_key>
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
{% endfor %}
</tool_call>{% endfor %}
{% endif %}
{%- elif m.role == 'tool' -%}
{#- Tool results. For string and list content, '<|observation|>' is emitted only when the previous message is not also a tool message, and the result is wrapped in one <tool_response> block. For any other content the final branch emits '<|observation|>' unconditionally and one <tool_response> block per iterated element. -#}
{%- if m.content is string -%}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- m.content }}
{{- '\n</tool_response>' }}
{% elif m.content is iterable and m.content is not mapping %}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|observation|>' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{#- List items: a typed mapping renders its text (type 'text'), an image or video placeholder (types 'image'/'image_url' and 'video'/'video_url', matched case-insensitively), or its JSON dump for any other type; an item without a type renders its `output` field when defined, else the item itself. -#}
{%- for tr in m.content -%}
{%- if tr is mapping and tr.type is defined -%}
{%- set t = tr.type | lower -%}
{%- if t == 'text' and tr.text is defined -%}
{{ tr.text }}
{%- elif t in ['image', 'image_url'] -%}
<|begin_of_image|><|image|><|end_of_image|>
{%- elif t in ['video', 'video_url'] -%}
<|begin_of_video|><|video|><|end_of_video|>
{%- else -%}
{{ tr | tojson(ensure_ascii=False) }}
{%- endif -%}
{%- else -%}
{{ tr.output if tr.output is defined else tr }}
{%- endif -%}
{%- endfor -%}
{{- '\n</tool_response>' }}
{%- else -%}
<|observation|>{% for tr in m.content %}
<tool_response>
{{ tr.output if tr.output is defined else tr }}
</tool_response>{% endfor -%}
{% endif -%}
{%- elif m.role == 'system' -%}
<|system|>
{{ visible_text(m.content) }}
{%- endif -%}
{%- endfor -%}
{#- When add_generation_prompt is set, open a new assistant turn. If enable_thinking is defined and false, an empty think block followed by a newline is pre-filled; otherwise nothing further is added. -#}
{%- if add_generation_prompt -%}
<|assistant|>
{{'<think></think>\n' if (enable_thinking is defined and not enable_thinking) else ''}}
{%- endif -%}
|