File size: 425 Bytes

899c998
0f450c0
899c998

{
  "_name_or_path": "mlsquare/130M_Seshu",
  "architectures": [
    "MambaForCausalLM"
  ],
  "bias": false,
  "conv_bias": true,
  "d_conv": 4,
  "d_inner": 5120,
  "d_model": 2560,
  "d_state": 16,
  "dt_rank": 160,
  "expand": 2,
  "initializer_range": 0.02,
  "model_type": "mamba",
  "n_layer": 64,
  "pad_vocab_size_multiple": 8,
  "torch_dtype": "float32",
  "transformers_version": "4.38.1",
  "vocab_size": 20000
}