Spaces:

raia-center
/

PersianMMLU

Running

App Files Files Community

Omid Ghahroodi committed on Apr 14, 2024

Commit

17fcfcd

1 Parent(s): e7748c5

Update demo

Browse files

Files changed (21) hide show

app.py +81 -81
eval-queue/.DS_Store +0 -0
eval-queue/.gitattributes +55 -0
eval-queue/leaderboard/demo-leaderboard_gpt2-demo_eval_request_False_bfloat16_Original.json +1 -0
eval-results/.DS_Store +0 -0
eval-results/.gitattributes +55 -0
eval-results/README.md +3 -0
eval-results/leaderboard/.DS_Store +0 -0
eval-results/leaderboard/gpt2-demo/demo-leaderboard_gpt2-demo_results_2023-11-21T18-10-08.json +15 -0
scripts/create_request_file.py +107 -107
src/__pycache__/envs.cpython-310.pyc +0 -0
src/__pycache__/populate.cpython-310.pyc +0 -0
src/display/__pycache__/about.cpython-310.pyc +0 -0
src/display/__pycache__/css_html_js.cpython-310.pyc +0 -0
src/display/__pycache__/formatting.cpython-310.pyc +0 -0
src/display/__pycache__/utils.cpython-310.pyc +0 -0
src/display/about.py +80 -34
src/display/utils.py +3 -0
src/leaderboard/__pycache__/read_evals.cpython-310.pyc +0 -0
src/submission/__pycache__/check_validity.cpython-310.pyc +0 -0
src/submission/__pycache__/submit.cpython-310.pyc +0 -0

app.py CHANGED Viewed

@@ -248,87 +248,87 @@ with demo:
                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):

                 with gr.Row():
                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+        #         with gr.Column():
+        #             with gr.Accordion(
+        #                 f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+        #                 open=False,
+        #             ):
+        #                 with gr.Row():
+        #                     finished_eval_table = gr.components.Dataframe(
+        #                         value=finished_eval_queue_df,
+        #                         headers=EVAL_COLS,
+        #                         datatype=EVAL_TYPES,
+        #                         row_count=5,
+        #                     )
+        #             with gr.Accordion(
+        #                 f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+        #                 open=False,
+        #             ):
+        #                 with gr.Row():
+        #                     running_eval_table = gr.components.Dataframe(
+        #                         value=running_eval_queue_df,
+        #                         headers=EVAL_COLS,
+        #                         datatype=EVAL_TYPES,
+        #                         row_count=5,
+        #                     )
+        #             with gr.Accordion(
+        #                 f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+        #                 open=False,
+        #             ):
+        #                 with gr.Row():
+        #                     pending_eval_table = gr.components.Dataframe(
+        #                         value=pending_eval_queue_df,
+        #                         headers=EVAL_COLS,
+        #                         datatype=EVAL_TYPES,
+        #                         row_count=5,
+        #                     )
+        #     with gr.Row():
+        #         gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+        #     with gr.Row():
+        #         with gr.Column():
+        #             model_name_textbox = gr.Textbox(label="Model name")
+        #             revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+        #             model_type = gr.Dropdown(
+        #                 choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+        #                 label="Model type",
+        #                 multiselect=False,
+        #                 value=None,
+        #                 interactive=True,
+        #             )
+        #         with gr.Column():
+        #             precision = gr.Dropdown(
+        #                 choices=[i.value.name for i in Precision if i != Precision.Unknown],
+        #                 label="Precision",
+        #                 multiselect=False,
+        #                 value="float16",
+        #                 interactive=True,
+        #             )
+        #             weight_type = gr.Dropdown(
+        #                 choices=[i.value.name for i in WeightType],
+        #                 label="Weights type",
+        #                 multiselect=False,
+        #                 value="Original",
+        #                 interactive=True,
+        #             )
+        #             base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+        #     submit_button = gr.Button("Submit Eval")
+        #     submission_result = gr.Markdown()
+        #     submit_button.click(
+        #         add_new_eval,
+        #         [
+        #             model_name_textbox,
+        #             base_model_name_textbox,
+        #             revision_name_textbox,
+        #             precision,
+        #             weight_type,
+        #             model_type,
+        #         ],
+        #         submission_result,
+        #     )
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):

eval-queue/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

eval-queue/.gitattributes ADDED Viewed

	@@ -0,0 +1,55 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text

eval-queue/leaderboard/demo-leaderboard_gpt2-demo_eval_request_False_bfloat16_Original.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"model": "demo-leaderboard/gpt2-demo", "base_model": "", "revision": "main", "private": false, "precision": "float16", "weight_type": "Original", "status": "FINISHED", "submitted_time": "2023-11-21T18:10:08Z", "model_type": "\ud83d\udfe2 : pretrained", "likes": 0, "params": 0.1, "license": "custom"}

eval-results/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

eval-results/.gitattributes ADDED Viewed

	@@ -0,0 +1,55 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text

eval-results/README.md ADDED Viewed

	@@ -0,0 +1,3 @@

+---
+license: cc-by-nd-4.0
+---

eval-results/leaderboard/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

eval-results/leaderboard/gpt2-demo/demo-leaderboard_gpt2-demo_results_2023-11-21T18-10-08.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "config": {
+        "model_dtype": "torch.float16",
+        "model_name": "demo-leaderboard/gpt2-demo",
+        "model_sha": "ac3299b02780836378b9e1e68c6eead546e89f90"
+    },
+    "results": {
+        "task_name1": {
+            "metric_name": 0
+        },
+        "task_name2": {
+            "metric_name": 0.90
+        }
+    }
+}

scripts/create_request_file.py CHANGED Viewed

@@ -1,107 +1,107 @@
-import json
-import os
-import pprint
-import re
-from datetime import datetime, timezone
-import click
-from colorama import Fore
-from huggingface_hub import HfApi, snapshot_download
-EVAL_REQUESTS_PATH = "eval-queue"
-QUEUE_REPO = "open-llm-leaderboard/requests"
-precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
-model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
-weight_types = ("Original", "Delta", "Adapter")
-def get_model_size(model_info, precision: str):
-    size_pattern = size_pattern = re.compile(r"(\d\.)?\d+(b|m)")
-    try:
-        model_size = round(model_info.safetensors["total"] / 1e9, 3)
-    except (AttributeError, TypeError):
-        try:
-            size_match = re.search(size_pattern, model_info.modelId.lower())
-            model_size = size_match.group(0)
-            model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
-        except AttributeError:
-            return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
-    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
-    model_size = size_factor * model_size
-    return model_size
-def main():
-    api = HfApi()
-    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset")
-    model_name = click.prompt("Enter model name")
-    revision = click.prompt("Enter revision", default="main")
-    precision = click.prompt("Enter precision", default="float16", type=click.Choice(precisions))
-    model_type = click.prompt("Enter model type", type=click.Choice(model_types))
-    weight_type = click.prompt("Enter weight type", default="Original", type=click.Choice(weight_types))
-    base_model = click.prompt("Enter base model", default="")
-    status = click.prompt("Enter status", default="FINISHED")
-    try:
-        model_info = api.model_info(repo_id=model_name, revision=revision)
-    except Exception as e:
-        print(f"{Fore.RED}Could not find model info for {model_name} on the Hub\n{e}{Fore.RESET}")
-        return 1
-    model_size = get_model_size(model_info=model_info, precision=precision)
-    try:
-        license = model_info.cardData["license"]
-    except Exception:
-        license = "?"
-    eval_entry = {
-        "model": model_name,
-        "base_model": base_model,
-        "revision": revision,
-        "private": False,
-        "precision": precision,
-        "weight_type": weight_type,
-        "status": status,
-        "submitted_time": current_time,
-        "model_type": model_type,
-        "likes": model_info.likes,
-        "params": model_size,
-        "license": license,
-    }
-    user_name = ""
-    model_path = model_name
-    if "/" in model_name:
-        user_name = model_name.split("/")[0]
-        model_path = model_name.split("/")[1]
-    pprint.pprint(eval_entry)
-    if click.confirm("Do you want to continue? This request file will be pushed to the hub"):
-        click.echo("continuing...")
-        out_dir = f"{EVAL_REQUESTS_PATH}/{user_name}"
-        os.makedirs(out_dir, exist_ok=True)
-        out_path = f"{out_dir}/{model_path}_eval_request_{False}_{precision}_{weight_type}.json"
-        with open(out_path, "w") as f:
-            f.write(json.dumps(eval_entry))
-        api.upload_file(
-            path_or_fileobj=out_path,
-            path_in_repo=out_path.split(f"{EVAL_REQUESTS_PATH}/")[1],
-            repo_id=QUEUE_REPO,
-            repo_type="dataset",
-            commit_message=f"Add {model_name} to eval queue",
-        )
-    else:
-        click.echo("aborting...")
-if __name__ == "__main__":
-    main()

+# import json
+# import os
+# import pprint
+# import re
+# from datetime import datetime, timezone
+# import click
+# from colorama import Fore
+# from huggingface_hub import HfApi, snapshot_download
+# EVAL_REQUESTS_PATH = "eval-queue"
+# QUEUE_REPO = "open-llm-leaderboard/requests"
+# precisions = ("float16", "bfloat16", "8bit (LLM.int8)", "4bit (QLoRA / FP4)", "GPTQ")
+# model_types = ("pretrained", "fine-tuned", "RL-tuned", "instruction-tuned")
+# weight_types = ("Original", "Delta", "Adapter")
+# def get_model_size(model_info, precision: str):
+#     size_pattern = size_pattern = re.compile(r"(\d\.)?\d+(b|m)")
+#     try:
+#         model_size = round(model_info.safetensors["total"] / 1e9, 3)
+#     except (AttributeError, TypeError):
+#         try:
+#             size_match = re.search(size_pattern, model_info.modelId.lower())
+#             model_size = size_match.group(0)
+#             model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
+#         except AttributeError:
+#             return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
+#     size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
+#     model_size = size_factor * model_size
+#     return model_size
+# def main():
+#     api = HfApi()
+#     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+#     snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH, repo_type="dataset")
+#     model_name = click.prompt("Enter model name")
+#     revision = click.prompt("Enter revision", default="main")
+#     precision = click.prompt("Enter precision", default="float16", type=click.Choice(precisions))
+#     model_type = click.prompt("Enter model type", type=click.Choice(model_types))
+#     weight_type = click.prompt("Enter weight type", default="Original", type=click.Choice(weight_types))
+#     base_model = click.prompt("Enter base model", default="")
+#     status = click.prompt("Enter status", default="FINISHED")
+#     try:
+#         model_info = api.model_info(repo_id=model_name, revision=revision)
+#     except Exception as e:
+#         print(f"{Fore.RED}Could not find model info for {model_name} on the Hub\n{e}{Fore.RESET}")
+#         return 1
+#     model_size = get_model_size(model_info=model_info, precision=precision)
+#     try:
+#         license = model_info.cardData["license"]
+#     except Exception:
+#         license = "?"
+#     eval_entry = {
+#         "model": model_name,
+#         "base_model": base_model,
+#         "revision": revision,
+#         "private": False,
+#         "precision": precision,
+#         "weight_type": weight_type,
+#         "status": status,
+#         "submitted_time": current_time,
+#         "model_type": model_type,
+#         "likes": model_info.likes,
+#         "params": model_size,
+#         "license": license,
+#     }
+#     user_name = ""
+#     model_path = model_name
+#     if "/" in model_name:
+#         user_name = model_name.split("/")[0]
+#         model_path = model_name.split("/")[1]
+#     pprint.pprint(eval_entry)
+#     if click.confirm("Do you want to continue? This request file will be pushed to the hub"):
+#         click.echo("continuing...")
+#         out_dir = f"{EVAL_REQUESTS_PATH}/{user_name}"
+#         os.makedirs(out_dir, exist_ok=True)
+#         out_path = f"{out_dir}/{model_path}_eval_request_{False}_{precision}_{weight_type}.json"
+#         with open(out_path, "w") as f:
+#             f.write(json.dumps(eval_entry))
+#         api.upload_file(
+#             path_or_fileobj=out_path,
+#             path_in_repo=out_path.split(f"{EVAL_REQUESTS_PATH}/")[1],
+#             repo_id=QUEUE_REPO,
+#             repo_type="dataset",
+#             commit_message=f"Add {model_name} to eval queue",
+#         )
+#     else:
+#         click.echo("aborting...")
+# if __name__ == "__main__":
+#     main()

src/__pycache__/envs.cpython-310.pyc ADDED Viewed

Binary file (550 Bytes). View file

src/__pycache__/populate.cpython-310.pyc ADDED Viewed

Binary file (2.63 kB). View file

src/display/__pycache__/about.cpython-310.pyc ADDED Viewed

Binary file (3.42 kB). View file

src/display/__pycache__/css_html_js.cpython-310.pyc ADDED Viewed

Binary file (2.04 kB). View file

src/display/__pycache__/formatting.cpython-310.pyc ADDED Viewed

Binary file (1.58 kB). View file

src/display/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (5.26 kB). View file

src/display/about.py CHANGED Viewed

@@ -11,57 +11,103 @@ class Task:
 # Init: to update with your specific keys
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("task_name1", "metric_name", "First task")
-    task1 = Task("task_name2", "metric_name", "Second task")
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">Demo leaderboard</h1>"""
 # What does your leaderboard evaluate?
-INTRODUCTION_TEXT = """
-Intro text
-"""
 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = f"""
-## How it works
-## Reproducibility
-To reproduce our results, here is the commands you can run:
-"""
-EVALUATION_QUEUE_TEXT = """
-## Some good practices before submitting a model
-### 1) Make sure you can load your model and tokenizer using AutoClasses:
-```python
-from transformers import AutoConfig, AutoModel, AutoTokenizer
-config = AutoConfig.from_pretrained("your model name", revision=revision)
-model = AutoModel.from_pretrained("your model name", revision=revision)
-tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
-```
-If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
-Note: make sure your model is public!
-Note: if your model needs `use_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!
-### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
-It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
-### 3) Make sure your model has an open license!
-This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
-### 4) Fill up your model card
-When we add extra information about models to the leaderboard, it will be automatically taken from the model card
-## In case of model failure
-If your model is displayed in the `FAILED` category, its execution stopped.
-Make sure you have followed the above steps first.
-If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
-"""
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
 """

 # Init: to update with your specific keys
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    # task0 = Task("task_name1", "metric_name", "First task")
+    # task1 = Task("task_name2", "metric_name", "Second task")
+    Analytical_Talent_LSS = Task("Analytical Talent LSS", "Acc", "Analytical Talent LSS")
+    Calculus_USS = Task("Calculus USS", "Acc", "Calculus USS")
+    Chemistry_USS = Task("Chemistry USS", "Acc", "Chemistry USS")
+    Discrete_Mathematics_USS = Task("Discrete Mathematics USS", "Acc", "Discrete Mathematics USS")
+    Economy_USS = Task("Economy USS", "Acc", "Economy USS")
+    Geography_USS = Task("Geography USS", "Acc", "Geography USS")
+    Geology_USS = Task("Geology USS", "Acc", "Geology USS")
+    Geometry_USS = Task("Geometry USS", "Acc", "Geometry USS")
+    History_USS = Task("History USS", "Acc", "History USS")
+    Logic_USS = Task("Logic USS", "Acc", "Logic USS")
+    Mathematical_and_Logical_Intelligence_UPS = Task("Mathematical and Logical Intelligence UPS", "Acc", "Mathematical and Logical Intelligence UPS")
+    Mathematics_LPS = Task("Mathematics LPS", "Acc", "Mathematics LPS")
+    Mathematics_LSS = Task("Mathematics LSS", "Acc", "Mathematics LSS")
+    Mathematics_UPS = Task("Mathematics UPS", "Acc", "Mathematics UPS")
+    Mathematics_USS = Task("Mathematics USS", "Acc", "Mathematics USS")
+    Mathematics_and_Statistics_USS = Task("Mathematics and Statistics USS", "Acc", "Mathematics and Statistics USS")
+    Natural_Sciences_LPS = Task("Natural Sciences LPS", "Acc", "Natural Sciences LPS")
+    Natural_Sciences_LSS = Task("Natural Sciences LSS", "Acc", "Natural Sciences LSS")
+    Natural_Sciences_UPS = Task("Natural Sciences UPS", "Acc", "Natural Sciences UPS")
+    Persian_Literature_LPS = Task("Persian Literature LPS", "Acc", "Persian Literature LPS")
+    Persian_Literature_LSS = Task("Persian Literature LSS", "Acc", "Persian Literature LSS")
+    Persian_Literature_UPS = Task("Persian Literature UPS", "Acc", "Persian Literature UPS")
+    Persian_Literature_USS = Task("Persian Literature USS", "Acc", "Persian Literature USS")
+    Philosophy_USS = Task("Philosophy USS", "Acc", "Philosophy USS")
+    Physics_USS = Task("Physics USS", "Acc", "Physics USS")
+    Probability_and_Statistics_USS = Task("Probability and Statistics USS", "Acc", "Probability and Statistics USS")
+    Psychology_USS = Task("Psychology USS", "Acc", "Psychology USS")
+    Social_Studies_LPS = Task("Social Studies LPS", "Acc", "Social Studies LPS")
+    Social_Studies_LSS = Task("Social Studies LSS", "Acc", "Social Studies LSS")
+    Social_Studies_UPS = Task("Social Studies UPS", "Acc", "Social Studies UPS")
+    Sociology_USS = Task("Sociology USS", "Acc", "Sociology USS")
+    Speed_and_Accuracy_UPS = Task("Speed and Accuracy UPS", "Acc", "Speed and Accuracy UPS")
+    Theology_LPS = Task("Theology LPS", "Acc", "Theology LPS")
+    Theology_LSS = Task("Theology LSS", "Acc", "Theology LSS")
+    Theology_UPS = Task("Theology UPS", "Acc", "Theology UPS")
+    Theology_USS = Task("Theology USS", "Acc", "Theology USS")
+    Verbal_and_Linguistic_Intelligence_UPS = Task("Verbal and Linguistic Intelligence UPS", "Acc", "Verbal and Linguistic Intelligence UPS")
+    Biology_USS = Task("‌Biology USS", "Acc", "‌Biology USS")
+    Avg_on_all_tasks = Task("Avg on all tasks", "Acc", "Avg on all tasks")
+    Avg_on_all_questions = Task("Avg on all questions", "Acc", "Avg on all questions")
 # Your leaderboard name
+TITLE = """<h1 align="center" id="space-title">Khayyam Challenge (PersianMMLU)</h1>"""
 # What does your leaderboard evaluate?
+INTRODUCTION_TEXT = """"""
+# Intro text
+# """
 # Which evaluations are you running? how can people reproduce what you have?
+LLM_BENCHMARKS_TEXT = f""""""
+# ## How it works
+# ## Reproducibility
+# To reproduce our results, here are the commands you can run:
+# """
+EVALUATION_QUEUE_TEXT = """In progress"""
+# ## Some good practices before submitting a model
+# ### 1) Make sure you can load your model and tokenizer using AutoClasses:
+# ```python
+# from transformers import AutoConfig, AutoModel, AutoTokenizer
+# config = AutoConfig.from_pretrained("your model name", revision=revision)
+# model = AutoModel.from_pretrained("your model name", revision=revision)
+# tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
+# ```
+# If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
+# Note: make sure your model is public!
+# Note: if your model needs `use_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!
+# ### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
+# It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
+# ### 3) Make sure your model has an open license!
+# This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
+# ### 4) Fill up your model card
+# When we add extra information about models to the leaderboard, it will be automatically taken from the model card
+# ## In case of model failure
+# If your model is displayed in the `FAILED` category, its execution stopped.
+# Make sure you have followed the above steps first.
+# If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
+# """
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
 CITATION_BUTTON_TEXT = r"""
+@article{ghahroodi2024khayyam,
+  title={Khayyam Challenge (PersianMMLU): Is Your LLM Truly Wise to The Persian Language?},
+  author={Ghahroodi, Omid and Nouri, Marzia and Sanian, Mohammad Vali and Sahebi, Alireza and Dastgheib, Doratossadat and Asgari, Ehsaneddin and Baghshah, Mahdieh Soleymani and Rohban, Mohammad Hossein},
+  journal={arXiv preprint arXiv:2404.06644},
+  year={2024}
+}
 """

src/display/utils.py CHANGED Viewed

@@ -92,6 +92,7 @@ class WeightType(Enum):
     Delta = ModelDetails("Delta")
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
     qt_8bit = ModelDetails("8bit")
@@ -100,6 +101,8 @@ class Precision(Enum):
     Unknown = ModelDetails("?")
     def from_str(precision):
         if precision in ["torch.float16", "float16"]:
             return Precision.float16
         if precision in ["torch.bfloat16", "bfloat16"]:

     Delta = ModelDetails("Delta")
 class Precision(Enum):
+    float32 = ModelDetails("float32")
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
     qt_8bit = ModelDetails("8bit")
     Unknown = ModelDetails("?")
     def from_str(precision):
+        if precision in ["torch.float32", "float32"]:
+            return Precision.float32
         if precision in ["torch.float16", "float16"]:
             return Precision.float16
         if precision in ["torch.bfloat16", "bfloat16"]:

src/leaderboard/__pycache__/read_evals.cpython-310.pyc ADDED Viewed

Binary file (6.17 kB). View file

src/submission/__pycache__/check_validity.cpython-310.pyc ADDED Viewed

Binary file (3.74 kB). View file

src/submission/__pycache__/submit.cpython-310.pyc ADDED Viewed

Binary file (2.82 kB). View file