diff --git a/ADVANCED_USAGE.md b/ADVANCED_USAGE.md index 4f48eca..c0905ba 100755 --- a/ADVANCED_USAGE.md +++ b/ADVANCED_USAGE.md @@ -50,6 +50,7 @@ Below are all the arguments for `bigcodebench.evaluate` for the remote evaluation - `--n_samples`: The number of samples, default to `1` - `--temperature`: The temperature, default to `0.0` - `--max_new_tokens`: The length of max new tokens, default to `1280` +- `--max_model_len`: The maximum model context length for the vLLM backend, default to `12800` - `--greedy`: Whether to use greedy decoding, default to `False` - `--strip_newlines`: Whether to strip newlines, default to `False`, set to `True` to strip newlines for some model series like StarCoder2 - `--direct_completion`: Whether to use direct completion, default to `False` @@ -69,7 +70,8 @@ Below are all the arguments for `bigcodebench.evaluate` for the remote evaluation - `--tokenizer_legacy`: Whether to use the legacy tokenizer, default to `False` - `--samples`: The path to the generated samples file, default to `None` - `--no_execute`: Whether to not execute the samples, default to `False` -- `--remote_execute_api`: The API endpoint for remote execution, default to `https://bigcode-bigcodebench-evaluator.hf.space/`, you can also use your own Gradio API endpoint by cloning the [bigcodebench-evaluator](https://huggingface.co/spaces/bigcode/bigcodebench-evaluator) repo and check `Use via API` at the bottom of the HF space page +- `--e2b_endpoint`: The E2B endpoint for remote execution, default to `bigcodebench_evaluator`; you can also deploy your own E2B evaluator based on the [bigcodebench-evaluator](https://huggingface.co/spaces/bigcode/bigcodebench-evaluator) repo +- `--gradio_endpoint`: The Gradio API endpoint for remote execution, default to `https://bigcode-bigcodebench-evaluator.hf.space/`, you can also use your own Gradio API endpoint by cloning the [bigcodebench-evaluator](https://huggingface.co/spaces/bigcode/bigcodebench-evaluator) repo and check `Use via API` at the bottom of the HF space page - `--pass_k`: The `k` in `Pass@k`, default to `[1, 5, 10]`, e.g. `--pass_k 1,5,10` will evaluate `Pass@1`, `Pass@5` and `Pass@10` - `--calibrated`: Whether to use the calibrated samples, default to `True` - `--save_pass_rate`: Whether to save the pass rate to a file, default to `True` diff --git a/Docker/Evaluate.Dockerfile b/Docker/Evaluate.Dockerfile index 90e7f40..8b2cdcd 100755 --- a/Docker/Evaluate.Dockerfile +++ b/Docker/Evaluate.Dockerfile @@ -54,7 +54,7 @@ RUN pip install \ rich \ accelerate \ anthropic \ - google-generativeai \ + google-genai \ mistralai \ openai \ e2b diff --git a/README.md b/README.md index 94ad2ef..d3913d9 100755 --- a/README.md +++ b/README.md @@ -187,7 +187,7 @@ Please make sure your HF access token has the `Make calls to inference providers` ## 💻 LLM-generated Code We share pre-generated code samples from LLMs we have [evaluated](https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard) on the full set: -* See the attachment of our [v0.2.1.post7](https://github.com/bigcode-project/bigcodebench/releases/tag/v0.2.1.post7). We include `sanitized_samples_calibrated.zip` for your convenience. +* See the attachment of our [v0.2.4](https://github.com/bigcode-project/bigcodebench/releases/tag/v0.2.4). We include `sanitized_samples_calibrated.zip` for your convenience. 
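For reference, a minimal sketch of how the `--gradio_endpoint` route is typically exercised from Python, assuming the `gradio_client` package; the exact parameter names and route are listed under `Use via API` on the Space page, so the call below is illustrative only:

```python
from gradio_client import Client

# Point at the hosted evaluator, or at your own clone of the Space.
client = Client("https://bigcode-bigcodebench-evaluator.hf.space/")

# Illustrative arguments; check "Use via API" on the Space page for the
# exact signature exposed by your evaluator revision.
result = client.predict(
    "complete",                             # split
    "full",                                 # subset
    "samples-sanitized_calibrated.jsonl",   # generated samples file
    api_name="/predict",
)
print(result)
```

`bigcodebench.evaluate` wraps this call for you; going through `gradio_client` directly is mainly useful when debugging a self-hosted evaluator.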
## 🧑 Advanced Usage diff --git a/analysis/get_results.py b/analysis/get_results.py index fc5aa17..641c43b 100755 --- a/analysis/get_results.py +++ b/analysis/get_results.py @@ -4,7 +4,7 @@ import numpy as np from numpy import mean from glob import glob -from utils import * +from utils import model_info from tqdm import tqdm import pandas as pd import itertools @@ -48,6 +48,8 @@ def get_results(tids): "moe": info["moe"], "size": info["size"], "act_param": info["act_param"], + "date": info.get("date", None), + "prefill": info.get("prefill", False), # "direct_complete": info["direct_complete"], } @@ -118,12 +120,12 @@ def check_valid(results): def split_gen(): - shutil.rmtree("sanitized_samples", ignore_errors=True) shutil.rmtree("sanitized_calibrated_samples", ignore_errors=True) - os.makedirs("sanitized_samples/complete", exist_ok=True) - os.makedirs("sanitized_samples/instruct", exist_ok=True) - os.makedirs("sanitized_calibrated_samples/complete", exist_ok=True) - os.makedirs("sanitized_calibrated_samples/instruct", exist_ok=True) + os.makedirs("sanitized_calibrated_samples/hard/complete", exist_ok=True) + os.makedirs("sanitized_calibrated_samples/hard/instruct", exist_ok=True) + os.makedirs("sanitized_calibrated_samples/full/complete", exist_ok=True) + os.makedirs("sanitized_calibrated_samples/full/instruct", exist_ok=True) + for model, info in model_info.items(): model = model.replace("/", "--") files = glob(f"results/{model}--bigcodebench-*.jsonl") @@ -131,27 +133,21 @@ def split_gen(): model = info["link"].split("https://huggingface.co/")[-1].replace("/", "--") for file in files: + if "-sanitized" not in file or "calibrated" not in file: + continue + _, suffix = os.path.basename(file).split("--bigcodebench-") with open(file, "r") as f: data = f.readlines() - if "-sanitized" in file: - if "calibrated" in file: - if info["prompted"]: - if suffix.startswith("complete"): - with open(f"sanitized_calibrated_samples/complete/{model}--bigcodebench-{suffix}", "w") as f: - f.writelines(data) - else: - with open(f"sanitized_calibrated_samples/instruct/{model}--bigcodebench-{suffix}", "w") as f: - f.writelines(data) + split_type = "hard" if "-hard-" in file else "full" + if info["prompted"]: + if suffix.startswith("complete") or suffix.startswith("hard-complete"): + with open(f"sanitized_calibrated_samples/{split_type}/complete/{model}--bigcodebench-{suffix}", "w") as f: + f.writelines(data) else: - if suffix.startswith("complete"): - with open(f"sanitized_samples/complete/{model}--bigcodebench-{suffix}", "w") as f: - f.writelines(data) - else: - with open(f"sanitized_samples/instruct/{model}--bigcodebench-{suffix}", "w") as f: - f.writelines(data) - + with open(f"sanitized_calibrated_samples/{split_type}/instruct/{model}--bigcodebench-{suffix}", "w") as f: + f.writelines(data) def read_task_perf(tids, task="complete"): model_results = dict() @@ -255,7 +251,7 @@ def get_solve_rate(data_dict, task="complete"): def get_hf_ds(results): hf_dataset = {"model": [], "link": [], "moe": [], "size": [], "act_param": [], "type": [], #"lazy": [],# "direct_complete": [], - "complete": [], "instruct": []} + "complete": [], "instruct": [], "date": [], "prefill": []} for model, result in results.items(): hf_dataset["model"].append(model) @@ -267,6 +263,8 @@ def get_hf_ds(results): # hf_dataset["lazy"].append(result["lazy"]) hf_dataset["complete"].append(result["pass@1"]["complete"]) hf_dataset["instruct"].append(result["pass@1"]["instruct"]) + hf_dataset["date"].append(result["date"]) + 
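The reworked `split_gen` above keeps only sanitized, calibrated generations and fans them out over a `{full,hard} × {complete,instruct}` directory tree. A condensed restatement of that routing rule, as a sketch (the example filename is illustrative, following the identifier format produced by `generate.py`):

```python
import os
from typing import Optional

def route(path: str, prompted: bool) -> Optional[str]:
    # Only sanitized, calibrated generations from prompted (instruction-tuned)
    # models are kept; everything else is skipped.
    if "-sanitized" not in path or "calibrated" not in path or not prompted:
        return None
    _, suffix = os.path.basename(path).split("--bigcodebench-")
    split_type = "hard" if "-hard-" in path else "full"
    task = "complete" if suffix.startswith(("complete", "hard-complete")) else "instruct"
    return f"sanitized_calibrated_samples/{split_type}/{task}"

# e.g. route("results/m--main--bigcodebench-hard-complete--vllm-0-1-sanitized_calibrated.jsonl", True)
# -> "sanitized_calibrated_samples/hard/complete"
```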
hf_dataset["prefill"].append(result["prefill"]) # hf_dataset["direct_complete"].append(result["direct_complete"]) return Dataset.from_dict(hf_dataset) @@ -302,7 +300,7 @@ def get_perf_df(data_dict): if __name__ == "__main__": - # split_gen() + split_gen() bcb_orig = load_dataset("bigcode/bigcodebench", split="v0.1.1") bcb_hard = load_dataset("bigcode/bigcodebench-hard", split="v0.1.1") bcb_config = { diff --git a/analysis/utils.py b/analysis/utils.py index 430e113..20ecbf5 100755 --- a/analysis/utils.py +++ b/analysis/utils.py @@ -7,7 +7,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-12-04", }, "bigcode/starcoder2-15b-instruct-v0.1": { "name": "StarCoder2-15B-Instruct-v0.1", @@ -17,7 +18,8 @@ "size": 15, "act_param": 15, "open-data": "Full", - "reasoning": False, + "prefill": True, + "date": "2024-04-30" }, "bigcode/starcoder2-3b": { "name": "StarCoder2-3B", @@ -27,7 +29,8 @@ "size": 3, "act_param": 3, "open-data": "Full", - "reasoning": False, + "prefill": True, + "date": "2024-02-29" }, "bigcode/starcoder2-7b": { "name": "StarCoder2-7B", @@ -37,7 +40,8 @@ "size": 7, "act_param": 7, "open-data": "Full", - "reasoning": False, + "prefill": True, + "date": "2024-02-29" }, "bigcode/starcoder2-15b": { "name": "StarCoder2-15B", @@ -47,7 +51,8 @@ "size": 15, "act_param": 15, "open-data": "Full", - "reasoning": False, + "prefill": True, + "date": "2024-02-29" }, "Qwen/CodeQwen1.5-7B": { "name": "CodeQwen1.5-7B", @@ -57,7 +62,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-16" }, "google/codegemma-2b": { "name": "CodeGemma-2B", @@ -67,7 +73,8 @@ "size": 2, "act_param": 2, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-10" }, "google/codegemma-7b": { "name": "CodeGemma-7B", @@ -77,7 +84,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-10" }, "google/codegemma-7b-it": { "name": "CodeGemma-7B-Instruct", @@ -87,7 +95,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-10" }, "gpt-3.5-turbo-0125": { "name": "GPT-3.5-Turbo-0125", @@ -97,7 +106,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-01-25" }, "gpt-4o": { "name": "GPT-4o-2024-05-13", @@ -107,7 +117,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-13" }, "gpt-4-turbo-2024-04-09": { "name": "GPT-4-Turbo-2024-04-09", @@ -117,7 +128,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-09" }, "gpt-4-0613": { "name": "GPT-4-0613", @@ -127,7 +139,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-06-13" }, "codellama/CodeLlama-7b-hf": { "name": "CodeLlama-7B-Base", @@ -137,7 +150,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, "codellama/CodeLlama-13b-hf": { "name": "CodeLlama-13B-Base", @@ -147,7 +161,8 @@ "size": 13, "act_param": 13, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, "codellama/CodeLlama-7b-Instruct-hf": { "name": "CodeLlama-7B-Instruct", @@ -157,7 +172,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, 
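The `reasoning` → `prefill`/`date` migration repeated through the rest of this file follows one fixed pattern; a quick invariant check over the finished `model_info` dict might look like this (a sketch, not part of the PR):

```python
from analysis.utils import model_info

# Every entry should now carry a boolean "prefill" flag (can the assistant
# turn be prefilled?) and a "YYYY-MM-DD" release date, with the retired
# "reasoning" key gone.
for model, info in model_info.items():
    assert isinstance(info.get("prefill"), bool), f"{model}: missing prefill"
    assert len(info.get("date", "")) == 10, f"{model}: missing/odd date"
    assert "reasoning" not in info, f"{model}: stale reasoning key"
```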
"codellama/CodeLlama-13b-Instruct-hf": { "name": "CodeLlama-13B-Instruct", @@ -167,7 +183,8 @@ "size": 13, "act_param": 13, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, "mistral-large-2402": { "name": "Mistral-Large-2402", @@ -177,7 +194,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-02-26" }, "mistral-small-2402": { "name": "Mistral-Small-2402", @@ -187,7 +205,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-02-26" }, "mistralai/Mixtral-8x22B-v0.1": { "name": "Mixtral-8x22B-Base", @@ -197,7 +216,8 @@ "size": 176, "act_param": 44, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-17" }, "mistralai/Mixtral-8x22B-Instruct-v0.1": { "name": "Mixtral-8x22B-Instruct", @@ -207,7 +227,8 @@ "size": 176, "act_param": 44, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-17" }, "codellama/CodeLlama-34b-hf": { "name": "CodeLlama-34B-Base", @@ -217,7 +238,8 @@ "size": 34, "act_param": 34, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, "codellama/CodeLlama-34b-Instruct-hf": { "name": "CodeLlama-34B-Instruct", @@ -227,7 +249,8 @@ "size": 34, "act_param": 34, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, "codellama/CodeLlama-70b-hf": { "name": "CodeLlama-70B-Base", @@ -237,7 +260,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, "codellama/CodeLlama-70b-Instruct-hf": { "name": "CodeLlama-70B-Instruct", @@ -247,7 +271,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, "Qwen/CodeQwen1.5-7B-Chat": { "name": "CodeQwen1.5-7B-Chat", @@ -257,7 +282,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-16" }, "Qwen/Qwen1.5-110B-Chat": { "name": "Qwen1.5-110B-Chat", @@ -267,7 +293,8 @@ "size": 110, "act_param": 110, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-26" }, "Qwen/Qwen1.5-72B-Chat": { "name": "Qwen1.5-72B-Chat", @@ -277,7 +304,8 @@ "size": 72, "act_param": 72, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-26" }, "Qwen/Qwen1.5-32B-Chat": { "name": "Qwen1.5-32B-Chat", @@ -287,7 +315,8 @@ "size": 32, "act_param": 32, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-26" }, "deepseek-ai/DeepSeek-V2-Chat": { "name": "DeepSeek-V2-Chat", @@ -297,7 +326,8 @@ "size": 236, "act_param": 21, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-06" }, "deepseek-ai/deepseek-coder-1.3b-base": { "name": "DeepSeek-Coder-1.3B-Base", @@ -307,7 +337,8 @@ "size": 1.3, "act_param": 1.3, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-10-28" }, "deepseek-ai/deepseek-coder-1.3b-instruct": { "name": "DeepSeek-Coder-1.3B-Instruct", @@ -317,7 +348,8 @@ "size": 1.3, "act_param": 1.3, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-10-28" }, "deepseek-ai/deepseek-coder-33b-base": { "name": "DeepSeek-Coder-33B-Base", @@ -327,7 +359,8 @@ "size": 33, "act_param": 33, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-10-28" }, "deepseek-ai/deepseek-coder-33b-instruct": { "name": 
"DeepSeek-Coder-33B-Instruct", @@ -337,7 +370,8 @@ "size": 33, "act_param": 33, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-10-28" }, "deepseek-ai/deepseek-coder-6.7b-base": { "name": "DeepSeek-Coder-6.7B-Base", @@ -347,7 +381,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-10-28" }, "deepseek-ai/deepseek-coder-6.7b-instruct": { "name": "DeepSeek-Coder-6.7B-Instruct", @@ -357,7 +392,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-10-28" }, "meta-llama/Meta-Llama-3-70B": { "name": "Llama-3-70B-Base", @@ -367,7 +403,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-18" }, "meta-llama/Meta-Llama-3-70B-Instruct": { "name": "Llama-3-70B-Instruct", @@ -377,7 +414,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-18" }, "meta-llama/Meta-Llama-3-8B": { "name": "Llama-3-8B-Base", @@ -387,7 +425,8 @@ "size": 8, "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-18" }, "meta-llama/Meta-Llama-3-8B-Instruct": { "name": "Llama-3-8B-Instruct", @@ -397,7 +436,8 @@ "size": 8, "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-18" }, "ibm-granite/granite-3b-code-instruct": { "name": "Granite-Code-3B-Instruct", @@ -407,7 +447,8 @@ "size": 3, "act_param": 3, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-06" }, "ibm-granite/granite-8b-code-instruct": { "name": "Granite-Code-8B-Instruct", @@ -417,7 +458,8 @@ "size": 8, "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-06" }, "ibm-granite/granite-20b-code-instruct": { "name": "Granite-Code-20B-Instruct", @@ -427,7 +469,8 @@ "size": 20, "act_param": 20, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-06" }, "ibm-granite/granite-34b-code-instruct": { "name": "Granite-Code-34B-Instruct", @@ -437,7 +480,8 @@ "size": 34, "act_param": 34, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-06" }, "ibm-granite/granite-3b-code-base": { "name": "Granite-Code-3B-Base", @@ -447,7 +491,8 @@ "size": 3, "act_param": 3, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-06" }, "ibm-granite/granite-8b-code-base": { "name": "Granite-Code-8B-Base", @@ -457,7 +502,8 @@ "size": 8, "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-06" }, "ibm-granite/granite-20b-code-base": { "name": "Granite-Code-20B-Base", @@ -467,7 +513,8 @@ "size": 20, "act_param": 20, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-06" }, "ibm-granite/granite-34b-code-base": { "name": "Granite-Code-34B-Base", @@ -477,7 +524,8 @@ "size": 34, "act_param": 34, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-06" }, "claude-3-haiku-20240307": { "name": "Claude-3-Haiku-20240307", @@ -487,7 +535,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-03-07" }, "claude-3-sonnet-20240229": { "name": "Claude-3-Sonnet-20240229", @@ -497,7 +546,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-02-29" }, "claude-3-opus-20240229": { 
"name": "Claude-3-Opus-20240229", @@ -507,7 +557,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-02-29" }, "01-ai/Yi-1.5-34B-Chat": { "name": "Yi-1.5-34B-Chat", @@ -517,7 +568,8 @@ "size": 34, "act_param": 34, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-20" }, "01-ai/Yi-1.5-34B": { "name": "Yi-1.5-34B", @@ -527,7 +579,8 @@ "size": 34, "act_param": 34, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-20" }, "01-ai/Yi-1.5-9B-Chat": { "name": "Yi-1.5-9B-Chat", @@ -537,7 +590,8 @@ "size": 9, "act_param": 9, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-20" }, "01-ai/Yi-1.5-9B": { "name": "Yi-1.5-9B", @@ -547,7 +601,8 @@ "size": 9, "act_param": 9, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-20" }, "01-ai/Yi-1.5-6B-Chat": { "name": "Yi-1.5-6B-Chat", @@ -557,7 +612,8 @@ "size": 6, "act_param": 6, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-20" }, "01-ai/Yi-1.5-6B": { "name": "Yi-1.5-6B", @@ -567,7 +623,8 @@ "size": 6, "act_param": 6, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-20" }, "Qwen/Qwen2-57B-A14B": { "name": "Qwen2-57B-A14B", @@ -577,7 +634,8 @@ "size": 57, "act_param": 14, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-06-07" }, "Qwen/Qwen2-7B-Instruct": { "name": "Qwen2-7B-Instruct", @@ -587,7 +645,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-06-07" }, "Qwen/Qwen2-72B-Chat": { "name": "Qwen2-72B-Chat", @@ -597,7 +656,8 @@ "size": 72, "act_param": 72, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-06-07" }, "gemini-1.5-pro": { "name": "Gemini-1.5-Pro-API-0514", @@ -607,7 +667,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-14" }, "gemini-1.5-flash": { "name": "Gemini-1.5-Flash-API-0514", @@ -617,7 +678,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-14" }, "m-a-p/OpenCodeInterpreter-DS-33B": { "name": "OpenCodeInterpreter-DS-33B", @@ -627,7 +689,8 @@ "size": 33, "act_param": 33, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-02-22" }, "m-a-p/OpenCodeInterpreter-DS-6.7B": { "name": "OpenCodeInterpreter-DS-6.7B", @@ -637,7 +700,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-02-22" }, "m-a-p/OpenCodeInterpreter-DS-1.3B": { "name": "OpenCodeInterpreter-DS-1.3B", @@ -647,7 +711,8 @@ "size": 1.3, "act_param": 1.3, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-02-22" }, "microsoft/Phi-3-medium-128k-instruct": { "name": "Phi-3-Medium-128K-Instruct", @@ -657,7 +722,8 @@ "size": 14, "act_param": 14, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-21" }, "microsoft/Phi-3-small-128k-instruct": { "name": "Phi-3-Small-128K-Instruct", @@ -667,7 +733,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-21" }, "codestral-2405": { "name": "Codestral-22B-v0.1", @@ -677,7 +744,8 @@ "size": 22, "act_param": 22, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-23" }, "codestral-mamba-2407": { "name": 
"Codestral-Mamba", @@ -687,7 +755,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-16" }, "mistralai/Mistral-7B-Instruct-v0.3": { "name": "Mistral-7B-Instruct-v0.3", @@ -697,7 +766,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-22" }, "mistralai/Mistral-7B-v0.3": { "name": "Mistral-7B-v0.3", @@ -707,7 +777,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-22" }, "CohereForAI/c4ai-command-r-plus": { "name": "Command R+", @@ -717,7 +788,8 @@ "size": 104, "act_param": 104, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-04" }, "deepseek-coder": { "name": "DeepSeek-Coder-V2-Instruct", @@ -727,7 +799,8 @@ "size": 236, "act_param": 21, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-06-17" }, "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": { "name": "DeepSeek-Coder-V2-Lite-Instruct", @@ -737,7 +810,8 @@ "size": 16, "act_param": 2.4, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-06-17" }, "deepseek-ai/DeepSeek-Coder-V2-Lite-Base": { "name": "DeepSeek-Coder-V2-Lite-Base", @@ -747,7 +821,8 @@ "size": 16, "act_param": 2.4, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-06-17" }, "claude-3-5-sonnet-20240620": { "name": "Claude-3.5-Sonnet-20240620", @@ -757,7 +832,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-06-20" }, "NousResearch/Hermes-2-Theta-Llama-3-70B": { "name": "Hermes-2-Theta-Llama-3-70B", @@ -767,7 +843,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-06-24" }, "microsoft/wavecoder-ultra-6.7b": { "name": "WaveCoder-Ultra-6.7B", @@ -777,7 +854,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-12-26" }, "google/gemma-2-9b-it": { "name": "Gemma-2-9B-Instruct", @@ -787,7 +865,8 @@ "size": 9, "act_param": 9, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-06-19" }, "Bin12345/AutoCoder": { "name": "AutoCoder", @@ -797,7 +876,8 @@ "size": 33, "act_param": 33, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-23" }, "Bin12345/AutoCoder_S_6.7B": { "name": "AutoCoder-S-6.7B", @@ -807,7 +887,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-23" }, "Bin12345/AutoCoder_QW_7B": { "name": "AutoCoder-QW-7B", @@ -817,7 +898,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-23" }, "SenseLLM/ReflectionCoder-DS-33B": { "name": "ReflectionCoder-DS-33B", @@ -827,7 +909,8 @@ "size": 33, "act_param": 33, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-05-27" }, "SenseLLM/ReflectionCoder-DS-6.7B": { "name": "ReflectionCoder-DS-6.7B", @@ -837,7 +920,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-05-27" }, "SenseLLM/ReflectionCoder-CL-34B": { "name": "ReflectionCoder-CL-34B", @@ -847,7 +931,8 @@ "size": 34, "act_param": 34, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-05-27" }, "SenseLLM/ReflectionCoder-CL-7B": { "name": "ReflectionCoder-CL-7B", @@ -857,7 +942,8 @@ "size": 7, 
"act_param": 7, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-05-27" }, "new-microsoft/Phi-3-mini-128k-instruct": { "name": "Phi-3.1-Mini-128K-Instruct", @@ -867,7 +953,8 @@ "size": 3.8, "act_param": 3.8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-21" }, "old-microsoft/Phi-3-mini-128k-instruct": { "name": "Phi-3-Mini-128K-Instruct", @@ -877,7 +964,8 @@ "size": 3.8, "act_param": 3.8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-21" }, "internlm/internlm2_5-7b-chat": { "name": "InternLM2.5-7B-Chat", @@ -887,7 +975,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-03" }, "NousResearch/Hermes-2-Pro-Llama-3-70B": { "name": "Hermes-2-Pro-Llama-3-70B", @@ -897,7 +986,8 @@ "size": 70, "act_param": 70, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-06-27" }, "new-deepseek-chat": { "name": "DeepSeek-V2-Chat (2024-06-28)", @@ -907,7 +997,8 @@ "size": 236, "act_param": 21, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-06-28" }, "vllm-google/gemma-2-27b-it": { "name": "Gemma-2-27B-Instruct", @@ -917,7 +1008,8 @@ "size": 27, "act_param": 27, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-06-19" }, "Artigenz/Artigenz-Coder-DS-6.7B": { "name": "Artigenz-Coder-DS-6.7B", @@ -927,7 +1019,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-16" }, "openchat/openchat-3.6-8b-20240522": { "name": "OpenChat-3.6-8B-20240522", @@ -937,7 +1030,8 @@ "size": 8, "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-22" }, "Phind/Phind-CodeLlama-34B-v2": { "name": "Phind-CodeLlama-34B-v2", @@ -947,7 +1041,8 @@ "size": 34, "act_param": 34, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2023-08-25" }, "yi-large": { "name": "Yi-Large", @@ -957,7 +1052,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-13" }, "THUDM/codegeex4-all-9b": { "name": "CodeGeex4-All-9B", @@ -967,7 +1063,8 @@ "size": 9, "act_param": 9, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-05" }, "gpt-4o-mini-2024-07-18": { "name": "GPT-4o-mini-2024-07-18", @@ -977,7 +1074,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-18" }, "Nexusflow/Athene-70B": { "name": "Athene-70B", @@ -987,7 +1085,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-20" }, "NTQAI/Nxcode-CQ-7B-orpo": { "name": "Nxcode-CQ-7B-Orpo", @@ -997,7 +1096,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-25" }, "migtissera/Llama-3-70B-Synthia-v3.5": { "name": "Llama-3-70B-Synthia-v3.5", @@ -1007,7 +1107,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-05-27" }, "migtissera/Tess-v2.5.2-Qwen2-72B": { "name": "Tess-v2.5.2-Qwen2-72B", @@ -1017,7 +1118,8 @@ "size": 72, "act_param": 72, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-18" }, "WhiteRabbitNeo/WhiteRabbitNeo-33B-v1.5": { "name": "WhiteRabbitNeo-33B-v1.5", @@ -1027,7 +1129,8 @@ "size": 33, "act_param": 33, "open-data": "None", - "reasoning": False, + 
"prefill": True, + "date": "2024-02-10" }, "mistral-large-2407": { "name": "Mistral-Large-Instruct-2407", @@ -1037,7 +1140,8 @@ "size": 123, "act_param": 123, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-07-24" }, "meta-llama/Meta-Llama-3.1-8B-Instruct": { "name": "Llama-3.1-8B-Instruct", @@ -1047,7 +1151,8 @@ "size": 8, "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-23" }, "meta-llama/Meta-Llama-3.1-70B-Instruct": { "name": "Llama-3.1-70B-Instruct", @@ -1057,7 +1162,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-23" }, "meta--llama-3.1-405b-instruct": { "name": "Llama-3.1-405B-Instruct", @@ -1067,7 +1173,8 @@ "size": 405, "act_param": 405, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-23" }, "deepseek-coder-20240724": { "name": "DeepSeek-Coder-V2-Instruct (2024-07-24)", @@ -1077,7 +1184,8 @@ "size": 236, "act_param": 21, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-24" }, "microsoft/Phi-3.5-mini-instruct": { "name": "Phi-3.5-Mini-Instruct", @@ -1087,7 +1195,8 @@ "size": 3.8, "act_param": 3.8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-23" }, "nv-mistralai--mistral-nemo-12b-instruct": { "name": "Mistral-Nemo-12B-Instruct", @@ -1097,7 +1206,8 @@ "size": 12, "act_param": 12, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-07-18" }, "wyt2000/InverseCoder-CL-13B": { "name": "InverseCoder-CL-13B", @@ -1107,7 +1217,8 @@ "size": 13, "act_param": 13, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-07-08" }, "wyt2000/InverseCoder-CL-7B": { "name": "InverseCoder-CL-7B", @@ -1117,7 +1228,8 @@ "size": 7, "act_param": 7, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-07-08" }, "wyt2000/InverseCoder-DS-6.7B": { "name": "InverseCoder-DS-6.7B", @@ -1127,7 +1239,8 @@ "size": 6.7, "act_param": 6.7, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-07-08" }, "gemini-1.5-pro-exp-0801": { "name": "Gemini-1.5-Pro-Exp-0801", @@ -1137,7 +1250,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-08-01" }, "gpt-4o-2024-08-06": { "name": "GPT-4o-2024-08-06", @@ -1147,7 +1261,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-08-06" }, "abacusai/Dracarys-Llama-3.1-70B-Instruct": { "name": "Dracarys-Llama-3.1-70B-Instruct", @@ -1157,7 +1272,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-08-23" }, "abacusai/Dracarys-72B-Instruct": { "name": "Dracarys-72B-Instruct", @@ -1167,7 +1283,8 @@ "size": 72, "act_param": 72, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-08-23" }, "gemini-1.5-pro-exp-0827": { "name": "Gemini-1.5-Pro-Exp-0827", @@ -1177,7 +1294,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-08-27" }, "gemini-1.5-flash-exp-0827": { "name": "Gemini-1.5-Flash-Exp-0827", @@ -1187,7 +1305,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-08-27" }, "microsoft/Phi-3.5-mini-instruct": { "name": "Phi-3.5-Mini-Instruct", @@ -1197,7 +1316,8 @@ "size": 3.8, "act_param": 3.8, "open-data": 
"None", - "reasoning": False, + "prefill": True, + "date": "2024-04-23" }, "abacusai/Dracarys-Llama-3.1-70B-Instruct": { "name": "Dracarys-Llama-3.1-70B-Instruct", @@ -1207,7 +1327,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-23" }, "abacusai/Dracarys-72B-Instruct": { "name": "Dracarys-72B-Instruct", @@ -1217,7 +1338,8 @@ "size": 72, "act_param": 72, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-04-23" }, "deepseek-coder-v2.5": { "name": "DeepSeek-V2.5", @@ -1227,7 +1349,8 @@ "size": 236, "act_param": 21, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-18" }, "CohereForAI/c4ai-command-r-08-2024": { "name": "C4AI-Command-R-08-2024", @@ -1237,7 +1360,8 @@ "size": 32.3, "act_param": 32.3, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-08-30" }, "CohereForAI/c4ai-command-r-plus-08-2024": { "name": "C4AI-Command-R-Plus-08-2024", @@ -1247,7 +1371,8 @@ "size": 104, "act_param": 104, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-08-30" }, "ayueei--yue-coder-9b-preview": { "name": "Yi-Coder-9B-Chat", @@ -1257,7 +1382,8 @@ "size": 9, "act_param": 9, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-04" }, # "mattshumer/ref_70_e3_prefill": { # "name": "Reflection-Llama-3.1-70B", @@ -1285,7 +1411,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-09-12" }, "o1-mini-2024-09-12": { "name": "o1-Mini-2024-09-12 (temperature=1)", @@ -1295,7 +1422,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-09-12" }, "Qwen/Qwen2.5-Coder-1.5B-Instruct": { "name": "Qwen2.5-Coder-1.5B-Instruct", @@ -1305,7 +1433,8 @@ "size": 1.5, "act_param": 1.5, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-11-12" }, "Qwen/Qwen2.5-Coder-7B-Instruct": { "name": "Qwen2.5-Coder-7B-Instruct", @@ -1315,7 +1444,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-11-12" }, "gemini-1.5-pro-002": { "name": "Gemini-1.5-Pro-002", @@ -1325,7 +1455,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-09-25" }, "mistralai/Mistral-Small-Instruct-2409": { "name": "Mistral-Small-Instruct-2409", @@ -1335,7 +1466,8 @@ "size": 22.2, "act_param": 22.2, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-18" }, "Qwen/Qwen2.5-0.5B-Instruct": { "name": "Qwen2.5-0.5B-Instruct", @@ -1345,7 +1477,8 @@ "size": 0.5, "act_param": 0.5, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-19" }, "Qwen/Qwen2.5-1.5B-Instruct": { "name": "Qwen2.5-1.5B-Instruct", @@ -1355,7 +1488,8 @@ "size": 1.5, "act_param": 1.5, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-19" }, "Qwen/Qwen2.5-7B-Instruct": { "name": "Qwen2.5-7B-Instruct", @@ -1365,7 +1499,8 @@ "size": 7, "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-19" }, "Qwen/Qwen2.5-14B-Instruct": { "name": "Qwen2.5-14B-Instruct", @@ -1375,7 +1510,8 @@ "size": 14, "act_param": 14, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-19" }, "Qwen/Qwen2.5-32B-Instruct": { "name": "Qwen2.5-32B-Instruct", @@ -1385,7 +1521,8 @@ "size": 32, "act_param": 32, 
"open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-19" }, "Qwen/Qwen2.5-72B-Instruct": { "name": "Qwen2.5-72B-Instruct", @@ -1395,7 +1532,8 @@ "size": 72, "act_param": 72, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-19" }, "meta-llama/Llama-3.2-1B-Instruct": { "name": "Llama-3.2-1B-Instruct", @@ -1405,7 +1543,8 @@ "size": 1, "act_param": 1, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-25" }, "meta-llama/Llama-3.2-3B-Instruct": { "name": "Llama-3.2-3B-Instruct", @@ -1415,7 +1554,8 @@ "size": 3, "act_param": 3, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-25" }, "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": { "name": "Llama-3.1-Nemotron-70B-Instruct", @@ -1425,7 +1565,8 @@ "size": 70, "act_param": 70, "open-data": "Partial", - "reasoning": False, + "prefill": True, + "date": "2024-09-25" }, "claude-3-5-sonnet-20241022": { "name": "Claude-3.5-Sonnet-20241022", @@ -1435,7 +1576,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-10-22" }, "ibm-granite/granite-3.0-8b-instruct": { "name": "Granite-3.0-8B-Instruct", @@ -1445,7 +1587,8 @@ "size": 8, "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-10-21" }, "ibm-granite/granite-3.0-2b-instruct": { "name": "Granite-3.0-2B-Instruct", @@ -1455,7 +1598,8 @@ "size": 2, "act_param": 2, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-10-21" }, "grok-beta--main": { "name": "Grok-Beta", @@ -1465,7 +1609,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-03-17" }, "claude-3-5-haiku-20241022--main": { "name": "Claude-3.5-Haiku-20241022", @@ -1475,7 +1620,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-10-22" }, "Qwen/Qwen2.5-Coder-14B-Instruct--main": { "name": "Qwen2.5-Coder-14B-Instruct", @@ -1485,7 +1631,8 @@ "size": 14, "act_param": 14, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-19" }, "Qwen/Qwen2.5-Coder-32B-Instruct--main": { "name": "Qwen2.5-Coder-32B-Instruct", @@ -1495,7 +1642,8 @@ "size": 32, "act_param": 32, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-09-19" }, "infly/OpenCoder-1.5B-Instruct--main": { "name": "OpenCoder-1.5B-Instruct", @@ -1505,7 +1653,8 @@ "size": 1.5, "act_param": 1.5, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-11-09" }, "infly/OpenCoder-8B-Instruct--main": { "name": "OpenCoder-8B-Instruct", @@ -1515,7 +1664,8 @@ "size": 8, "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-11-09" }, "microsoft/Phi-3.5-mini-instruct--main": { "name": "Phi-3.5-Mini-Instruct", @@ -1525,7 +1675,8 @@ "size": 3.8, "act_param": 3.8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-08-21" }, "Nexusflow/Athene-V2-Agent--main": { "name": "Athene-V2-Agent", @@ -1535,7 +1686,8 @@ "size": 72, "act_param": 72, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-11-14" }, "Nexusflow/Athene-V2-Chat--main": { "name": "Athene-V2-Chat", @@ -1545,7 +1697,8 @@ "size": 72, "act_param": 72, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-11-14" }, "gemini-exp-1114--main": { "name": "Gemini-Exp-1114", @@ -1555,7 +1708,8 @@ "size": 
None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-11-14" }, "gpt-4o-2024-11-20--main": { "name": "GPT-4o-2024-11-20", @@ -1565,7 +1719,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-11-20" }, "gemini-exp-1121--main": { "name": "Gemini-Exp-1121", @@ -1575,7 +1730,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-11-21" }, "gemini-exp-1206--main": { "name": "Gemini-Exp-1206", @@ -1585,7 +1741,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-12-06" }, "meta-llama--Llama-3.3-70B-Instruct--main": { "name": "Llama-3.3-70B-Instruct", @@ -1595,7 +1752,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-12-19" }, "deepseek-ai--DeepSeek-V2.5-1210--main": { "name": "DeepSeek-V2.5-1210", @@ -1605,7 +1763,8 @@ "size": 236, "act_param": 21, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-12-10" }, "gemini-2.0-flash-exp--main": { "name": "Gemini-2.0-Flash-Exp", @@ -1615,7 +1774,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-12-11" }, "gemini-2.0-flash-thinking-exp-1219--main": { "name": "Gemini-2.0-Flash-Thinking-Exp-1219", @@ -1625,7 +1785,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-12-19" }, "gemini-2.0-flash-thinking-exp-01-21--main": { "name": "Gemini-2.0-Flash-Thinking-Exp-01-21", @@ -1635,7 +1796,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-21" }, "o1-2024-12-17--main": { "name": "o1-2024-12-17 (temperature=1, reasoning=medium)", @@ -1645,7 +1807,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-12-17" }, "o1-2024-12-17--low--main": { "name": "o1-2024-12-17 (temperature=1, reasoning=low)", @@ -1655,7 +1818,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-12-17" }, "o1-2024-12-17--high--main": { "name": "o1-2024-12-17 (temperature=1, reasoning=high)", @@ -1665,17 +1829,19 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-12-17" }, "deepseek-v3-chat--main": { - "name": "DeepSeek-V3-Chat", - "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3-Chat", + "name": "DeepSeek-V3", + "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3", "prompted": True, "moe": True, "size": 671, "act_param": 37, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2024-12-26" }, "microsoft--phi-4--main": { "name": "Phi-4", @@ -1685,7 +1851,8 @@ "size": 14.7, "act_param": 14.7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-12-13" }, "deepseek-reasoner--main": { "name": "DeepSeek-R1", @@ -1695,7 +1862,8 @@ "size": 671, "act_param": 37, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2025-01-20" }, "deepseek-ai/DeepSeek-R1-Distill-Llama-70B--main": { "name": "DeepSeek-R1-Distill-Llama-70B", @@ -1705,7 +1873,8 @@ "size": 70, "act_param": 70, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-20" }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B--main": { "name": 
"DeepSeek-R1-Distill-Qwen-32B", @@ -1715,7 +1884,8 @@ "size": 32, "act_param": 32, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-20" }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B--main": { "name": "DeepSeek-R1-Distill-Qwen-14B", @@ -1725,27 +1895,30 @@ "size": 14, "act_param": 14, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-20" }, "deepseek-ai/DeepSeek-R1-Distill-Llama-8B--main": { "name": "DeepSeek-R1-Distill-Llama-8B", "link": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "prompted": True, "moe": False, - "size": 14, - "act_param": 14, + "size": 8, + "act_param": 8, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-20" }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B--main": { "name": "DeepSeek-R1-Distill-Qwen-7B", "link": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "prompted": True, "moe": False, - "size": 14, - "act_param": 14, + "size": 7, + "act_param": 7, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-20" }, "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B--main": { "name": "DeepSeek-R1-Distill-Qwen-1.5B", @@ -1755,7 +1928,8 @@ "size": 1.5, "act_param": 1.5, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-20" }, "mistralai/Mistral-Small-24B-Instruct-2501--main": { "name": "Mistral-Small-24B-Instruct-2501", @@ -1765,7 +1939,8 @@ "size": 24, "act_param": 24, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-31" }, "o3-mini-2025-01-31--medium--main": { "name": "o3-mini-2025-01-31 (temperature=1, reasoning=medium)", @@ -1775,7 +1950,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2025-01-31" }, "o3-mini-2025-01-31--low--main": { "name": "o3-mini-2025-01-31 (temperature=1, reasoning=low)", @@ -1785,7 +1961,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2025-01-31" }, "o3-mini-2025-01-31--high--main": { "name": "o3-mini-2025-01-31 (temperature=1, reasoning=high)", @@ -1795,7 +1972,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": True, + "prefill": False, + "date": "2025-01-31" }, "gemini-2.0-flash-001--main": { "name": "Gemini-2.0-Flash-001", @@ -1805,7 +1983,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-02-05" }, "gemini-2.0-flash-exp--main": { "name": "Gemini-2.0-Flash-Exp", @@ -1815,7 +1994,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-02-05" }, "gemini-2.0-flash-lite-preview-02-05--main": { "name": "Gemini-2.0-Flash-Lite-Preview-02-05", @@ -1825,7 +2005,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-02-05" }, "gemini-2.0-pro-exp-02-05--main": { "name": "Gemini-2.0-Pro-Exp-02-05", @@ -1835,7 +2016,8 @@ "size": None, "act_param": None, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-02-05" }, "NovaSky-AI--Sky-T1-32B-Flash--main": { "name": "Sky-T1-32B-Flash", @@ -1845,7 +2027,8 @@ "size": 32, "act_param": 32, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2025-01-12" }, "NovaSky-AI--Sky-T1-32B-Preview--main": { "name": "Sky-T1-32B-Preview", @@ -1855,7 +2038,8 @@ "size": 32, "act_param": 32, "open-data": "None", - "reasoning": False, + "prefill": 
True, + "date": "2025-01-12" }, "Qwen--QwQ-32B-Preview--main": { "name": "QwQ-32B-Preview", @@ -1865,6 +2049,205 @@ "size": 32, "act_param": 32, "open-data": "None", - "reasoning": False, + "prefill": True, + "date": "2024-11-28" + }, + "chatgpt-4o-latest--main": { + "name": "ChatGPT-4o-latest-20250129", + "link": "https://chat.openai.com/", + "open-data": "None", + "prompted": True, + "moe": False, + "size": None, + "act_param": None, + "prefill": False, + "date": "2025-01-29" + }, + "Kwaipilot--KwaiCoder-23B-A4B-v1--main": { + "name": "KwaiCoder-23B-A4B-v1", + "link": "https://huggingface.co/Kwaipilot/KwaiCoder-23B-A4B-v1", + "open-data": "None", + "prompted": False, + "moe": True, + "size": 23, + "act_param": 4, + "prefill": True, + "date": "2025-01-25" + }, + "qwen-max-latest--main": { + "name": "Qwen2.5-Max", + "link": "https://qwenlm.github.io/blog/qwen2.5-max/", + "open-data": "None", + "prompted": True, + "moe": True, + "size": None, + "act_param": None, + "prefill": False, + "date": "2025-01-28" + }, + "claude-3-7-sonnet-20250219--3200-output-128k-2025-02-19--main": { + "name": "Claude-3.7-Sonnet-20250219 (temperature=1, length=12800, reasoning=3200)", + "link": "https://www.anthropic.com/news/claude-3-7-sonnet", + "prompted": True, + "moe": False, + "size": None, + "act_param": None, + "open-data": "None", + "prefill": False, + "date": "2025-02-19" + }, + "claude-3-7-sonnet-20250219--main": { + "name": "Claude-3.7-Sonnet-20250219", + "link": "https://www.anthropic.com/news/claude-3-7-sonnet", + "prompted": True, + "moe": False, + "size": None, + "act_param": None, + "open-data": "None", + "prefill": False, + "date": "2025-02-19" + }, + "WarriorCoder-6.7B--main": { + "name": "WarriorCoder-6.7B (Reproduced)", + "link": "https://arxiv.org/abs/2412.17395", + "open-data": "None", + "prompted": True, + "moe": False, + "size": 6.7, + "act_param": 6.7, + "prefill": True, + "date": "2025-02-18" + }, + "google--gemma-3-27b-it--main": { + "name": "Gemma-3-27B-Instruct", + "link": "https://huggingface.co/google/gemma-3-27b-it", + "open-data": "None", + "prompted": True, + "moe": False, + "size": 27, + "act_param": 27, + "prefill": True, + "date": "2025-03-12" + }, + "Qwen--QwQ-32B--skip_prefill--main": { + "name": "QwQ-32B (w/ Reasoning)", + "link": "https://huggingface.co/Qwen/QwQ-32B", + "open-data": "None", + "prompted": True, + "moe": False, + "size": 32, + "act_param": 32, + "prefill": False, + "date": "2025-03-06" + }, + "deepseek-chat-0324--main": { + "name": "DeepSeek-V3-0324", + "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324", + "open-data": "None", + "prompted": True, + "moe": True, + "size": 671, + "act_param": 37, + "prefill": False, + "date": "2025-03-24" + }, + "gemini-2.5-pro-exp-03-25--main": { + "name": "Gemini-2.5-Pro-Exp-03-25", + "link": "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/", + "open-data": "None", + "prompted": True, + "moe": False, + "size": None, + "act_param": None, + "prefill": False, + "date": "2025-03-25" + }, + "meta/llama-4-scout-17b-16e-instruct--main": { + "name": "Llama-4-Scout", + "link":
"https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", + "open-data": "None", + "prompted": True, + "moe": True, + "size": 109, + "act_param": 17, + "open-data": "None", + "prefill": False, + "date": "2025-04-05" + }, + "meta/llama-4-maverick-17b-128e-instruct--main": { + "name": "Llama-4-Maverick", + "link": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "open-data": "None", + "prompted": True, + "moe": True, + "size": 109, + "act_param": 17, + "open-data": "None", + "prefill": False, + "date": "2025-04-05" }, -} + "agentica-org/DeepCoder-14B-Preview--main": { + "name": "DeepCoder-14B-Preview", + "link": "https://huggingface.co/agentica-org/DeepCoder-14B-Preview", + "open-data": "None", + "prompted": True, + "moe": True, + "size": 14, + "act_param": 14, + "open-data": "None", + "prefill": True, + "date": "2025-04-09" + }, + "openrouter/quasar-alpha--main": { + "name": "Quasar-Alpha", + "link": "https://openrouter.ai/openrouter/quasar-alpha", + "open-data": "None", + "prompted": True, + "moe": True, + "size": None, + "act_param": None, + "open-data": "None", + "prefill": False, + "date": "2025-04-02" + }, + "agentica-org/DeepCoder-14B-Preview--skip_prefill--main": { + "name": "DeepCoder-14B-Preview (w/ Reasoning, 64k tokens, temperature=0.6)", + "link": "https://huggingface.co/agentica-org/DeepCoder-14B-Preview", + "open-data": "None", + "prompted": True, + "moe": False, + "size": 14, + "act_param": 14, + "open-data": "None", + "prefill": False, + "date": "2025-04-09" + }, + "openrouter/optimus-alpha--main": { + "name": "Optimus-Alpha", + "link": "https://openrouter.ai/openrouter/optimus-alpha", + "open-data": "None", + "prompted": True, + "moe": True, + "size": None, + "act_param": None, + "open-data": "None", + "prefill": False, + "date": "2025-04-10" + } +} \ No newline at end of file diff --git a/bigcodebench/gen/util/anthropic_request.py b/bigcodebench/gen/util/anthropic_request.py index e53feab..f6d18fd 100644 --- a/bigcodebench/gen/util/anthropic_request.py +++ b/bigcodebench/gen/util/anthropic_request.py @@ -16,7 +16,19 @@ def make_auto_request(client: anthropic.Client, *args, **kwargs) -> Message: try: signal.signal(signal.SIGALRM, handler) signal.alarm(100) - ret = client.messages.create(*args, **kwargs) + if "reasoning_budget" in kwargs and "reasoning_beta" in kwargs: + kwargs["thinking"] = { + "type": "enabled", + "budget_tokens": kwargs["reasoning_budget"], + } + kwargs["betas"] = [kwargs["reasoning_beta"]] + kwargs.pop("reasoning_budget") + kwargs.pop("reasoning_beta") + kwargs.pop("temperature") + if "thinking" in kwargs: + ret = client.beta.messages.create(*args, **kwargs, stream=True) + else: + ret = client.messages.create(*args, **kwargs) signal.alarm(0) except anthropic.RateLimitError: print("Rate limit exceeded. 
Waiting...") diff --git a/bigcodebench/gen/util/google_request.py b/bigcodebench/gen/util/google_request.py index 9e13607..5a76362 100644 --- a/bigcodebench/gen/util/google_request.py +++ b/bigcodebench/gen/util/google_request.py @@ -1,11 +1,12 @@ import time -import google.generativeai as genai +from google import genai from google.api_core.exceptions import GoogleAPICallError, ResourceExhausted def make_request( - client: genai.GenerativeModel, + model: str, + client: genai.Client, message: str, temperature: float, n: int, @@ -13,21 +14,34 @@ def make_request( ) -> genai.types.GenerateContentResponse: kwargs = {"temperature": temperature, "max_output_tokens": max_new_tokens} - if "-thinking-" in client.model_name: + if "-thinking-" in model: kwargs.pop("max_output_tokens") - - response = client.generate_content( - [{"role": "user", "parts": [message]}], - generation_config=genai.types.GenerationConfig( + + response = client.models.generate_content( + model=model, + contents=message, + config=genai.types.GenerateContentConfig( candidate_count=n, + safety_settings=[ + genai.types.SafetySetting( + category='HARM_CATEGORY_DANGEROUS_CONTENT', + threshold='BLOCK_NONE' + ), + genai.types.SafetySetting( + category='HARM_CATEGORY_SEXUALLY_EXPLICIT', + threshold='BLOCK_NONE' + ), + genai.types.SafetySetting( + category='HARM_CATEGORY_HATE_SPEECH', + threshold='BLOCK_NONE' + ), + genai.types.SafetySetting( + category='HARM_CATEGORY_HARASSMENT', + threshold='BLOCK_NONE' + ), + ], **kwargs - ), - safety_settings=[ - {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}, - {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"}, - {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"}, - {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"}, - ], + ), ) return response diff --git a/bigcodebench/gen/util/openai_request.py b/bigcodebench/gen/util/openai_request.py index f8db3f5..3c8b741 100644 --- a/bigcodebench/gen/util/openai_request.py +++ b/bigcodebench/gen/util/openai_request.py @@ -17,7 +17,7 @@ def make_request( kwargs["top_p"] = 0.95 kwargs["max_completion_tokens"] = max_tokens kwargs["temperature"] = temperature - if model.startswith("o1-") or model.startswith("o3-") or model.endswith("-reasoner"): # pop top-p and max_completion_tokens + if any(model.startswith(m) or model.endswith(m) for m in ["o1-", "o3-", "reasoner", "grok-3-mini-beta"]): # pop top-p and max_completion_tokens kwargs.pop("top_p") kwargs.pop("max_completion_tokens") kwargs.pop("temperature") diff --git a/bigcodebench/generate.py b/bigcodebench/generate.py index bcf1463..adbf892 100644 --- a/bigcodebench/generate.py +++ b/bigcodebench/generate.py @@ -127,12 +127,19 @@ def run_codegen( split: str, subset: str, root: str = "bcb_results", + lora_path: str = None, bs: Optional[int] = None, n_samples: int = 1, temperature: float = 0.0, max_new_tokens: int = 1280, + # vllm + max_model_len: int = 12800, greedy: bool = False, + # openai reasoning_effort: str = "medium", + # anthropic + reasoning_budget: int = 0, + reasoning_beta: str = "output-128k-2025-02-19", strip_newlines: bool = False, direct_completion: bool = False, resume: bool = True, @@ -170,9 +177,13 @@ def run_codegen( backend=backend, subset=subset, split=split, + lora_path=lora_path, temperature=temperature, max_new_tokens=max_new_tokens, + max_model_len=max_model_len, reasoning_effort=reasoning_effort, + reasoning_budget=reasoning_budget, + reasoning_beta=reasoning_beta, 
instruction_prefix=instruction_prefix, response_prefix=response_prefix, prefill=not skip_prefill, @@ -186,9 +197,15 @@ def run_codegen( ) extra = "-" + subset if subset != "full" else "" - if reasoning_effort and model.startswith("o1-") or model.startswith("o3-") or model.endswith("-reasoner"): + if backend == "openai" and reasoning_effort and any(model.startswith(m) or model.endswith(m) for m in ["o1-", "o3-", "reasoner", "grok-3-mini-beta"]): model = model + f"--{reasoning_effort}" - + + if lora_path: + model = model + f"--lora-{lora_path}" + + if backend == "anthropic" and reasoning_budget and reasoning_beta: + model = model + f"--{reasoning_budget}-{reasoning_beta}" + if skip_prefill: identifier = model.replace("/", "--") + "--skip_prefill" + f"--{revision}--bigcodebench{extra}-{split}--{backend}-{temperature}-{n_samples}-sanitized_calibrated.jsonl" else: diff --git a/bigcodebench/provider/__init__.py b/bigcodebench/provider/__init__.py index c78d870..4cb3410 100644 --- a/bigcodebench/provider/__init__.py +++ b/bigcodebench/provider/__init__.py @@ -6,11 +6,16 @@ def make_model( backend: str, subset: str, split: str, + lora_path: str = None, dataset: str = "bigcodebench", temperature: float = 0.0, max_new_tokens: int = 1280, - # o1 and o3 only + max_model_len: int = 12800, + # openai only reasoning_effort: str = "medium", + # anthropic only + reasoning_budget: int = 0, + reasoning_beta: str = "output-128k-2025-02-19", # instruction model only instruction_prefix: str = None, response_prefix: str = None, @@ -35,8 +40,10 @@ def make_model( name=model, subset=subset, split=split, + lora_path=lora_path, temperature=temperature, max_new_tokens=max_new_tokens, + max_model_len=max_model_len, revision=revision, dataset=dataset, direct_completion=direct_completion, @@ -55,6 +62,7 @@ def make_model( name=model, subset=subset, split=split, + lora_path=lora_path, temperature=temperature, max_new_tokens=max_new_tokens, revision=revision, @@ -118,6 +126,8 @@ def make_model( split=split, temperature=temperature, max_new_tokens=max_new_tokens, + reasoning_budget=reasoning_budget, + reasoning_beta=reasoning_beta, instruction_prefix=instruction_prefix, response_prefix=response_prefix, ) diff --git a/bigcodebench/provider/anthropic.py b/bigcodebench/provider/anthropic.py index 1969e0c..b4a7e43 100644 --- a/bigcodebench/provider/anthropic.py +++ b/bigcodebench/provider/anthropic.py @@ -9,9 +9,11 @@ from bigcodebench.provider.utility import make_raw_chat_prompt class AnthropicDecoder(DecoderBase): - def __init__(self, name: str, **kwargs) -> None: + def __init__(self, name: str, reasoning_budget: int = 0, reasoning_beta: str = "output-128k-2025-02-19", **kwargs) -> None: super().__init__(name, **kwargs) self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_KEY")) + self.reasoning_budget = reasoning_budget + self.reasoning_beta = reasoning_beta def codegen( self, prompts: List[str], do_sample: bool = True, num_samples: int = 200 @@ -43,8 +45,20 @@ def codegen( max_tokens=self.max_new_tokens, temperature=self.temperature, stop_sequences=self.eos, + reasoning_budget=self.reasoning_budget, + reasoning_beta=self.reasoning_beta, ) - outputs.append(ret.content[0].text) + if isinstance(ret, anthropic.Stream): + output = "" + for chunk in ret: + if chunk.type == "content_block_delta": + # if chunk.delta.type == "thinking_delta": + # output += chunk.delta.thinking + if chunk.delta.type == "text_delta": + output += chunk.delta.text + outputs.append(output) + else: + outputs.append(ret.content[0].text) 
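Putting the pieces together (the kwargs rewrite in `make_auto_request` plus the stream handling above), the request the provider ends up issuing looks roughly like this; a sketch assuming an Anthropic SDK recent enough to expose the beta extended-thinking API, with values mirroring the defaults wired through `generate.py` in this PR:

```python
import anthropic

client = anthropic.Anthropic()
# Extended thinking drops temperature/top_p and streams the response.
stream = client.beta.messages.create(
    model="claude-3-7-sonnet-20250219",
    max_tokens=12800,
    thinking={"type": "enabled", "budget_tokens": 3200},
    betas=["output-128k-2025-02-19"],
    messages=[{"role": "user", "content": "..."}],
    stream=True,
)
text = ""
for chunk in stream:
    # Thinking deltas are discarded; only the final text blocks are kept.
    if chunk.type == "content_block_delta" and chunk.delta.type == "text_delta":
        text += chunk.delta.text
```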
diff --git a/bigcodebench/provider/google.py b/bigcodebench/provider/google.py
index 2194c47..e3b18ff 100644
--- a/bigcodebench/provider/google.py
+++ b/bigcodebench/provider/google.py
@@ -2,7 +2,7 @@ from typing import List
 
 from tqdm import tqdm
 
-import google.generativeai as genai
+from google import genai
 
 from bigcodebench.provider.base import DecoderBase
 from bigcodebench.gen.util.google_request import make_auto_request
@@ -12,8 +12,8 @@ class GoogleDecoder(DecoderBase):
     def __init__(self, name: str, **kwargs):
         super().__init__(name, **kwargs)
-        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
-        self.client = genai.GenerativeModel(name)
+        self.model = name
+        self.client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
 
     def codegen(
         self, prompts: List[str], do_sample: bool = True, num_samples: int = 200
@@ -34,7 +34,8 @@ def codegen(
                 tokenizer=None,
             )
             ret = make_auto_request(
-                self.client,
+                model=self.model,
+                client=self.client,
                 message=message,
                 n=num_samples,
                 temperature=self.temperature,
diff --git a/bigcodebench/provider/openai.py b/bigcodebench/provider/openai.py
index 12790f6..ff1459f 100644
--- a/bigcodebench/provider/openai.py
+++ b/bigcodebench/provider/openai.py
@@ -28,7 +28,7 @@ def codegen(
             tokenizer=None,
         ) for prompt in prompts]
         # use concurrency based batching for o1 and deepseek models
-        if self.name.startswith("o1-") or self.name.startswith("o3-") or self.name.startswith("deepseek"):
+        if any(self.name.startswith(model) or self.name.endswith(model) for model in ["o1-", "o3-", "reasoner", "grok-3-mini-beta"]):
             return self._codegen_batch_via_concurrency(messages, num_samples)
 
         return self._codegen_api_batch(messages, num_samples)
diff --git a/bigcodebench/provider/vllm.py b/bigcodebench/provider/vllm.py
index cc928e4..41cd251 100644
--- a/bigcodebench/provider/vllm.py
+++ b/bigcodebench/provider/vllm.py
@@ -3,6 +3,8 @@ from transformers import AutoTokenizer
 
 from vllm import LLM, SamplingParams
+from vllm.lora.request import LoRARequest
+from huggingface_hub import snapshot_download
 
 from bigcodebench.provider.base import DecoderBase
 from bigcodebench.provider.utility import (
@@ -11,7 +13,7 @@
 )
 
 class VllmDecoder(DecoderBase):
-    def __init__(self, name: str, dataset: str, tp: int, **kwargs) -> None:
+    def __init__(self, name: str, lora_path: str, dataset: str, tp: int, max_model_len: int, **kwargs) -> None:
         super().__init__(name, **kwargs)
 
         kwargs = {
@@ -29,7 +31,17 @@ def __init__(self, name: str, dataset: str, tp: int, **kwargs) -> None:
         else:
             if self.prefill and "```" in self.response_prefix:
                 self.eos += ["\n```\n"]
-        self.llm = LLM(model=name, max_model_len=self.max_new_tokens, **kwargs)
+
+        self.lora_request = None
+        if lora_path:
+            local_lora_path = snapshot_download(lora_path)
+            self.lora_request = LoRARequest(
+                "lora",
+                1,
+                local_lora_path,
+            )
+
+        self.llm = LLM(model=name, max_model_len=max_model_len, enable_lora=True if self.lora_request else False, **kwargs)
         self.llm.set_tokenizer(tokenizer=self.tokenizer)
 
     def is_direct_completion(self) -> bool:
@@ -64,6 +76,7 @@ def codegen(
                 stop=self.eos,
                 skip_special_tokens=self.skip_special_tokens,
             ),
+            lora_request=self.lora_request,
             use_tqdm=True,
        )
diff --git a/setup.cfg b/setup.cfg
index cc20139..5907add 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,7 +35,7 @@ install_requires =
     rich
     accelerate>=0.30.1
     anthropic>=0.26.1
-    google-generativeai>=0.5.4
+    google-genai
     mistralai>=0.2.0,<1.0.0
     openai>=1.11.1
     e2b
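The LoRA plumbing above follows vLLM's standard adapter flow: download the adapter, wrap it in a `LoRARequest`, enable LoRA on the engine, and pass the request at generation time. A condensed, self-contained sketch of the same pattern; the adapter repo id and base model here are hypothetical placeholders:

```python
# Sketch of the vLLM LoRA wiring added above; only the call pattern is
# taken from this patch, the model and adapter ids are placeholders.
from huggingface_hub import snapshot_download
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

local_path = snapshot_download("some-org/some-lora-adapter")  # hypothetical id
lora_request = LoRARequest("lora", 1, local_path)

llm = LLM(model="bigcode/starcoder2-7b", max_model_len=12800, enable_lora=True)
outputs = llm.generate(
    ["def task_func():"],
    SamplingParams(temperature=0.0, max_tokens=128),
    lora_request=lora_request,  # applied per generate() call
)
```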
diff --git a/tools/fix_v025.py b/tools/fix_v025.py
new file mode 100644
index 0000000..edbeb71
--- /dev/null
+++ b/tools/fix_v025.py
@@ -0,0 +1,135 @@
+from datasets import load_dataset
+from huggingface_hub import HfApi
+
+BIGCODEBENCH_HF = "bigcode/bigcodebench"
+BIGCODEBENCH_HARD_HF = "bigcode/bigcodebench-hard"
+BIGCODEBENCH_VERSION = "v0.1.4"
+BIGCODEBENCH_UPDATE = "bigcode/bcb_update"
+BIGCODEBENCH_NEW_VERSION = "v0.1.5"
+
+def map_ds(sample):
+    if sample["task_id"] in ["BigCodeBench/332"]:
+        sample['code_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['code_prompt']
+        sample['complete_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['complete_prompt']
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+        )
+
+    if sample["task_id"] in ["BigCodeBench/334"]:
+        sample['code_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['code_prompt']
+        sample['complete_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['complete_prompt']
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('punkt')\n"
+        )
+
+    if sample["task_id"] in ["BigCodeBench/376"]:
+        sample['code_prompt'] = sample['code_prompt'].replace(
+            "import nltk\n",
+            "import nltk\nnltk.download('stopwords')\n",
+            1
+        )
+        sample['complete_prompt'] = sample['complete_prompt'].replace(
+            "import nltk\n",
+            "import nltk\nnltk.download('stopwords')\n",
+            1
+        )
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+        )
+
+    if sample["task_id"] in ["BigCodeBench/383"]:
+        sample['code_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['code_prompt']
+        sample['complete_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['complete_prompt']
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('punkt')\n"
+        )
+
+    if sample["task_id"] in ["BigCodeBench/633"]:
+        sample['code_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['code_prompt']
+        sample['complete_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['complete_prompt']
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+        )
+
+    if sample["task_id"] in ["BigCodeBench/635"]:
+        sample['code_prompt'] = sample['code_prompt'].replace(
+            "# Importing the required libraries",
+            "# Importing the required libraries\nimport nltk\nnltk.download('stopwords')\n"
+        )
+        sample['complete_prompt'] = sample['complete_prompt'].replace(
+            "# Importing the required libraries",
+            "# Importing the required libraries\nimport nltk\nnltk.download('stopwords')\n"
+        )
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+        )
+
+    if sample["task_id"] in ["BigCodeBench/849"]:
+        sample['code_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['code_prompt']
+        sample['complete_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['complete_prompt']
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+        )
+
+    if sample["task_id"] in ["BigCodeBench/940"]:
+        sample['code_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['code_prompt']
+        sample['complete_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['complete_prompt']
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('punkt')\n"
+        )
+
+    if sample["task_id"] in ["BigCodeBench/1109"]:
+        sample['code_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['code_prompt']
+        sample['complete_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['complete_prompt']
+        sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+            "\nYou should write self-contained code starting with:\n```\n",
+            "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('punkt')\n"
+        )
+
+    return sample
+
+if __name__ == "__main__":
+    api = HfApi()
+    ds_dict = load_dataset(BIGCODEBENCH_HF)
+    hard_ds_dict = load_dataset(BIGCODEBENCH_HARD_HF)
+    ds = ds_dict[BIGCODEBENCH_VERSION]
+    hard_ds = hard_ds_dict[BIGCODEBENCH_VERSION]
+    function_id = [332, 334, 376, 383, 633, 635, 849, 940, 1109]
+
+    new_ds = ds.map(map_ds)
+    new_ds.to_json("BigCodeBench.jsonl")
+    ds_dict[BIGCODEBENCH_NEW_VERSION] = new_ds
+    ds_dict.push_to_hub(BIGCODEBENCH_HF)
+
+    new_hard_ds = hard_ds.map(map_ds)
+    new_hard_ds.to_json("BigCodeBench-Hard.jsonl")
+    hard_ds_dict[BIGCODEBENCH_NEW_VERSION] = new_hard_ds
+    hard_ds_dict.push_to_hub(BIGCODEBENCH_HARD_HF)
+
+    for i in function_id:
+        old_sample = ds.select([i])
+        new_sample = new_ds.select([i])
+        old_sample.to_json("old.jsonl")
+        new_sample.to_json("new.jsonl")
+        api.upload_file(
+            path_or_fileobj="old.jsonl",
+            path_in_repo=f"{i}/old.jsonl",
+            repo_id=BIGCODEBENCH_UPDATE,
+            # repo_type="dataset"
+        )
+        api.upload_file(
+            path_or_fileobj="new.jsonl",
+            path_in_repo=f"{i}/new.jsonl",
+            repo_id=BIGCODEBENCH_UPDATE,
+            # repo_type="dataset"
+        )
\ No newline at end of file
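One quick way to sanity-check the pushed update is to reload the new version tag and inspect a patched task. This sketch assumes, as the script above implies by indexing `ds_dict` with version names and selecting rows by task number, that version tags double as dataset splits and that the row index equals the numeric task id in the full set:

```python
# Sketch: spot-check one patched task after the push above. Assumes the
# dataset exposes version tags (e.g. "v0.1.5") as splits, matching how
# the script indexes ds_dict by BIGCODEBENCH_VERSION.
from datasets import load_dataset

ds = load_dataset("bigcode/bigcodebench", split="v0.1.5")
sample = ds[332]  # row index equals the numeric task id in the full set
assert sample["task_id"] == "BigCodeBench/332"
assert "nltk.download('stopwords')" in sample["complete_prompt"]
```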