diff --git a/Docker/Gradio.Dockerfile b/Docker/Gradio.Dockerfile new file mode 100644 index 0000000..0c7a0c8 --- /dev/null +++ b/Docker/Gradio.Dockerfile @@ -0,0 +1,45 @@ +# Better use newer Python as generated code can use new features +FROM python:3.10-slim + +# install git, g++ and python3-tk +RUN apt-get update && apt-get install -y git g++ python3-tk zip unzip procps r-base + +# upgrade to latest pip +RUN pip install --upgrade pip + +RUN pip install gradio==4.31.0 gradio[oauth] +# Add a new user "bigcodebenchuser" +RUN adduser --disabled-password --gecos "" bigcodebenchuser + +RUN rm -rf /bigcodebench + +# Acquire benchmark code to local +ADD "https://api.github.com/repos/bigcode-project/bigcodebench/commits?per_page=1" latest_commit +RUN git clone https://github.com/bigcode-project/bigcodebench.git /bigcodebench + +RUN cd /bigcodebench && pip install . + +# Pre-install the dataset +RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()" + +RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt + +RUN apt-get update && \ + apt-get install -y \ + bash \ + git git-lfs \ + wget curl procps \ + htop vim nano && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +RUN chown -R bigcodebenchuser:bigcodebenchuser /app + +RUN chmod -R 777 /app + +USER bigcodebenchuser + +# ENTRYPOINT ["python", "app.py"] + +# CMD ["sh", "-c", "pids=$(ps -u $(id -u) -o pid,comm | grep 'bigcodebench' | awk '{print $1}'); if [ -n \"$pids\" ]; then echo $pids | xargs -r kill; fi; rm -rf /tmp/*"] \ No newline at end of file diff --git a/README.md b/README.md index 58f77ec..17d066c 100755 --- a/README.md +++ b/README.md @@ -235,9 +235,10 @@ You are strongly recommended to use a sandbox such as [docker](https://docs.dock ```bash # Mount the current directory to the container -# If you want to change the RAM address space limit (in MB, 128 GB by default): `--max-as-limit XXX` -# If you 
want to change the RAM data segment limit (in MB, 4 GB by default): `--max-data-limit` -# If you want to change the RAM stack limit (in MB, 4 MB by default): `--max-stack-limit` +# If you want to change the RAM address space limit (in MB, 30 GB by default): `--max-as-limit XXX` +# If you want to change the RAM data segment limit (in MB, 30 GB by default): `--max-data-limit` +# If you want to change the RAM stack limit (in MB, 10 MB by default): `--max-stack-limit` +# If you want to increase the execution time limit (in seconds, 240 seconds by default): `--min-time-limit` docker run -v $(pwd):/app bigcodebench/bigcodebench-evaluate:latest --split [complete|instruct] --subset [full|hard] --samples samples-sanitized-calibrated.jsonl # If you only want to check the ground truths @@ -259,6 +260,8 @@ Then, run the evaluation: bigcodebench.evaluate --split [complete|instruct] --subset [full|hard] --samples samples-sanitized-calibrated.jsonl # ...If you really don't want to check the ground truths bigcodebench.evaluate --split [complete|instruct] --subset [full|hard] --samples samples-sanitized-calibrated.jsonl --no-gt +# If you want to save the pass rate to a file +bigcodebench.evaluate --split [complete|instruct] --subset [full|hard] --samples samples-sanitized-calibrated.jsonl --save_pass_rate # You are strongly recommended to use the following command to clean up the environment after evaluation: pids=$(ps -u $(id -u) -o pid,comm | grep 'bigcodebench' | awk '{print $1}'); if [ -n \"$pids\" ]; then echo $pids | xargs -r kill; fi; @@ -317,7 +320,11 @@ Here are some tips to speed up the evaluation: You can inspect the failed samples by using the following command: ```bash -bigcodebench.inspect --eval-results sample-sanitized-calibrated_eval_results.json --in-place +# Inspect the failed samples and save the results to `inspect/` +bigcodebench.inspect --eval_results sample-sanitized-calibrated_eval_results.json --split complete --subset hard + +# Re-run the inspection in 
place +bigcodebench.inspect --eval_results sample-sanitized-calibrated_eval_results.json --split complete --subset hard --in_place ``` ## 🚀 Full Script @@ -347,9 +354,9 @@ We share pre-generated code samples from LLMs we have [evaluated](https://huggin ## 🐞 Known Issues -- [ ] Due to [the Hugging Face tokenizer update](https://github.com/huggingface/transformers/pull/31305), some tokenizer may be broken and will degrade the performance of the evaluation. Therefore, we set up with `legacy=False` for the initialization. If you notice the unexpected behaviors, please try `--tokenizer_legacy` during the generation. +- [ ] Due to [the Hugging Face tokenizer update](https://github.com/huggingface/transformers/pull/31305), some tokenizers may be broken, which will degrade the performance of the evaluation. Therefore, we initialize tokenizers with `legacy=False`. If you notice unexpected behavior, please try `--tokenizer_legacy` during generation. -- [ ] Due to the flakes in the evaluation, the execution results may vary slightly (~0.2%) between runs. We are working on improving the evaluation stability. +- [ ] Due to flakiness in the evaluation, the execution results may vary slightly (~0.2% for the Full set and ~0.6% for the Hard set) between runs. We are working on improving the evaluation stability. - [ ] You may get errors like `ImportError: /usr/local/lib/python3.10/site-packages/matplotlib/_c_internal_utils.cpython-310-x86_64-linux-gnu.so: failed to map segment from shared object` when running the evaluation. This is due to the memory limit of the docker container. You can increase the memory limit of the docker container to solve this issue. 
diff --git a/analysis/bcb_subset.py b/analysis/bcb_subset.py index 7760a92..cfdd5cc 100644 --- a/analysis/bcb_subset.py +++ b/analysis/bcb_subset.py @@ -12,6 +12,7 @@ from utils import * VERSION = "v0.1.0_hf" + def update_model_info(model_info): for model, info in model_info.items(): if "https://huggingface.co/" in info["link"]: diff --git a/analysis/get_results.py b/analysis/get_results.py index 83af2b3..664e156 100755 --- a/analysis/get_results.py +++ b/analysis/get_results.py @@ -17,11 +17,15 @@ def update_model_info(model_info): if "https://huggingface.co/" in info["link"]: hf_model = info["link"].split("https://huggingface.co/")[-1] print(hf_model) - tokenizer = AutoTokenizer.from_pretrained(hf_model, trust_remote_code=True) - if tokenizer.chat_template is None: + try: + tokenizer = AutoTokenizer.from_pretrained(hf_model, trust_remote_code=True) + + if tokenizer.chat_template is None: + model_info[model]["direct_complete"] = True + else: + model_info[model]["direct_complete"] = False + except: model_info[model]["direct_complete"] = True - else: - model_info[model]["direct_complete"] = False else: model_info[model]["direct_complete"] = False @@ -44,7 +48,7 @@ def get_results(tids): "moe": info["moe"], "size": info["size"], "act_param": info["act_param"], - "direct_complete": info["direct_complete"], + # "direct_complete": info["direct_complete"], } for model, info in model_info.items(): @@ -53,10 +57,16 @@ def get_results(tids): files = glob(f"results/{model}--bigcodebench-*.json") assert files, f"No files found for results/{model}--bigcodebench-*.json" for file in files: - _, suffix = os.path.basename(file).split("--bigcodebench-") + try: + _, suffix = os.path.basename(file).split("--bigcodebench-hard-") + with open("results/"+model+"--bigcodebench-hard-"+suffix, "r") as f: + data = json.load(f) + except: + _, suffix = os.path.basename(file).split("--bigcodebench-") + with open("results/"+model+"--bigcodebench-"+suffix, "r") as f: + data = json.load(f) status 
= [] - with open("results/"+model+"--bigcodebench-"+suffix, "r") as f: - data = json.load(f) + for key, value in data["eval"].items(): if key not in tids: continue @@ -76,22 +86,22 @@ def get_results(tids): mode = "-cal" results[info["name"]][f"pass@1"][f"{task}{mode}"] = round(mean(status)*100,1) - if not info["prompted"] or info["direct_complete"]: + if not info["prompted"]:# or info["direct_complete"]: results[info["name"]][f"pass@1"][f"{task}-cal"] = round(mean(status)*100,1) for model, result in results.items(): for task in ["complete"]: origin = result["pass@1"].pop(task) - assert origin, f"Missing original complete results for {model}" + # assert origin, f"Missing original complete results for {model}" calibrate = result["pass@1"].pop(f"{task}-cal") if calibrate: - if calibrate - origin > 1: - results[model]["lazy"] = True - else: - results[model]["lazy"] = False + # if calibrate - origin > 1: + # results[model]["lazy"] = True + # else: + # results[model]["lazy"] = False results[model]["pass@1"][task] = calibrate else: - results[model]["lazy"] = False + # results[model]["lazy"] = False results[model]["pass@1"][task] = origin calibrate_instruct = result["pass@1"].pop(f"instruct-cal") result["pass@1"]["instruct"] = calibrate_instruct @@ -151,14 +161,44 @@ def read_task_perf(tids, task="complete"): task_perf = dict() model = model.replace("/", "--") try: - if info["prompted"] and not info["direct_complete"]: - files = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized-calibrated_eval_results.json") - if files: - file = files[0] - else: - file = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized_eval_results.json")[0] - else: - file = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized_eval_results.json")[0] + try: + try: + if info["prompted"]:# and not info["direct_complete"]: + files = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized-calibrated_hard_eval_results.json") + if files: + file = files[0] + else: + file = 
glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized_hard_eval_results.json")[0] + else: + file = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized_hard_eval_results.json")[0] + except: + if info["prompted"]: + files = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized-calibrated_eval_results.json") + if files: + file = files[0] + else: + file = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized_eval_results.json")[0] + else: + file = glob(f"results/{model}--bigcodebench-{task}*-0-1-sanitized_eval_results.json")[0] + except: + try: + if info["prompted"]:# and not info["direct_complete"]: + files = glob(f"results/{model}--bigcodebench-hard-{task}*-0-1-sanitized-calibrated_hard_eval_results.json") + if files: + file = files[0] + else: + file = glob(f"results/{model}--bigcodebench-hard-{task}*-0-1-sanitized_hard_eval_results.json")[0] + else: + file = glob(f"results/{model}--bigcodebench-hard-{task}*-0-1-sanitized_hard_eval_results.json")[0] + except: + if info["prompted"]: + files = glob(f"results/{model}--bigcodebench-hard-{task}*-0-1-sanitized-calibrated_eval_results.json") + if files: + file = files[0] + else: + file = glob(f"results/{model}--bigcodebench-hard-{task}*-0-1-sanitized_eval_results.json")[0] + else: + file = glob(f"results/{model}--bigcodebench-hard-{task}*-0-1-sanitized_eval_results.json")[0] except: continue @@ -255,8 +295,9 @@ def get_elo_mle(df, SCALE=400, BASE=10, INIT_RATING=1000): def update_elo_rating(results, elo_dict): for model, info in model_info.items(): if info["name"] not in elo_dict: - continue - results[info["name"]]["elo_mle"] = elo_dict[info["name"]] + results[info["name"]]["elo_mle"] = None + else: + results[info["name"]]["elo_mle"] = elo_dict[info["name"]] return results @@ -296,7 +337,7 @@ def get_solve_rate(data_dict, task="complete"): def get_hf_ds(results): - hf_dataset = {"model": [], "link": [], "moe": [], "size": [], "act_param": [], "type": [], "lazy": [], "direct_complete": [], + hf_dataset 
= {"model": [], "link": [], "moe": [], "size": [], "act_param": [], "type": [], #"lazy": [],# "direct_complete": [], "complete": [], "instruct": [], "elo_mle": []} for model, result in results.items(): @@ -306,10 +347,10 @@ def get_hf_ds(results): hf_dataset["size"].append(result["size"]) hf_dataset["act_param"].append(result["act_param"]) hf_dataset["type"].append("🔶" if result["prompted"] else "🟢") - hf_dataset["lazy"].append(result["lazy"]) + # hf_dataset["lazy"].append(result["lazy"]) hf_dataset["complete"].append(result["pass@1"]["complete"]) hf_dataset["instruct"].append(result["pass@1"]["instruct"]) - hf_dataset["direct_complete"].append(result["direct_complete"]) + # hf_dataset["direct_complete"].append(result["direct_complete"]) hf_dataset["elo_mle"].append(result["elo_mle"]) return Dataset.from_dict(hf_dataset) @@ -333,13 +374,24 @@ def push_ds(ds, path, local=False): ds.push_to_hub(path) +def get_perf_df(data_dict): + perfs = {"Model": []} + for task_id in data_dict[list(data_dict.keys())[0]]: + perfs[task_id] = [] + for model, task_perf in data_dict.items(): + perfs["Model"].append(model) + for task_id, status in task_perf.items(): + perfs[task_id].append(status) + return pd.DataFrame(perfs) + + if __name__ == "__main__": - bcb_orig = load_dataset("bigcode/bigcodebench", split="v0.1.0_hf") + # bcb_orig = load_dataset("bigcode/bigcodebench", split="v0.1.0_hf") bcb_hard = load_dataset("bigcode/bigcodebench-hard", split="v0.1.0_hf") - model_info = update_model_info(model_info) + # model_info = update_model_info(model_info) bcb_config = { - "": bcb_orig, + # "": bcb_orig, "-hard": bcb_hard, } for suffix, bcb in bcb_config.items(): @@ -347,7 +399,11 @@ def push_ds(ds, path, local=False): files = [] complete_data, complete_files = read_task_perf(bcb["task_id"], "complete") instruct_data, instruct_files = read_task_perf(bcb["task_id"], "instruct") - assert len(model_info) == len(complete_data) + complete_df = get_perf_df(complete_data) + instruct_df = 
get_perf_df(instruct_data) + push_ds(DatasetDict({"complete": Dataset.from_pandas(complete_df), "instruct": Dataset.from_pandas(instruct_df)}), f"bigcode/bigcodebench{suffix}-perf") + assert len(model_info) == len(complete_data),\ + f"Missing results for {set([val['name'] for val in model_info.values()]) - set([model for model in complete_data.keys()])}" with open("task2domain.json", "r") as f: task2domain = json.load(f) domain_complete = get_domain_perf(complete_data, task2domain) @@ -372,7 +428,10 @@ def push_ds(ds, path, local=False): } elo_ds = dict() for config, (task_level, no_tie) in elo_config.items(): - battles = get_winner_df(complete_data, bcb["task_id"], "complete", task_level=task_level, no_tie=no_tie) + filter_complete_data = {model: task_perf for model, task_perf in complete_data.items() if model in instruct_data} + complete_battles = get_winner_df(filter_complete_data, bcb["task_id"], "complete", task_level=task_level, no_tie=no_tie) + instruct_battles = get_winner_df(instruct_data, bcb["task_id"], "instruct", task_level=task_level, no_tie=no_tie) + battles = pd.concat([complete_battles, instruct_battles]) elo_mle_bootstrap = get_bootstrap_result(battles, get_elo_mle, 500) bootstrap_lu_median = elo_mle_bootstrap.median().reset_index().set_axis(["model", "Elo rating"], axis=1) bootstrap_lu_median["Elo rating"] = (bootstrap_lu_median["Elo rating"] + 0.5).astype(int) diff --git a/analysis/lib2domain.json b/analysis/lib2domain.json new file mode 100644 index 0000000..d06a48a --- /dev/null +++ b/analysis/lib2domain.json @@ -0,0 +1,157 @@ +{ + "Crypto": "Cryptography", + "PIL": "Visualization", + "array": "General", + "base64": "Cryptography", + "binascii": "Cryptography", + "bisect": "General", + "blake3": "Cryptography", + "bs4": "Network", + "calendar": "Time", + "cgi": "Network", + "chardet": "Network", + "cmath": "Computation", + "codecs": "Cryptography", + "collections": "General", + "cryptography": "Cryptography", + "csv": "System", + "ctypes": 
"System", + "datetime": "Time", + "dateutil": "Time", + "difflib": "General", + "django": "Network", + "docx": "System", + "email": "Network", + "faker": "General", + "flask": "Network", + "flask_login": "Network", + "flask_mail": "Network", + "flask_restful": "Network", + "fnmatch": "General", + "folium": "Visualization", + "functools": "General", + "geopy": "Network", + "getpass": "System", + "glob": "System", + "gzip": "System", + "hashlib": "Cryptography", + "heapq": "General", + "hmac": "Cryptography", + "html": "Network", + "http": "Network", + "importlib": "General", + "inspect": "General", + "io": "System", + "ipaddress": "Network", + "itertools": "General", + "json": "System", + "keras": "Computation", + "librosa": "Computation", + "logging": "System", + "lxml": "Network", + "math": "Computation", + "matplotlib": "Visualization", + "mechanize": "Network", + "mimetypes": "Network", + "multiprocessing": "System", + "nltk": "Computation", + "numpy": "Computation", + "openpyxl": "System", + "operator": "General", + "os": "System", + "pandas": "Computation", + "pathlib": "System", + "pickle": "System", + "pkgutil": "General", + "platform": "System", + "prettytable": "General", + "psutil": "System", + "pytesseract": "Computation", + "pytz": "Time", + "queue": "General", + "random": "General", + "re": "General", + "requests": "Network", + "rsa": "Cryptography", + "scipy": "Computation", + "seaborn": "Visualization", + "secrets": "Cryptography", + "select": "System", + "sendgrid": "Network", + "shutil": "System", + "sklearn": "Computation", + "smtplib": "Network", + "socket": "Network", + "soundfile": "Computation", + "sqlite3": "System", + "ssl": "Network", + "statistics": "Computation", + "statsmodels": "Computation", + "string": "General", + "struct": "System", + "subprocess": "System", + "sys": "System", + "tarfile": "System", + "tensorflow": "Computation", + "texttable": "General", + "textwrap": "General", + "threading": "System", + "time": "Time", + 
"turtle": "Visualization", + "types": "General", + "unicodedata": "General", + "urllib": "Network", + "uuid": "General", + "warnings": "General", + "werkzeug": "Network", + "wordninja": "Computation", + "wtforms": "Network", + "xlwt": "System", + "xml": "Network", + "xmltodict": "Network", + "yaml": "System", + "zipfile": "System", + "Levenshtein": "Computation", + "ast": "General", + "configparser": "System", + "cv2": "Computation", + "decimal": "General", + "enum": "General", + "errno": "System", + "flask_wtf": "Network", + "ftplib": "Network", + "gensim": "Computation", + "geopandas": "Computation", + "holidays": "Time", + "mpl_toolkits": "Visualization", + "natsort": "General", + "pyquery": "Network", + "python_http_client": "Network", + "regex": "General", + "shapely": "Computation", + "shlex": "System", + "signal": "System", + "skimage": "Computation", + "sympy": "Computation", + "textblob": "Computation", + "typing": "General", + "wikipedia": "Network", + "wordcloud": "Visualization", + "zlib": "System", + "aspose": "System", + "builtins": "General", + "locale": "System", + "imp": "System", + "docxtpl": "System", + "selenium": "Network", + "IPython": "Computation", + "filecmp": "System", + "multidict": "General", + "sqlalchemy": "System", + "obspy": "Computation", + "pprint": "General", + "xlrd": "System", + "argparse": "General", + "torch": "Computation", + "copy": "General" +} \ No newline at end of file diff --git a/analysis/task2domain.json b/analysis/task2domain.json new file mode 100644 index 0000000..fefa259 --- /dev/null +++ b/analysis/task2domain.json @@ -0,0 +1,5433 @@ +{ + "BigCodeBench/0": [ + "General", + "General" + ], + "BigCodeBench/1": [ + "General", + "General", + "General" + ], + "BigCodeBench/2": [ + "Computation", + "General" + ], + "BigCodeBench/3": [ + "Computation", + "General" + ], + "BigCodeBench/4": [ + "General", + "General" + ], + "BigCodeBench/5": [ + "Computation", + "General" + ], + "BigCodeBench/6": [ + "General", + "System" 
+ ], + "BigCodeBench/7": [ + "General", + "System", + "General" + ], + "BigCodeBench/8": [ + "General", + "General", + "General" + ], + "BigCodeBench/9": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/10": [ + "Computation", + "Computation", + "General", + "General" + ], + "BigCodeBench/11": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/12": [ + "System", + "Time", + "System", + "System" + ], + "BigCodeBench/13": [ + "System", + "Network", + "System" + ], + "BigCodeBench/14": [ + "System", + "System", + "System" + ], + "BigCodeBench/15": [ + "System", + "System", + "System" + ], + "BigCodeBench/16": [ + "System", + "System", + "System" + ], + "BigCodeBench/17": [ + "System", + "System", + "Time" + ], + "BigCodeBench/18": [ + "System", + "System", + "General", + "System", + "System" + ], + "BigCodeBench/19": [ + "System", + "System", + "System" + ], + "BigCodeBench/20": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/21": [ + "System", + "System" + ], + "BigCodeBench/22": [ + "General", + "General", + "General" + ], + "BigCodeBench/23": [ + "Computation", + "General" + ], + "BigCodeBench/24": [ + "Cryptography", + "Cryptography", + "System" + ], + "BigCodeBench/25": [ + "System", + "Cryptography", + "System" + ], + "BigCodeBench/26": [ + "Cryptography", + "Cryptography" + ], + "BigCodeBench/27": [ + "Cryptography", + "System", + "Time" + ], + "BigCodeBench/28": [ + "Cryptography", + "Network", + "System" + ], + "BigCodeBench/29": [ + "Cryptography", + "Computation", + "Computation" + ], + "BigCodeBench/30": [ + "System", + "General", + "System" + ], + "BigCodeBench/31": [ + "Computation", + "Visualization", + "General", + "Visualization" + ], + "BigCodeBench/32": [ + "Network", + "Network" + ], + "BigCodeBench/33": [ + "Computation", + "General" + ], + "BigCodeBench/34": [ + "Visualization", + "General", + "Visualization" + ], + "BigCodeBench/35": [ + "Visualization", + "Visualization" + ], + 
"BigCodeBench/36": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/37": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/38": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/39": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/40": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/41": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/42": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/43": [ + "Computation", + "Visualization" + ], + "BigCodeBench/44": [ + "Visualization", + "Computation" + ], + "BigCodeBench/45": [ + "Computation", + "Visualization", + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/46": [ + "Visualization", + "Computation" + ], + "BigCodeBench/47": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/48": [ + "Time", + "General", + "Visualization", + "Time" + ], + "BigCodeBench/49": [ + "Computation", + "Time", + "Visualization" + ], + "BigCodeBench/50": [ + "Time", + "Computation", + "Time", + "Visualization" + ], + "BigCodeBench/51": [ + "Visualization", + "Computation" + ], + "BigCodeBench/52": [ + "General", + "Computation" + ], + "BigCodeBench/53": [ + "General", + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/54": [ + "General", + "Computation", + "Computation" + ], + "BigCodeBench/55": [ + "Computation", + "General" + ], + "BigCodeBench/56": [ + "General", + "Computation" + ], + "BigCodeBench/57": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/58": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/59": [ + "Network", + "Visualization", + "Visualization" + ], + "BigCodeBench/60": [ + "Computation", + "System" + ], + "BigCodeBench/61": [ + "Time", + "Computation", + "Visualization" + ], + "BigCodeBench/62": [ + "General", + 
"Visualization", + "Visualization" + ], + "BigCodeBench/63": [ + "Computation", + "Visualization" + ], + "BigCodeBench/64": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/65": [ + "Computation", + "Visualization" + ], + "BigCodeBench/66": [ + "Computation", + "Visualization" + ], + "BigCodeBench/67": [ + "Computation", + "General", + "System" + ], + "BigCodeBench/68": [ + "Computation", + "Visualization" + ], + "BigCodeBench/69": [ + "General", + "Visualization" + ], + "BigCodeBench/70": [ + "Computation", + "Computation", + "System" + ], + "BigCodeBench/71": [ + "General", + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/72": [ + "Computation", + "Computation", + "General", + "System" + ], + "BigCodeBench/73": [ + "General", + "Computation", + "Visualization", + "Computation", + "System" + ], + "BigCodeBench/74": [ + "Network", + "Network" + ], + "BigCodeBench/75": [ + "Computation", + "General", + "Computation", + "Visualization", + "Time" + ], + "BigCodeBench/76": [ + "Network", + "General", + "General" + ], + "BigCodeBench/77": [ + "Cryptography", + "Network", + "Cryptography", + "Cryptography" + ], + "BigCodeBench/78": [ + "System", + "System", + "Network" + ], + "BigCodeBench/79": [ + "System", + "Network", + "System" + ], + "BigCodeBench/80": [ + "System", + "Network", + "System" + ], + "BigCodeBench/81": [ + "Network", + "Network", + "Network" + ], + "BigCodeBench/82": [ + "Network", + "Network", + "Network", + "Network", + "Network" + ], + "BigCodeBench/83": [ + "Network", + "Network" + ], + "BigCodeBench/84": [ + "Computation", + "Computation" + ], + "BigCodeBench/85": [ + "Computation", + "Time", + "Computation" + ], + "BigCodeBench/86": [ + "Computation", + "Computation" + ], + "BigCodeBench/87": [ + "Computation", + "General" + ], + "BigCodeBench/88": [ + "Computation", + "Time", + "Computation" + ], + "BigCodeBench/89": [ + "Computation", + "Visualization", + "Computation", + "Computation" + ], + 
"BigCodeBench/90": [ + "Computation", + "Computation" + ], + "BigCodeBench/91": [ + "Visualization", + "Computation" + ], + "BigCodeBench/92": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/93": [ + "Computation", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/94": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/95": [ + "Computation", + "General" + ], + "BigCodeBench/96": [ + "General", + "System", + "General" + ], + "BigCodeBench/97": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/98": [ + "General", + "General", + "General" + ], + "BigCodeBench/99": [ + "Computation", + "Visualization", + "Visualization", + "Computation" + ], + "BigCodeBench/100": [ + "Computation", + "Time", + "General", + "Visualization" + ], + "BigCodeBench/101": [ + "Computation", + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/102": [ + "Computation", + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/103": [ + "Computation", + "Visualization" + ], + "BigCodeBench/104": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/105": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/106": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/107": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/108": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/109": [ + "Computation", + "Visualization" + ], + "BigCodeBench/110": [ + "Computation", + "Visualization" + ], + "BigCodeBench/111": [ + "Computation", + "Visualization" + ], + "BigCodeBench/112": [ + "Computation", + "Visualization" + ], + "BigCodeBench/113": [ + "General", + "General", + "System" + ], + "BigCodeBench/114": [ + "Computation", + "Computation" + ], + "BigCodeBench/115": [ + "Computation", + "Computation" + ], + "BigCodeBench/116": [ + "Computation", + "Visualization" + ], 
+ "BigCodeBench/117": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/118": [ + "System", + "System" + ], + "BigCodeBench/119": [ + "Computation", + "Visualization" + ], + "BigCodeBench/120": [ + "Computation", + "Time", + "General" + ], + "BigCodeBench/121": [ + "Computation", + "Computation" + ], + "BigCodeBench/122": [ + "Computation", + "General" + ], + "BigCodeBench/123": [ + "System", + "Computation", + "System" + ], + "BigCodeBench/124": [ + "General", + "Visualization", + "Time" + ], + "BigCodeBench/125": [ + "General", + "General", + "General", + "System" + ], + "BigCodeBench/126": [ + "Computation", + "Computation", + "Computation", + "General" + ], + "BigCodeBench/127": [ + "System", + "System", + "Cryptography", + "System" + ], + "BigCodeBench/128": [ + "Computation", + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/129": [ + "Computation", + "Network", + "Network" + ], + "BigCodeBench/130": [ + "Cryptography", + "Cryptography", + "System", + "Cryptography" + ], + "BigCodeBench/131": [ + "Cryptography", + "Cryptography", + "System", + "Cryptography" + ], + "BigCodeBench/132": [ + "Computation", + "Computation", + "Visualization", + "Cryptography" + ], + "BigCodeBench/133": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/134": [ + "Computation", + "Visualization" + ], + "BigCodeBench/135": [ + "Computation", + "Visualization", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/136": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/137": [ + "Computation", + "Computation" + ], + "BigCodeBench/138": [ + "Computation", + "Visualization" + ], + "BigCodeBench/139": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/140": [ + "Computation", + "Computation" + ], + "BigCodeBench/141": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/142": [ + "Computation", + "Visualization" + ], + "BigCodeBench/143": 
[ + "Computation", + "Visualization" + ], + "BigCodeBench/144": [ + "Network", + "Network" + ], + "BigCodeBench/145": [ + "System", + "Network" + ], + "BigCodeBench/146": [ + "System", + "Network" + ], + "BigCodeBench/147": [ + "System", + "Network", + "Network" + ], + "BigCodeBench/148": [ + "Computation", + "Computation" + ], + "BigCodeBench/149": [ + "Computation", + "Computation" + ], + "BigCodeBench/150": [ + "Computation", + "Computation" + ], + "BigCodeBench/151": [ + "Computation", + "Computation" + ], + "BigCodeBench/152": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/153": [ + "Computation", + "Computation" + ], + "BigCodeBench/154": [ + "System", + "Network", + "General", + "System" + ], + "BigCodeBench/155": [ + "Computation", + "Visualization" + ], + "BigCodeBench/156": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/157": [ + "Computation", + "Visualization" + ], + "BigCodeBench/158": [ + "Network", + "System", + "System" + ], + "BigCodeBench/159": [ + "System", + "System", + "System" + ], + "BigCodeBench/160": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/161": [ + "Computation", + "Time", + "General" + ], + "BigCodeBench/162": [ + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/163": [ + "Computation", + "Computation" + ], + "BigCodeBench/164": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/165": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/166": [ + "Time", + "Computation", + "Time" + ], + "BigCodeBench/167": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/168": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/169": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/170": [ + "Computation", + "System", + "Network" + ], + "BigCodeBench/171": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/172": [ + "Time", + 
"System" + ], + "BigCodeBench/173": [ + "Computation", + "Computation" + ], + "BigCodeBench/174": [ + "Computation", + "Computation" + ], + "BigCodeBench/175": [ + "Visualization", + "General" + ], + "BigCodeBench/176": [ + "General", + "Network" + ], + "BigCodeBench/177": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/178": [ + "General", + "System" + ], + "BigCodeBench/179": [ + "Computation", + "Visualization", + "General", + "Computation" + ], + "BigCodeBench/180": [ + "Visualization", + "Visualization", + "Computation", + "System", + "Computation" + ], + "BigCodeBench/181": [ + "Network", + "General", + "Time" + ], + "BigCodeBench/182": [ + "General", + "Computation" + ], + "BigCodeBench/183": [ + "General", + "Network" + ], + "BigCodeBench/184": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/185": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/186": [ + "Network", + "Visualization" + ], + "BigCodeBench/187": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/188": [ + "Computation", + "Network", + "Visualization" + ], + "BigCodeBench/189": [ + "General", + "Network", + "System" + ], + "BigCodeBench/190": [ + "System", + "Computation", + "System", + "System" + ], + "BigCodeBench/191": [ + "General", + "Computation" + ], + "BigCodeBench/192": [ + "Network", + "General" + ], + "BigCodeBench/193": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/194": [ + "Computation", + "Visualization" + ], + "BigCodeBench/195": [ + "System", + "System", + "Time" + ], + "BigCodeBench/196": [ + "Computation", + "Visualization", + "General", + "Visualization" + ], + "BigCodeBench/197": [ + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/198": [ + "Computation", + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/199": [ + "Time", + "Computation", + "Time", + "General" + ], + "BigCodeBench/200": [ + "Computation", + "General", + 
"General", + "Visualization" + ], + "BigCodeBench/201": [ + "Computation", + "General" + ], + "BigCodeBench/202": [ + "General", + "General", + "System" + ], + "BigCodeBench/203": [ + "Network", + "System" + ], + "BigCodeBench/204": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/205": [ + "System", + "System" + ], + "BigCodeBench/206": [ + "System", + "System", + "System" + ], + "BigCodeBench/207": [ + "General", + "Network" + ], + "BigCodeBench/208": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/209": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/210": [ + "General", + "General", + "Visualization" + ], + "BigCodeBench/211": [ + "System", + "Network", + "System" + ], + "BigCodeBench/212": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/213": [ + "General", + "Visualization", + "Computation", + "Time" + ], + "BigCodeBench/214": [ + "Computation", + "Visualization", + "General", + "Computation" + ], + "BigCodeBench/215": [ + "Computation", + "System", + "Network", + "Visualization" + ], + "BigCodeBench/216": [ + "Computation", + "General", + "System", + "System" + ], + "BigCodeBench/217": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/218": [ + "Computation", + "Computation" + ], + "BigCodeBench/219": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/220": [ + "Visualization", + "General", + "Time" + ], + "BigCodeBench/221": [ + "Computation", + "Computation" + ], + "BigCodeBench/222": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/223": [ + "Computation", + "Computation" + ], + "BigCodeBench/224": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/225": [ + "Computation", + "Visualization" + ], + "BigCodeBench/226": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/227": [ + "Computation", + "Visualization", + "Computation", + "Computation", 
+ "System" + ], + "BigCodeBench/228": [ + "Computation", + "Computation" + ], + "BigCodeBench/229": [ + "Time", + "General", + "System" + ], + "BigCodeBench/230": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/231": [ + "General", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/232": [ + "Computation", + "General" + ], + "BigCodeBench/233": [ + "General", + "Visualization" + ], + "BigCodeBench/234": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/235": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/236": [ + "Computation", + "Computation" + ], + "BigCodeBench/237": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/238": [ + "Visualization", + "Computation" + ], + "BigCodeBench/239": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/240": [ + "Computation", + "General" + ], + "BigCodeBench/241": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/242": [ + "Visualization", + "Computation" + ], + "BigCodeBench/243": [ + "Computation", + "General" + ], + "BigCodeBench/244": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/245": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/246": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/247": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/248": [ + "Visualization", + "Computation", + "General" + ], + "BigCodeBench/249": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/250": [ + "Computation", + "General", + "System" + ], + "BigCodeBench/251": [ + "Computation", + "Visualization" + ], + "BigCodeBench/252": [ + "Visualization", + "General" + ], + "BigCodeBench/253": [ + "Computation", + "General" + ], + "BigCodeBench/254": [ + "Computation", + "System" + ], + "BigCodeBench/255": [ + "Computation", + "Visualization" + ], + 
"BigCodeBench/256": [ + "Time", + "General", + "Cryptography", + "System" + ], + "BigCodeBench/257": [ + "Computation", + "Computation" + ], + "BigCodeBench/258": [ + "General", + "System" + ], + "BigCodeBench/259": [ + "Computation", + "Visualization" + ], + "BigCodeBench/260": [ + "System", + "System", + "System" + ], + "BigCodeBench/261": [ + "Computation", + "Visualization" + ], + "BigCodeBench/262": [ + "General", + "Visualization", + "Visualization" + ], + "BigCodeBench/263": [ + "System", + "System", + "System", + "Time" + ], + "BigCodeBench/264": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/265": [ + "System", + "General", + "System" + ], + "BigCodeBench/266": [ + "System", + "General", + "System" + ], + "BigCodeBench/267": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/268": [ + "General", + "General" + ], + "BigCodeBench/269": [ + "Computation", + "Visualization", + "Computation", + "Computation" + ], + "BigCodeBench/270": [ + "General", + "General" + ], + "BigCodeBench/271": [ + "Cryptography", + "General", + "General", + "Time" + ], + "BigCodeBench/272": [ + "Network", + "Network", + "System" + ], + "BigCodeBench/273": [ + "Network", + "Network", + "System" + ], + "BigCodeBench/274": [ + "Network", + "Network", + "Network", + "System", + "Network" + ], + "BigCodeBench/275": [ + "Computation", + "General" + ], + "BigCodeBench/276": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/277": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/278": [ + "Computation", + "Computation" + ], + "BigCodeBench/279": [ + "General", + "General" + ], + "BigCodeBench/280": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/281": [ + "General", + "General", + "System" + ], + "BigCodeBench/282": [ + "Visualization", + "Visualization", + "Computation", + "Computation", + "System" + ], + "BigCodeBench/283": [ + "System", + "General", + "System" + ], + 
"BigCodeBench/284": [ + "Network", + "Network", + "Network" + ], + "BigCodeBench/285": [ + "Network", + "Network" + ], + "BigCodeBench/286": [ + "System", + "General", + "System" + ], + "BigCodeBench/287": [ + "System", + "General", + "System" + ], + "BigCodeBench/288": [ + "System", + "General", + "System" + ], + "BigCodeBench/289": [ + "Computation", + "Computation" + ], + "BigCodeBench/290": [ + "Computation", + "General", + "System" + ], + "BigCodeBench/291": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/292": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/293": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/294": [ + "Computation", + "Computation" + ], + "BigCodeBench/295": [ + "Computation", + "General" + ], + "BigCodeBench/296": [ + "Computation", + "Visualization" + ], + "BigCodeBench/297": [ + "General", + "General" + ], + "BigCodeBench/298": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/299": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/300": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/301": [ + "Time", + "Computation", + "Computation", + "Time" + ], + "BigCodeBench/302": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/303": [ + "Time", + "Computation", + "Computation", + "Time" + ], + "BigCodeBench/304": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/305": [ + "General", + "General", + "General" + ], + "BigCodeBench/306": [ + "System", + "System" + ], + "BigCodeBench/307": [ + "General", + "Visualization", + "Visualization" + ], + "BigCodeBench/308": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/309": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/310": [ + "Computation", + "System", + "General", + "System" + ], + "BigCodeBench/311": [ + "General", + "Computation", + "Computation" + ], + 
"BigCodeBench/312": [ + "General", + "Visualization" + ], + "BigCodeBench/313": [ + "System", + "Time", + "General", + "System" + ], + "BigCodeBench/314": [ + "Network", + "Network", + "Network" + ], + "BigCodeBench/315": [ + "Network", + "Network", + "System" + ], + "BigCodeBench/316": [ + "Computation", + "General" + ], + "BigCodeBench/317": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/318": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/319": [ + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/320": [ + "System", + "General", + "System" + ], + "BigCodeBench/321": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/322": [ + "System", + "System", + "System", + "System" + ], + "BigCodeBench/323": [ + "Computation", + "General", + "General", + "Computation" + ], + "BigCodeBench/324": [ + "System", + "System", + "Time" + ], + "BigCodeBench/325": [ + "System", + "System", + "General", + "System" + ], + "BigCodeBench/326": [ + "System", + "System", + "System", + "System" + ], + "BigCodeBench/327": [ + "System", + "General", + "General" + ], + "BigCodeBench/328": [ + "General", + "General", + "General" + ], + "BigCodeBench/329": [ + "System", + "General", + "System" + ], + "BigCodeBench/330": [ + "General", + "General" + ], + "BigCodeBench/331": [ + "General", + "General" + ], + "BigCodeBench/332": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/333": [ + "General", + "General" + ], + "BigCodeBench/334": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/335": [ + "General", + "General", + "General" + ], + "BigCodeBench/336": [ + "System", + "System", + "General", + "System" + ], + "BigCodeBench/337": [ + "Computation", + "Visualization" + ], + "BigCodeBench/338": [ + "General", + "Visualization", + "General" + ], + "BigCodeBench/339": [ + "Network", + "Cryptography", + "Cryptography", + "System" + ], + "BigCodeBench/340": [ + "Cryptography", + 
"Cryptography", + "System" + ], + "BigCodeBench/341": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/342": [ + "General", + "General", + "General" + ], + "BigCodeBench/343": [ + "Computation", + "Visualization" + ], + "BigCodeBench/344": [ + "System", + "System" + ], + "BigCodeBench/345": [ + "Computation", + "Visualization" + ], + "BigCodeBench/346": [ + "System", + "Time", + "System", + "System" + ], + "BigCodeBench/347": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/348": [ + "System", + "Time", + "System", + "System" + ], + "BigCodeBench/349": [ + "Computation", + "General" + ], + "BigCodeBench/350": [ + "System", + "System", + "System", + "System" + ], + "BigCodeBench/351": [ + "Computation", + "General" + ], + "BigCodeBench/352": [ + "Computation", + "General" + ], + "BigCodeBench/353": [ + "Computation", + "General" + ], + "BigCodeBench/354": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/355": [ + "Computation", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/356": [ + "Visualization", + "Computation", + "Computation" + ], + "BigCodeBench/357": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/358": [ + "General", + "System" + ], + "BigCodeBench/359": [ + "Visualization", + "Computation" + ], + "BigCodeBench/360": [ + "Computation", + "Computation", + "Visualization", + "System" + ], + "BigCodeBench/361": [ + "Computation", + "System" + ], + "BigCodeBench/362": [ + "Computation", + "System" + ], + "BigCodeBench/363": [ + "Computation", + "System" + ], + "BigCodeBench/364": [ + "Computation", + "Computation" + ], + "BigCodeBench/365": [ + "General", + "General", + "System" + ], + "BigCodeBench/366": [ + "General", + "Visualization" + ], + "BigCodeBench/367": [ + "Time", + "General", + "Visualization" + ], + "BigCodeBench/368": [ + "System", + "General", + "System" + ], + "BigCodeBench/369": [ + "Computation", + "Visualization", + 
"Computation" + ], + "BigCodeBench/370": [ + "System", + "System", + "General", + "System" + ], + "BigCodeBench/371": [ + "Computation", + "Computation" + ], + "BigCodeBench/372": [ + "System", + "System", + "General" + ], + "BigCodeBench/373": [ + "Visualization", + "Computation" + ], + "BigCodeBench/374": [ + "General", + "System", + "System", + "System" + ], + "BigCodeBench/375": [ + "Visualization", + "Computation" + ], + "BigCodeBench/376": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/377": [ + "System", + "General", + "System" + ], + "BigCodeBench/378": [ + "System", + "Computation", + "General", + "System" + ], + "BigCodeBench/379": [ + "Computation", + "Computation" + ], + "BigCodeBench/380": [ + "System", + "General", + "System" + ], + "BigCodeBench/381": [ + "Computation", + "Visualization", + "Visualization", + "Computation", + "System" + ], + "BigCodeBench/382": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/383": [ + "Computation", + "General", + "Visualization", + "Computation", + "Visualization" + ], + "BigCodeBench/384": [ + "General", + "General", + "General" + ], + "BigCodeBench/385": [ + "General", + "Visualization" + ], + "BigCodeBench/386": [ + "Computation", + "Computation" + ], + "BigCodeBench/387": [ + "Computation", + "Visualization" + ], + "BigCodeBench/388": [ + "Computation", + "General" + ], + "BigCodeBench/389": [ + "System", + "General", + "System" + ], + "BigCodeBench/390": [ + "Computation", + "System", + "Network" + ], + "BigCodeBench/391": [ + "System", + "System", + "System" + ], + "BigCodeBench/392": [ + "Computation", + "Visualization" + ], + "BigCodeBench/393": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/394": [ + "General", + "General", + "General" + ], + "BigCodeBench/395": [ + "System", + "Computation", + "General", + "System", + "General" + ], + "BigCodeBench/396": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/397": 
[ + "Network", + "General", + "System" + ], + "BigCodeBench/398": [ + "System", + "System" + ], + "BigCodeBench/399": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/400": [ + "System", + "System" + ], + "BigCodeBench/401": [ + "Network", + "Network", + "System" + ], + "BigCodeBench/402": [ + "Network", + "System", + "System", + "General", + "System" + ], + "BigCodeBench/403": [ + "Computation", + "Computation", + "Visualization", + "System" + ], + "BigCodeBench/404": [ + "Computation", + "Computation", + "System" + ], + "BigCodeBench/405": [ + "General", + "Visualization" + ], + "BigCodeBench/406": [ + "Computation", + "Visualization", + "Visualization", + "System" + ], + "BigCodeBench/407": [ + "System", + "System", + "System" + ], + "BigCodeBench/408": [ + "System", + "Computation" + ], + "BigCodeBench/409": [ + "Computation", + "Computation", + "System" + ], + "BigCodeBench/410": [ + "Computation", + "Time", + "System" + ], + "BigCodeBench/411": [ + "Computation", + "System" + ], + "BigCodeBench/412": [ + "Cryptography", + "General", + "System" + ], + "BigCodeBench/413": [ + "Network", + "System" + ], + "BigCodeBench/414": [ + "Computation", + "Computation" + ], + "BigCodeBench/415": [ + "Cryptography", + "Computation" + ], + "BigCodeBench/416": [ + "Computation", + "Visualization" + ], + "BigCodeBench/417": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/418": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/419": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/420": [ + "Computation", + "Computation" + ], + "BigCodeBench/421": [ + "Time", + "System", + "Network", + "System" + ], + "BigCodeBench/422": [ + "Computation", + "Computation" + ], + "BigCodeBench/423": [ + "Computation", + "Computation", + "System" + ], + "BigCodeBench/424": [ + "Computation", + "Computation", + "Computation", + "System" + ], + "BigCodeBench/425": [ + "Visualization", + 
"Computation", + "System" + ], + "BigCodeBench/426": [ + "Computation", + "Computation", + "System" + ], + "BigCodeBench/427": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/428": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/429": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/430": [ + "Visualization", + "Computation" + ], + "BigCodeBench/431": [ + "Computation", + "Computation", + "System" + ], + "BigCodeBench/432": [ + "Computation", + "Visualization" + ], + "BigCodeBench/433": [ + "Cryptography", + "Cryptography", + "Cryptography", + "Cryptography" + ], + "BigCodeBench/434": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/435": [ + "Computation", + "General" + ], + "BigCodeBench/436": [ + "Visualization", + "General" + ], + "BigCodeBench/437": [ + "System", + "System" + ], + "BigCodeBench/438": [ + "Visualization", + "System", + "System" + ], + "BigCodeBench/439": [ + "Computation", + "Visualization" + ], + "BigCodeBench/440": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/441": [ + "Computation", + "Visualization" + ], + "BigCodeBench/442": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/443": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/444": [ + "Computation", + "Visualization" + ], + "BigCodeBench/445": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/446": [ + "Visualization", + "Computation" + ], + "BigCodeBench/447": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/448": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/449": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/450": [ + "Visualization", + "Computation", + "Computation" + ], + "BigCodeBench/451": [ + "Computation", + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/452": [ + 
"Computation", + "Computation" + ], + "BigCodeBench/453": [ + "General", + "General", + "General" + ], + "BigCodeBench/454": [ + "System", + "System", + "System" + ], + "BigCodeBench/455": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/456": [ + "Computation", + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/457": [ + "Computation", + "Computation" + ], + "BigCodeBench/458": [ + "Computation", + "General", + "System" + ], + "BigCodeBench/459": [ + "System", + "Time", + "Time", + "System" + ], + "BigCodeBench/460": [ + "System", + "Computation" + ], + "BigCodeBench/461": [ + "System", + "System", + "System", + "Time" + ], + "BigCodeBench/462": [ + "Computation", + "General" + ], + "BigCodeBench/463": [ + "Computation", + "Computation" + ], + "BigCodeBench/464": [ + "General", + "Time", + "System" + ], + "BigCodeBench/465": [ + "General", + "Time", + "Computation", + "System" + ], + "BigCodeBench/466": [ + "General", + "System" + ], + "BigCodeBench/467": [ + "Computation", + "Visualization" + ], + "BigCodeBench/468": [ + "Computation", + "Computation" + ], + "BigCodeBench/469": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/470": [ + "Computation", + "Visualization" + ], + "BigCodeBench/471": [ + "Computation", + "General" + ], + "BigCodeBench/472": [ + "Visualization", + "Computation" + ], + "BigCodeBench/473": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/474": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/475": [ + "Computation", + "Time" + ], + "BigCodeBench/476": [ + "Visualization", + "Computation" + ], + "BigCodeBench/477": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/478": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/479": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/480": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/481": [ + "Computation", 
+ "General", + "General" + ], + "BigCodeBench/482": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/483": [ + "Computation", + "General" + ], + "BigCodeBench/484": [ + "Computation", + "Computation", + "Time", + "Computation" + ], + "BigCodeBench/485": [ + "Time", + "Time", + "Computation", + "Visualization" + ], + "BigCodeBench/486": [ + "Computation", + "Time", + "Computation" + ], + "BigCodeBench/487": [ + "Computation", + "General", + "System" + ], + "BigCodeBench/488": [ + "Computation", + "Time", + "Computation" + ], + "BigCodeBench/489": [ + "Computation", + "Time", + "General" + ], + "BigCodeBench/490": [ + "Network", + "System" + ], + "BigCodeBench/491": [ + "Time", + "General", + "Visualization" + ], + "BigCodeBench/492": [ + "Computation", + "Time", + "General" + ], + "BigCodeBench/493": [ + "Time", + "General", + "Visualization" + ], + "BigCodeBench/494": [ + "Time", + "General", + "Time", + "General" + ], + "BigCodeBench/495": [ + "Computation", + "Computation" + ], + "BigCodeBench/496": [ + "Time", + "Computation", + "Visualization" + ], + "BigCodeBench/497": [ + "Time", + "Time", + "Time" + ], + "BigCodeBench/498": [ + "Network", + "System" + ], + "BigCodeBench/499": [ + "System", + "System", + "System", + "System" + ], + "BigCodeBench/500": [ + "System", + "System" + ], + "BigCodeBench/501": [ + "Computation", + "System", + "System" + ], + "BigCodeBench/502": [ + "Computation", + "Time", + "General", + "Visualization" + ], + "BigCodeBench/503": [ + "Computation", + "Time", + "Computation" + ], + "BigCodeBench/504": [ + "Cryptography", + "Cryptography", + "Cryptography" + ], + "BigCodeBench/505": [ + "Cryptography", + "Cryptography" + ], + "BigCodeBench/506": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/507": [ + "Computation", + "Computation" + ], + "BigCodeBench/508": [ + "System", + "Cryptography", + "System" + ], + "BigCodeBench/509": [ + "General", + "Computation", + "System" + ], + 
"BigCodeBench/510": [ + "General", + "System" + ], + "BigCodeBench/511": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/512": [ + "Computation", + "Computation" + ], + "BigCodeBench/513": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/514": [ + "Computation", + "Visualization" + ], + "BigCodeBench/515": [ + "Computation", + "Visualization" + ], + "BigCodeBench/516": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/517": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/518": [ + "Computation", + "Computation" + ], + "BigCodeBench/519": [ + "Computation", + "Visualization" + ], + "BigCodeBench/520": [ + "General", + "Visualization" + ], + "BigCodeBench/521": [ + "Computation", + "Visualization" + ], + "BigCodeBench/522": [ + "General", + "Visualization" + ], + "BigCodeBench/523": [ + "Computation", + "Visualization" + ], + "BigCodeBench/524": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/525": [ + "General", + "Computation", + "Visualization", + "System" + ], + "BigCodeBench/526": [ + "Computation", + "General", + "Computation", + "System" + ], + "BigCodeBench/527": [ + "Computation", + "General", + "Visualization", + "Computation", + "Visualization", + "System" + ], + "BigCodeBench/528": [ + "Computation", + "System", + "General", + "Visualization" + ], + "BigCodeBench/529": [ + "General", + "Visualization", + "General" + ], + "BigCodeBench/530": [ + "Computation", + "General", + "Visualization", + "Computation", + "Visualization" + ], + "BigCodeBench/531": [ + "General", + "Visualization", + "Computation" + ], + "BigCodeBench/532": [ + "Computation", + "General", + "Visualization", + "Computation" + ], + "BigCodeBench/533": [ + "Cryptography", + "Cryptography", + "Cryptography", + "Computation" + ], + "BigCodeBench/534": [ + "Cryptography", + "Computation", + "Cryptography" + ], + "BigCodeBench/535": [ + "System", + "Computation", + 
"General" + ], + "BigCodeBench/536": [ + "System", + "Computation", + "System" + ], + "BigCodeBench/537": [ + "System", + "Computation", + "Visualization" + ], + "BigCodeBench/538": [ + "System", + "Computation" + ], + "BigCodeBench/539": [ + "System", + "General", + "System" + ], + "BigCodeBench/540": [ + "Visualization", + "General", + "Computation", + "General" + ], + "BigCodeBench/541": [ + "General", + "General", + "System", + "System" + ], + "BigCodeBench/542": [ + "System", + "Cryptography", + "General" + ], + "BigCodeBench/543": [ + "Cryptography", + "System" + ], + "BigCodeBench/544": [ + "System", + "System" + ], + "BigCodeBench/545": [ + "Cryptography", + "System", + "General" + ], + "BigCodeBench/546": [ + "General", + "General" + ], + "BigCodeBench/547": [ + "Cryptography", + "Cryptography", + "System" + ], + "BigCodeBench/548": [ + "Cryptography", + "General", + "General", + "System" + ], + "BigCodeBench/549": [ + "Computation", + "Cryptography" + ], + "BigCodeBench/550": [ + "Computation", + "General" + ], + "BigCodeBench/551": [ + "Computation", + "General", + "Visualization", + "Visualization" + ], + "BigCodeBench/552": [ + "Visualization", + "General", + "General" + ], + "BigCodeBench/553": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/554": [ + "Computation", + "General" + ], + "BigCodeBench/555": [ + "Computation", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/556": [ + "General", + "Computation", + "General" + ], + "BigCodeBench/557": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/558": [ + "Computation", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/559": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/560": [ + "Computation", + "Time", + "Visualization" + ], + "BigCodeBench/561": [ + "Time", + "Time" + ], + "BigCodeBench/562": [ + "System", + "System", + "System", + "System" + ], + "BigCodeBench/563": [ + 
"System", + "System", + "System", + "System" + ], + "BigCodeBench/564": [ + "Time", + "System", + "System", + "Time" + ], + "BigCodeBench/565": [ + "System", + "Cryptography", + "Cryptography" + ], + "BigCodeBench/566": [ + "General", + "General" + ], + "BigCodeBench/567": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/568": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/569": [ + "General", + "Computation", + "General" + ], + "BigCodeBench/570": [ + "General", + "General", + "System" + ], + "BigCodeBench/571": [ + "Computation", + "General" + ], + "BigCodeBench/572": [ + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/573": [ + "Computation", + "Computation" + ], + "BigCodeBench/574": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/575": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/576": [ + "Computation", + "General" + ], + "BigCodeBench/577": [ + "General", + "Cryptography", + "System", + "System" + ], + "BigCodeBench/578": [ + "General", + "Network" + ], + "BigCodeBench/579": [ + "General", + "System", + "General", + "Visualization" + ], + "BigCodeBench/580": [ + "Computation", + "Computation", + "Visualization", + "Computation", + "General" + ], + "BigCodeBench/581": [ + "Computation", + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/582": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/583": [ + "Cryptography", + "Cryptography", + "Cryptography", + "System" + ], + "BigCodeBench/584": [ + "Network", + "Cryptography", + "Cryptography" + ], + "BigCodeBench/585": [ + "Cryptography", + "Cryptography", + "System", + "System" + ], + "BigCodeBench/586": [ + "Cryptography", + "Cryptography", + "Cryptography" + ], + "BigCodeBench/587": [ + "Cryptography", + "Cryptography", + "Cryptography", + "System" + ], + "BigCodeBench/588": [ + "Computation", + "Computation", + "Visualization", + "Visualization" + 
], + "BigCodeBench/589": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/590": [ + "Computation", + "Network", + "Time", + "Network" + ], + "BigCodeBench/591": [ + "Computation", + "Time", + "General", + "Visualization" + ], + "BigCodeBench/592": [ + "System", + "Time", + "General", + "System" + ], + "BigCodeBench/593": [ + "Computation", + "Visualization", + "General", + "System", + "System", + "Time" + ], + "BigCodeBench/594": [ + "System", + "General", + "System", + "System", + "Time" + ], + "BigCodeBench/595": [ + "General", + "General", + "General" + ], + "BigCodeBench/596": [ + "Time", + "General", + "Visualization", + "Time" + ], + "BigCodeBench/597": [ + "Computation", + "Time" + ], + "BigCodeBench/598": [ + "Computation", + "Time" + ], + "BigCodeBench/599": [ + "Computation", + "Time" + ], + "BigCodeBench/600": [ + "Computation", + "Computation" + ], + "BigCodeBench/601": [ + "Time", + "Visualization" + ], + "BigCodeBench/602": [ + "Computation", + "Computation" + ], + "BigCodeBench/603": [ + "Computation", + "Computation" + ], + "BigCodeBench/604": [ + "System", + "System" + ], + "BigCodeBench/605": [ + "Computation", + "Visualization" + ], + "BigCodeBench/606": [ + "Computation", + "Computation" + ], + "BigCodeBench/607": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/608": [ + "General", + "Visualization" + ], + "BigCodeBench/609": [ + "General", + "General" + ], + "BigCodeBench/610": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/611": [ + "General", + "Visualization" + ], + "BigCodeBench/612": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/613": [ + "Computation", + "Visualization" + ], + "BigCodeBench/614": [ + "Computation", + "Visualization" + ], + "BigCodeBench/615": [ + "Computation", + "General" + ], + "BigCodeBench/616": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/617": [ + "Computation", + "General", + "Visualization", + 
"General" + ], + "BigCodeBench/618": [ + "Computation", + "General", + "Visualization", + "Visualization" + ], + "BigCodeBench/619": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/620": [ + "Computation", + "Computation" + ], + "BigCodeBench/621": [ + "Visualization", + "Computation", + "General", + "Computation" + ], + "BigCodeBench/622": [ + "Visualization", + "Computation", + "General", + "Computation" + ], + "BigCodeBench/623": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/624": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/625": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/626": [ + "Time", + "Time", + "General" + ], + "BigCodeBench/627": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/628": [ + "Computation", + "General", + "Visualization" + ], + "BigCodeBench/629": [ + "System", + "Time" + ], + "BigCodeBench/630": [ + "Computation", + "System" + ], + "BigCodeBench/631": [ + "System", + "System" + ], + "BigCodeBench/632": [ + "Computation", + "Time" + ], + "BigCodeBench/633": [ + "Computation", + "General" + ], + "BigCodeBench/634": [ + "General", + "General", + "Computation" + ], + "BigCodeBench/635": [ + "Computation", + "Computation", + "Visualization", + "Computation", + "Computation", + "General" + ], + "BigCodeBench/636": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/637": [ + "Computation", + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/638": [ + "Computation", + "Computation" + ], + "BigCodeBench/639": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/640": [ + "Computation", + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/641": [ + "Computation", + "General", + "System" + ], + "BigCodeBench/642": [ + "Cryptography", + "General", + "Cryptography" + ], + "BigCodeBench/643": [ + "Computation", + "Computation", + "General" + ], + 
"BigCodeBench/644": [ + "Cryptography", + "Cryptography" + ], + "BigCodeBench/645": [ + "Computation", + "System" + ], + "BigCodeBench/646": [ + "Time", + "Computation", + "System" + ], + "BigCodeBench/647": [ + "Time", + "Time" + ], + "BigCodeBench/648": [ + "Time", + "Time" + ], + "BigCodeBench/649": [ + "Time", + "Computation", + "Computation" + ], + "BigCodeBench/650": [ + "Time", + "Time", + "Time" + ], + "BigCodeBench/651": [ + "Computation", + "Time" + ], + "BigCodeBench/652": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/653": [ + "Visualization", + "Visualization" + ], + "BigCodeBench/654": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/655": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/656": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/657": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/658": [ + "Computation", + "Computation", + "General", + "Computation" + ], + "BigCodeBench/659": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/660": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/661": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/662": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/663": [ + "Computation", + "Computation" + ], + "BigCodeBench/664": [ + "Computation", + "Visualization" + ], + "BigCodeBench/665": [ + "System", + "General", + "General", + "System" + ], + "BigCodeBench/666": [ + "Computation", + "General" + ], + "BigCodeBench/667": [ + "General", + "General" + ], + "BigCodeBench/668": [ + "Computation", + "General" + ], + "BigCodeBench/669": [ + "Computation", + "General" + ], + "BigCodeBench/670": [ + "Computation", + "General" + ], + "BigCodeBench/671": [ + "System", + "General", + "System" + ], + "BigCodeBench/672": [ + "System", + "System" + ], + "BigCodeBench/673": [ + "General", + "System" + ], + 
"BigCodeBench/674": [ + "Computation", + "System" + ], + "BigCodeBench/675": [ + "General", + "System" + ], + "BigCodeBench/676": [ + "Computation", + "General" + ], + "BigCodeBench/677": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/678": [ + "Computation", + "System", + "System", + "System" + ], + "BigCodeBench/679": [ + "Computation", + "General" + ], + "BigCodeBench/680": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/681": [ + "Computation", + "System" + ], + "BigCodeBench/682": [ + "Computation", + "General" + ], + "BigCodeBench/683": [ + "Computation", + "System" + ], + "BigCodeBench/684": [ + "Computation", + "Computation" + ], + "BigCodeBench/685": [ + "General", + "General" + ], + "BigCodeBench/686": [ + "Computation", + "Computation" + ], + "BigCodeBench/687": [ + "Computation", + "Computation" + ], + "BigCodeBench/688": [ + "Computation", + "Computation" + ], + "BigCodeBench/689": [ + "Computation", + "Computation" + ], + "BigCodeBench/690": [ + "Computation", + "Computation" + ], + "BigCodeBench/691": [ + "Computation", + "Computation" + ], + "BigCodeBench/692": [ + "Computation", + "Computation" + ], + "BigCodeBench/693": [ + "Computation", + "Computation" + ], + "BigCodeBench/694": [ + "General", + "General" + ], + "BigCodeBench/695": [ + "Computation", + "Computation" + ], + "BigCodeBench/696": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/697": [ + "Computation", + "Computation" + ], + "BigCodeBench/698": [ + "Computation", + "Computation" + ], + "BigCodeBench/699": [ + "Computation", + "Computation" + ], + "BigCodeBench/700": [ + "Computation", + "Computation" + ], + "BigCodeBench/701": [ + "Computation", + "Computation" + ], + "BigCodeBench/702": [ + "Computation", + "Computation" + ], + "BigCodeBench/703": [ + "Computation", + "Computation" + ], + "BigCodeBench/704": [ + "Computation", + "General" + ], + "BigCodeBench/705": [ + "Computation", + "Computation" + ], + 
"BigCodeBench/706": [ + "Computation", + "Computation" + ], + "BigCodeBench/707": [ + "Computation", + "System" + ], + "BigCodeBench/708": [ + "System", + "Cryptography", + "System", + "System" + ], + "BigCodeBench/709": [ + "Network", + "Cryptography", + "General", + "General" + ], + "BigCodeBench/710": [ + "Computation", + "Computation" + ], + "BigCodeBench/711": [ + "System", + "System" + ], + "BigCodeBench/712": [ + "System", + "System", + "System" + ], + "BigCodeBench/713": [ + "General", + "System" + ], + "BigCodeBench/714": [ + "System", + "System" + ], + "BigCodeBench/715": [ + "System", + "System" + ], + "BigCodeBench/716": [ + "Time", + "System", + "System" + ], + "BigCodeBench/717": [ + "System", + "System" + ], + "BigCodeBench/718": [ + "General", + "Computation", + "Computation" + ], + "BigCodeBench/719": [ + "System", + "General", + "System" + ], + "BigCodeBench/720": [ + "System", + "Time", + "General", + "System" + ], + "BigCodeBench/721": [ + "System", + "General", + "System" + ], + "BigCodeBench/722": [ + "Network", + "General", + "System" + ], + "BigCodeBench/723": [ + "System", + "Network", + "Network", + "System" + ], + "BigCodeBench/724": [ + "System", + "System" + ], + "BigCodeBench/725": [ + "Cryptography", + "System", + "System" + ], + "BigCodeBench/726": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/727": [ + "Computation", + "General", + "Computation" + ], + "BigCodeBench/728": [ + "System", + "System" + ], + "BigCodeBench/729": [ + "System", + "General", + "General", + "System" + ], + "BigCodeBench/730": [ + "System", + "System" + ], + "BigCodeBench/731": [ + "System", + "System", + "Computation" + ], + "BigCodeBench/732": [ + "Computation", + "General", + "General", + "General" + ], + "BigCodeBench/733": [ + "General", + "General" + ], + "BigCodeBench/734": [ + "Computation", + "General" + ], + "BigCodeBench/735": [ + "Computation", + "General" + ], + "BigCodeBench/736": [ + "Computation", + "Computation" + ], + 
"BigCodeBench/737": [ + "Computation", + "Computation" + ], + "BigCodeBench/738": [ + "Computation", + "Computation" + ], + "BigCodeBench/739": [ + "System", + "General" + ], + "BigCodeBench/740": [ + "General", + "General" + ], + "BigCodeBench/741": [ + "General", + "General" + ], + "BigCodeBench/742": [ + "Computation", + "Computation" + ], + "BigCodeBench/743": [ + "System", + "System" + ], + "BigCodeBench/744": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/745": [ + "System", + "General" + ], + "BigCodeBench/746": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/747": [ + "Computation", + "General" + ], + "BigCodeBench/748": [ + "Computation", + "Computation" + ], + "BigCodeBench/749": [ + "Computation", + "Computation" + ], + "BigCodeBench/750": [ + "Computation", + "Computation" + ], + "BigCodeBench/751": [ + "General", + "General" + ], + "BigCodeBench/752": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/753": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/754": [ + "Computation", + "Time", + "Computation" + ], + "BigCodeBench/755": [ + "System", + "System" + ], + "BigCodeBench/756": [ + "General", + "System", + "System" + ], + "BigCodeBench/757": [ + "Time", + "Computation" + ], + "BigCodeBench/758": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/759": [ + "System", + "General", + "System" + ], + "BigCodeBench/760": [ + "Computation", + "Computation", + "Cryptography", + "General", + "Time" + ], + "BigCodeBench/761": [ + "General", + "General", + "System" + ], + "BigCodeBench/762": [ + "Cryptography", + "System", + "System" + ], + "BigCodeBench/763": [ + "General", + "System", + "Computation", + "System" + ], + "BigCodeBench/764": [ + "System", + "General" + ], + "BigCodeBench/765": [ + "System", + "System", + "System" + ], + "BigCodeBench/766": [ + "General", + "General" + ], + "BigCodeBench/767": [ + "General", + "General", + "General" + 
], + "BigCodeBench/768": [ + "System", + "General", + "System" + ], + "BigCodeBench/769": [ + "General", + "General", + "General" + ], + "BigCodeBench/770": [ + "Computation", + "Computation" + ], + "BigCodeBench/771": [ + "System", + "System", + "General", + "System" + ], + "BigCodeBench/772": [ + "Computation", + "Computation" + ], + "BigCodeBench/773": [ + "System", + "General", + "System" + ], + "BigCodeBench/774": [ + "Computation", + "Computation" + ], + "BigCodeBench/775": [ + "General", + "General", + "General" + ], + "BigCodeBench/776": [ + "Computation", + "Computation" + ], + "BigCodeBench/777": [ + "System", + "General", + "System" + ], + "BigCodeBench/778": [ + "General", + "General", + "General" + ], + "BigCodeBench/779": [ + "System", + "System" + ], + "BigCodeBench/780": [ + "Time", + "Computation" + ], + "BigCodeBench/781": [ + "Time", + "System" + ], + "BigCodeBench/782": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/783": [ + "System", + "System" + ], + "BigCodeBench/784": [ + "Computation", + "System", + "General" + ], + "BigCodeBench/785": [ + "System", + "System", + "System" + ], + "BigCodeBench/786": [ + "Computation", + "System", + "General" + ], + "BigCodeBench/787": [ + "Computation", + "General" + ], + "BigCodeBench/788": [ + "Computation", + "General" + ], + "BigCodeBench/789": [ + "Computation", + "Computation" + ], + "BigCodeBench/790": [ + "Computation", + "General" + ], + "BigCodeBench/791": [ + "General", + "General", + "General" + ], + "BigCodeBench/792": [ + "Computation", + "General" + ], + "BigCodeBench/793": [ + "Computation", + "General" + ], + "BigCodeBench/794": [ + "General", + "General" + ], + "BigCodeBench/795": [ + "Computation", + "General" + ], + "BigCodeBench/796": [ + "General", + "System" + ], + "BigCodeBench/797": [ + "Computation", + "General" + ], + "BigCodeBench/798": [ + "System", + "System" + ], + "BigCodeBench/799": [ + "Computation", + "General" + ], + "BigCodeBench/800": [ + "System", 
+ "General", + "System" + ], + "BigCodeBench/801": [ + "General", + "Computation" + ], + "BigCodeBench/802": [ + "Computation", + "General" + ], + "BigCodeBench/803": [ + "Computation", + "Computation" + ], + "BigCodeBench/804": [ + "Time", + "System" + ], + "BigCodeBench/805": [ + "Computation", + "General" + ], + "BigCodeBench/806": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/807": [ + "Computation", + "Computation" + ], + "BigCodeBench/808": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/809": [ + "Computation", + "Computation" + ], + "BigCodeBench/810": [ + "System", + "General", + "System" + ], + "BigCodeBench/811": [ + "Computation", + "General" + ], + "BigCodeBench/812": [ + "System", + "System", + "General" + ], + "BigCodeBench/813": [ + "Computation", + "General" + ], + "BigCodeBench/814": [ + "System", + "General", + "System" + ], + "BigCodeBench/815": [ + "Computation", + "Computation" + ], + "BigCodeBench/816": [ + "General", + "General" + ], + "BigCodeBench/817": [ + "General", + "System" + ], + "BigCodeBench/818": [ + "General", + "General" + ], + "BigCodeBench/819": [ + "General", + "Time" + ], + "BigCodeBench/820": [ + "General", + "General" + ], + "BigCodeBench/821": [ + "System", + "Time" + ], + "BigCodeBench/822": [ + "General", + "General" + ], + "BigCodeBench/823": [ + "Computation", + "Time" + ], + "BigCodeBench/824": [ + "General", + "General" + ], + "BigCodeBench/825": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/826": [ + "System", + "General", + "System" + ], + "BigCodeBench/827": [ + "Computation", + "Computation" + ], + "BigCodeBench/828": [ + "System", + "System", + "System" + ], + "BigCodeBench/829": [ + "Computation", + "Computation" + ], + "BigCodeBench/830": [ + "System", + "System" + ], + "BigCodeBench/831": [ + "Computation", + "General" + ], + "BigCodeBench/832": [ + "System", + "System" + ], + "BigCodeBench/833": [ + "Computation", + "General", + "General" + ], + 
"BigCodeBench/834": [ + "System", + "System", + "Cryptography" + ], + "BigCodeBench/835": [ + "Computation", + "Computation" + ], + "BigCodeBench/836": [ + "System", + "System", + "System" + ], + "BigCodeBench/837": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/838": [ + "Computation", + "General" + ], + "BigCodeBench/839": [ + "System", + "General" + ], + "BigCodeBench/840": [ + "Computation", + "Computation" + ], + "BigCodeBench/841": [ + "General", + "General", + "General", + "System" + ], + "BigCodeBench/842": [ + "System", + "General" + ], + "BigCodeBench/843": [ + "General", + "General" + ], + "BigCodeBench/844": [ + "General", + "System", + "General" + ], + "BigCodeBench/845": [ + "General", + "General", + "Computation", + "Computation" + ], + "BigCodeBench/846": [ + "Computation", + "General" + ], + "BigCodeBench/847": [ + "General", + "General", + "General", + "System" + ], + "BigCodeBench/848": [ + "General", + "General" + ], + "BigCodeBench/849": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/850": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/851": [ + "General", + "General" + ], + "BigCodeBench/852": [ + "General", + "General" + ], + "BigCodeBench/853": [ + "System", + "General", + "System" + ], + "BigCodeBench/854": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/855": [ + "General", + "General", + "General" + ], + "BigCodeBench/856": [ + "General", + "Computation", + "General" + ], + "BigCodeBench/857": [ + "System", + "General", + "Time", + "System", + "System" + ], + "BigCodeBench/858": [ + "General", + "General", + "General" + ], + "BigCodeBench/859": [ + "General", + "Computation" + ], + "BigCodeBench/860": [ + "General", + "General", + "General" + ], + "BigCodeBench/861": [ + "General", + "General" + ], + "BigCodeBench/862": [ + "General", + "General", + "General" + ], + "BigCodeBench/863": [ + "Computation", + "Computation" + ], + "BigCodeBench/864": [ + 
"Computation", + "Computation" + ], + "BigCodeBench/865": [ + "Computation", + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/866": [ + "Computation", + "Computation" + ], + "BigCodeBench/867": [ + "General", + "General" + ], + "BigCodeBench/868": [ + "General", + "General" + ], + "BigCodeBench/869": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/870": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/871": [ + "Computation", + "General" + ], + "BigCodeBench/872": [ + "Computation", + "General" + ], + "BigCodeBench/873": [ + "System", + "System" + ], + "BigCodeBench/874": [ + "General", + "Computation" + ], + "BigCodeBench/875": [ + "Computation", + "General" + ], + "BigCodeBench/876": [ + "General", + "System", + "General", + "System" + ], + "BigCodeBench/877": [ + "Computation", + "Computation" + ], + "BigCodeBench/878": [ + "Computation", + "Computation" + ], + "BigCodeBench/879": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/880": [ + "Computation", + "Computation" + ], + "BigCodeBench/881": [ + "Computation", + "General" + ], + "BigCodeBench/882": [ + "System", + "Computation", + "System" + ], + "BigCodeBench/883": [ + "Computation", + "Computation" + ], + "BigCodeBench/884": [ + "Computation", + "Computation" + ], + "BigCodeBench/885": [ + "Computation", + "Computation" + ], + "BigCodeBench/886": [ + "Computation", + "General" + ], + "BigCodeBench/887": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/888": [ + "Computation", + "System" + ], + "BigCodeBench/889": [ + "Computation", + "Computation", + "System" + ], + "BigCodeBench/890": [ + "Computation", + "General", + "System" + ], + "BigCodeBench/891": [ + "Computation", + "Computation" + ], + "BigCodeBench/892": [ + "General", + "General" + ], + "BigCodeBench/893": [ + "Time", + "General" + ], + "BigCodeBench/894": [ + "Computation", + "Visualization" + ], + "BigCodeBench/895": [ + "Computation", + 
"Visualization" + ], + "BigCodeBench/896": [ + "General", + "General", + "General" + ], + "BigCodeBench/897": [ + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/898": [ + "General", + "General" + ], + "BigCodeBench/899": [ + "Computation", + "General" + ], + "BigCodeBench/900": [ + "Computation", + "Computation" + ], + "BigCodeBench/901": [ + "Computation", + "Computation" + ], + "BigCodeBench/902": [ + "Computation", + "General" + ], + "BigCodeBench/903": [ + "Computation", + "Computation" + ], + "BigCodeBench/904": [ + "Computation", + "Visualization" + ], + "BigCodeBench/905": [ + "System", + "System", + "System" + ], + "BigCodeBench/906": [ + "General", + "System", + "System", + "System" + ], + "BigCodeBench/907": [ + "General", + "System" + ], + "BigCodeBench/908": [ + "Computation", + "Visualization", + "General", + "System" + ], + "BigCodeBench/909": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/910": [ + "Computation", + "Visualization" + ], + "BigCodeBench/911": [ + "General", + "General", + "General" + ], + "BigCodeBench/912": [ + "General", + "General" + ], + "BigCodeBench/913": [ + "General", + "Computation", + "Computation" + ], + "BigCodeBench/914": [ + "Computation", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/915": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/916": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/917": [ + "Computation", + "General", + "Visualization", + "Computation" + ], + "BigCodeBench/918": [ + "Computation", + "General" + ], + "BigCodeBench/919": [ + "Computation", + "Visualization" + ], + "BigCodeBench/920": [ + "Computation", + "Visualization" + ], + "BigCodeBench/921": [ + "Computation", + "Computation" + ], + "BigCodeBench/922": [ + "Computation", + "General" + ], + "BigCodeBench/923": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/924": [ + "Computation", + "System", + "System" + 
], + "BigCodeBench/925": [ + "Computation", + "Computation" + ], + "BigCodeBench/926": [ + "System", + "Computation" + ], + "BigCodeBench/927": [ + "Computation", + "Computation" + ], + "BigCodeBench/928": [ + "General", + "General", + "General" + ], + "BigCodeBench/929": [ + "Computation", + "Computation" + ], + "BigCodeBench/930": [ + "General", + "General" + ], + "BigCodeBench/931": [ + "General", + "General" + ], + "BigCodeBench/932": [ + "General", + "General" + ], + "BigCodeBench/933": [ + "Computation", + "General" + ], + "BigCodeBench/934": [ + "Cryptography", + "General" + ], + "BigCodeBench/935": [ + "Computation", + "General" + ], + "BigCodeBench/936": [ + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/937": [ + "General", + "General" + ], + "BigCodeBench/938": [ + "Computation", + "General" + ], + "BigCodeBench/939": [ + "System", + "General", + "System" + ], + "BigCodeBench/940": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/941": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/942": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/943": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/944": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/945": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/946": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/947": [ + "Time", + "Computation", + "General" + ], + "BigCodeBench/948": [ + "Computation", + "Computation" + ], + "BigCodeBench/949": [ + "Computation", + "Computation" + ], + "BigCodeBench/950": [ + "Computation", + "Computation" + ], + "BigCodeBench/951": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/952": [ + "Computation", + "Time", + "General" + ], + "BigCodeBench/953": [ + "Computation", + "Visualization", + "System" + ], + "BigCodeBench/954": [ + "General", + "General" + ], + "BigCodeBench/955": 
[ + "General", + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/956": [ + "General", + "General", + "General" + ], + "BigCodeBench/957": [ + "General", + "General" + ], + "BigCodeBench/958": [ + "General", + "General" + ], + "BigCodeBench/959": [ + "General", + "General" + ], + "BigCodeBench/960": [ + "General", + "General" + ], + "BigCodeBench/961": [ + "System", + "General", + "System" + ], + "BigCodeBench/962": [ + "System", + "System", + "System", + "System" + ], + "BigCodeBench/963": [ + "System", + "System", + "System", + "System" + ], + "BigCodeBench/964": [ + "Computation", + "System", + "System", + "System" + ], + "BigCodeBench/965": [ + "System", + "General", + "System" + ], + "BigCodeBench/966": [ + "Computation", + "Visualization" + ], + "BigCodeBench/967": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/968": [ + "Computation", + "Visualization" + ], + "BigCodeBench/969": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/970": [ + "Computation", + "Visualization" + ], + "BigCodeBench/971": [ + "Time", + "System", + "System" + ], + "BigCodeBench/972": [ + "System", + "System" + ], + "BigCodeBench/973": [ + "System", + "System" + ], + "BigCodeBench/974": [ + "System", + "System" + ], + "BigCodeBench/975": [ + "Computation", + "Computation" + ], + "BigCodeBench/976": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/977": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/978": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/979": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/980": [ + "Computation", + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/981": [ + "Computation", + "Time", + "General" + ], + "BigCodeBench/982": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/983": [ + "Computation", + "Visualization" + ], + 
"BigCodeBench/984": [ + "Visualization", + "Computation" + ], + "BigCodeBench/985": [ + "Computation", + "Computation", + "System", + "System" + ], + "BigCodeBench/986": [ + "Computation", + "Visualization", + "Computation", + "Visualization", + "System" + ], + "BigCodeBench/987": [ + "Computation", + "Computation", + "Visualization", + "System" + ], + "BigCodeBench/988": [ + "System", + "General", + "System" + ], + "BigCodeBench/989": [ + "General", + "General" + ], + "BigCodeBench/990": [ + "Cryptography", + "Network", + "Cryptography", + "Cryptography" + ], + "BigCodeBench/991": [ + "General", + "General", + "Cryptography" + ], + "BigCodeBench/992": [ + "System", + "System" + ], + "BigCodeBench/993": [ + "General", + "Visualization", + "Computation" + ], + "BigCodeBench/994": [ + "Computation", + "Network", + "Network" + ], + "BigCodeBench/995": [ + "Computation", + "Computation", + "Visualization", + "System" + ], + "BigCodeBench/996": [ + "Network", + "Network", + "System" + ], + "BigCodeBench/997": [ + "Network", + "System", + "System" + ], + "BigCodeBench/998": [ + "System", + "Network", + "Cryptography", + "System" + ], + "BigCodeBench/999": [ + "Network", + "System", + "General", + "System" + ], + "BigCodeBench/1000": [ + "Computation", + "Network", + "System", + "System" + ], + "BigCodeBench/1001": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1002": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1003": [ + "Computation", + "Network", + "Network" + ], + "BigCodeBench/1004": [ + "Network", + "General", + "Visualization", + "General" + ], + "BigCodeBench/1005": [ + "Network", + "System", + "System" + ], + "BigCodeBench/1006": [ + "System", + "Network", + "System" + ], + "BigCodeBench/1007": [ + "Computation", + "Network" + ], + "BigCodeBench/1008": [ + "Computation", + "System", + "Network", + "Network" + ], + "BigCodeBench/1009": [ + "Network", + "System" + ], + "BigCodeBench/1010": [ + "System", + "Visualization", + "Network" + ], + 
"BigCodeBench/1011": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1012": [ + "System", + "Network", + "System" + ], + "BigCodeBench/1013": [ + "System", + "Network", + "Network", + "Network" + ], + "BigCodeBench/1014": [ + "Computation", + "Network" + ], + "BigCodeBench/1015": [ + "System", + "Computation", + "Network", + "Network" + ], + "BigCodeBench/1016": [ + "Visualization", + "Computation", + "Visualization", + "Network" + ], + "BigCodeBench/1017": [ + "Computation", + "Computation" + ], + "BigCodeBench/1018": [ + "Network", + "Network" + ], + "BigCodeBench/1019": [ + "Cryptography", + "Computation", + "Visualization" + ], + "BigCodeBench/1020": [ + "Network", + "Network", + "System" + ], + "BigCodeBench/1021": [ + "Cryptography", + "Cryptography" + ], + "BigCodeBench/1022": [ + "Computation", + "Time", + "System" + ], + "BigCodeBench/1023": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/1024": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/1025": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1026": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1027": [ + "Network", + "Cryptography" + ], + "BigCodeBench/1028": [ + "System", + "System", + "System", + "Time" + ], + "BigCodeBench/1029": [ + "Computation", + "Computation" + ], + "BigCodeBench/1030": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/1031": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/1032": [ + "Computation", + "Visualization", + "General", + "General", + "Visualization" + ], + "BigCodeBench/1033": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/1034": [ + "Computation", + "Computation" + ], + "BigCodeBench/1035": [ + "Computation", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1036": [ + "Computation", + "Visualization", + "Visualization" + ], + "BigCodeBench/1037": [ + "Computation", + 
"Visualization", + "Computation" + ], + "BigCodeBench/1038": [ + "Time", + "System" + ], + "BigCodeBench/1039": [ + "Cryptography", + "System", + "Network" + ], + "BigCodeBench/1040": [ + "General", + "Time", + "System", + "Network" + ], + "BigCodeBench/1041": [ + "General", + "System" + ], + "BigCodeBench/1042": [ + "Network", + "Network", + "System" + ], + "BigCodeBench/1043": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1044": [ + "Computation", + "Time" + ], + "BigCodeBench/1045": [ + "Time", + "Time", + "Computation" + ], + "BigCodeBench/1046": [ + "Computation", + "Time", + "General" + ], + "BigCodeBench/1047": [ + "Time", + "General", + "Visualization" + ], + "BigCodeBench/1048": [ + "Time", + "Computation", + "Visualization" + ], + "BigCodeBench/1049": [ + "Computation", + "General" + ], + "BigCodeBench/1050": [ + "Cryptography", + "System" + ], + "BigCodeBench/1051": [ + "General", + "Computation", + "Visualization" + ], + "BigCodeBench/1052": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1053": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1054": [ + "System", + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1055": [ + "Computation", + "General", + "General" + ], + "BigCodeBench/1056": [ + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/1057": [ + "Computation", + "Computation", + "General" + ], + "BigCodeBench/1058": [ + "Visualization", + "General", + "Visualization" + ], + "BigCodeBench/1059": [ + "Computation", + "Computation", + "General", + "General" + ], + "BigCodeBench/1060": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1061": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1062": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1063": [ + "Visualization", + "Computation" + ], + "BigCodeBench/1064": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1065": [ + "Visualization", + 
"Computation" + ], + "BigCodeBench/1066": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1067": [ + "System", + "Network" + ], + "BigCodeBench/1068": [ + "System", + "Computation", + "General" + ], + "BigCodeBench/1069": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1070": [ + "Computation", + "General" + ], + "BigCodeBench/1071": [ + "General", + "Computation", + "Visualization", + "General" + ], + "BigCodeBench/1072": [ + "Computation", + "Computation" + ], + "BigCodeBench/1073": [ + "Visualization", + "Time" + ], + "BigCodeBench/1074": [ + "Time", + "Time" + ], + "BigCodeBench/1075": [ + "Time", + "Computation", + "Visualization" + ], + "BigCodeBench/1076": [ + "Time", + "Computation", + "Time" + ], + "BigCodeBench/1077": [ + "Time", + "Time", + "Computation" + ], + "BigCodeBench/1078": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1079": [ + "Computation", + "Computation", + "Visualization" + ], + "BigCodeBench/1080": [ + "Computation", + "Computation" + ], + "BigCodeBench/1081": [ + "Computation", + "Visualization" + ], + "BigCodeBench/1082": [ + "Computation", + "Computation" + ], + "BigCodeBench/1083": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1084": [ + "Computation", + "Computation" + ], + "BigCodeBench/1085": [ + "General", + "Visualization", + "General" + ], + "BigCodeBench/1086": [ + "Computation", + "Computation", + "General", + "General" + ], + "BigCodeBench/1087": [ + "Computation", + "Visualization", + "Computation" + ], + "BigCodeBench/1088": [ + "Computation", + "Computation", + "Computation" + ], + "BigCodeBench/1089": [ + "General", + "Computation" + ], + "BigCodeBench/1090": [ + "General", + "General", + "System" + ], + "BigCodeBench/1091": [ + "System", + "General", + "System" + ], + "BigCodeBench/1092": [ + "General", + "Network", + "Network" + ], + "BigCodeBench/1093": [ + "General", + "General" + ], + "BigCodeBench/1094": [ + "Computation", + "General" + ], + 
"BigCodeBench/1095": [ + "Computation", + "General", + "System" + ], + "BigCodeBench/1096": [ + "Computation", + "System", + "General", + "System" + ], + "BigCodeBench/1097": [ + "General", + "General" + ], + "BigCodeBench/1098": [ + "General", + "General" + ], + "BigCodeBench/1099": [ + "General", + "General" + ], + "BigCodeBench/1100": [ + "General", + "Computation" + ], + "BigCodeBench/1101": [ + "System", + "System", + "System", + "Time" + ], + "BigCodeBench/1102": [ + "System", + "Time", + "System" + ], + "BigCodeBench/1103": [ + "System", + "System", + "System" + ], + "BigCodeBench/1104": [ + "System", + "System", + "System" + ], + "BigCodeBench/1105": [ + "System", + "System", + "System", + "Time" + ], + "BigCodeBench/1106": [ + "Time", + "System", + "System" + ], + "BigCodeBench/1107": [ + "Time", + "Time" + ], + "BigCodeBench/1108": [ + "General", + "General" + ], + "BigCodeBench/1109": [ + "Computation", + "System" + ], + "BigCodeBench/1110": [ + "General", + "General", + "General" + ], + "BigCodeBench/1111": [ + "General", + "General", + "General" + ], + "BigCodeBench/1112": [ + "System", + "General" + ], + "BigCodeBench/1113": [ + "System", + "General" + ], + "BigCodeBench/1114": [ + "General", + "General" + ], + "BigCodeBench/1115": [ + "General", + "General" + ], + "BigCodeBench/1116": [ + "Computation", + "General" + ], + "BigCodeBench/1117": [ + "General", + "General", + "System" + ], + "BigCodeBench/1118": [ + "System", + "System", + "Network", + "System" + ], + "BigCodeBench/1119": [ + "Cryptography", + "Cryptography", + "General", + "General" + ], + "BigCodeBench/1120": [ + "Network", + "General", + "Network", + "System" + ], + "BigCodeBench/1121": [ + "Network", + "General", + "Network", + "System" + ], + "BigCodeBench/1122": [ + "Network", + "Network", + "General" + ], + "BigCodeBench/1123": [ + "Network", + "General", + "Network", + "Network" + ], + "BigCodeBench/1124": [ + "General", + "Network", + "Network", + "Network" + ], + 
"BigCodeBench/1125": [ + "Network", + "General", + "System" + ], + "BigCodeBench/1126": [ + "Cryptography", + "General" + ], + "BigCodeBench/1127": [ + "Cryptography", + "General", + "System" + ], + "BigCodeBench/1128": [ + "Cryptography", + "Cryptography", + "Time", + "System", + "System" + ], + "BigCodeBench/1129": [ + "System", + "Time", + "Network", + "System" + ], + "BigCodeBench/1130": [ + "Cryptography", + "System", + "System", + "System" + ], + "BigCodeBench/1131": [ + "Cryptography", + "Cryptography" + ], + "BigCodeBench/1132": [ + "Cryptography", + "Cryptography", + "System" + ], + "BigCodeBench/1133": [ + "Network", + "System" + ], + "BigCodeBench/1134": [ + "System", + "Cryptography", + "System" + ], + "BigCodeBench/1135": [ + "General", + "Network", + "System" + ], + "BigCodeBench/1136": [ + "System", + "General", + "Network", + "Network" + ], + "BigCodeBench/1137": [ + "General", + "Network", + "Network", + "System" + ], + "BigCodeBench/1138": [ + "Computation", + "General" + ], + "BigCodeBench/1139": [ + "Computation", + "Computation", + "Computation" + ] +} \ No newline at end of file diff --git a/analysis/utils.py b/analysis/utils.py index ba2c1a5..88fc003 100755 --- a/analysis/utils.py +++ b/analysis/utils.py @@ -602,7 +602,7 @@ "act_param": 7, "open-data": "None", }, - "mistralai/Codestral-22B-v0.1": { + "codestral-2405": { "name": "Codestral-22B-v0.1", "link": "https://huggingface.co/mistralai/Codestral-22B-v0.1", "prompted": True, @@ -611,6 +611,15 @@ "act_param": 22, "open-data": "None", }, + "codestral-mamba-2407": { + "name": "Codestral-Mamba", + "link": "https://huggingface.co/mistralai/mamba-codestral-7B-v0.1", + "prompted": True, + "moe": False, + "size": 7, + "act_param": 7, + "open-data": "None", + }, "mistralai/Mistral-7B-Instruct-v0.3": { "name": "Mistral-7B-Instruct-v0.3", "link": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3", @@ -863,4 +872,103 @@ "act_param": 9, "open-data": "None", }, + "gpt-4o-mini-2024-07-18": { + 
"name": "GPT-4o-mini-2024-07-18", + "link": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/", + "prompted": True, + "moe": False, + "size": None, + "act_param": None, + "open-data": "None", + }, + "Nexusflow/Athene-70B": { + "name": "Athene-70B", + "link": "https://huggingface.co/Nexusflow/Athene-70B", + "prompted": True, + "moe": False, + "size": 70, + "act_param": 70, + "open-data": "None", + }, + "NTQAI/Nxcode-CQ-7B-orpo": { + "name": "Nxcode-CQ-7B-Orpo", + "link": "https://huggingface.co/NTQAI/Nxcode-CQ-7B-orpo", + "prompted": True, + "moe": False, + "size": 7, + "act_param": 7, + "open-data": "None", + }, + "migtissera/Llama-3-70B-Synthia-v3.5": { + "name": "Llama-3-70B-Synthia-v3.5", + "link": "https://huggingface.co/migtissera/Llama-3-70B-Synthia-v3.5", + "prompted": True, + "moe": False, + "size": 70, + "act_param": 70, + "open-data": "None", + }, + "migtissera/Tess-v2.5.2-Qwen2-72B": { + "name": "Tess-v2.5.2-Qwen2-72B", + "link": "https://huggingface.co/migtissera/Tess-v2.5.2-Qwen2-72B", + "prompted": True, + "moe": False, + "size": 72, + "act_param": 72, + "open-data": "None", + }, + "WhiteRabbitNeo/WhiteRabbitNeo-33B-v1.5": { + "name": "WhiteRabbitNeo-33B-v1.5", + "link": "https://huggingface.co/WhiteRabbitNeo/WhiteRabbitNeo-33B-v1.5", + "prompted": True, + "moe": False, + "size": 33, + "act_param": 33, + "open-data": "None", + }, + "mistral-large-2407": { + "name": "Mistral-Large-Instruct-2407", + "link": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407", + "prompted": True, + "moe": False, + "size": 123, + "act_param": 123, + "open-data": "None", + }, + "meta-llama/Meta-Llama-3.1-8B-Instruct": { + "name": "Llama-3.1-8B-Instruct", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct", + "prompted": True, + "moe": False, + "size": 8, + "act_param": 8, + "open-data": "None", + }, + "meta-llama/Meta-Llama-3.1-70B-Instruct": { + "name": "Llama-3.1-70B-Instruct", + "link": 
"https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct", + "prompted": True, + "moe": False, + "size": 70, + "act_param": 70, + "open-data": "None", + }, + "meta--llama-3.1-405b-instruct": { + "name": "Llama-3.1-405B-Instruct", + "link": "https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct", + "prompted": True, + "moe": False, + "size": 405, + "act_param": 405, + "open-data": "None", + }, + "deepseek-coder-20240724": { + "name": "DeepSeek-Coder-V2-Instruct (2024-07-24)", + "link": "https://www.deepseek.com/", + "prompted": True, + "moe": True, + "size": 236, + "act_param": 21, + "open-data": "None", + }, } \ No newline at end of file diff --git a/bigcodebench/eval/__init__.py b/bigcodebench/eval/__init__.py index 1a8fcfc..3596f53 100644 --- a/bigcodebench/eval/__init__.py +++ b/bigcodebench/eval/__init__.py @@ -178,8 +178,8 @@ def untrusted_check( min_time_limit: float = 10, gt_time_limit: float = 60 ) -> Tuple[str, np.ndarray]: - time_limit = max(min_time_limit, gt_time_limit) - timeout = max(os.getenv("BIGCODEBENCH_TIMEOUT_PER_TASK", TIMEOUT_LIMIT), time_limit) + 1 + min_time_limit = max(min_time_limit, gt_time_limit) + timeout = max(os.getenv("BIGCODEBENCH_TIMEOUT_PER_TASK", TIMEOUT_LIMIT), min_time_limit) + 1 # shared memory objects stat = Value("i", _UNKNOWN) manager = Manager() diff --git a/bigcodebench/eval/utils.py b/bigcodebench/eval/utils.py index 844d2ea..82b8085 100644 --- a/bigcodebench/eval/utils.py +++ b/bigcodebench/eval/utils.py @@ -29,6 +29,7 @@ import tempfile import subprocess import multiprocessing +import time from typing import Optional TIMEOUT_LIMIT=240.0 @@ -141,7 +142,7 @@ def safe_kill(pid, sig): else: print(f"Prevented attempt to kill PID {pid} with signal {sig}") except ProcessLookupError: - print(f"Process {pid} does not exist.") + pass def safe_killpg(pgid, sig): if pgid == current_pgid or pgid in {os.getpgid(pid) for pid in child_pids}: @@ -221,7 +222,22 @@ def safe_exec(*args, **kwargs): try: yield finally: - # 
Restore original functions after the block + for pid in child_pids: + try: + os.kill(pid, signal.SIGTERM) + for _ in range(10): + time.sleep(0.1) + try: + os.kill(pid, 0) + except ProcessLookupError: + break + else: + os.kill(pid, signal.SIGKILL) + except ProcessLookupError: + pass + except Exception as e: + print(f"Error handling process {pid}: {e}") + os.kill = original_kill os.killpg = original_killpg os.system = original_system diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py index 8cc91d8..61e2a43 100644 --- a/bigcodebench/evaluate.py +++ b/bigcodebench/evaluate.py @@ -34,7 +34,7 @@ Result = Tuple[str, List[bool]] -def get_groundtruth(n_workers, problems, hashcode, check_gt_only, max_as_limit, max_data_limit, max_stack_limit): +def get_groundtruth(n_workers, problems, hashcode, check_gt_only, max_as_limit, max_data_limit, max_stack_limit, min_time_limit): cache_file = os.path.join(CACHE_DIR, f"{hashcode}.pkl") if os.path.exists(cache_file): if check_gt_only: @@ -60,7 +60,8 @@ def get_groundtruth(n_workers, problems, hashcode, check_gt_only, max_as_limit, problem["task_id"], max_as_limit, max_data_limit, - max_stack_limit + max_stack_limit, + min_time_limit, ) futures.append(executor.submit(trusted_check, *args)) @@ -129,11 +130,12 @@ def evaluate(flags): dataset_hash = get_bigcodebench_hash(subset=flags.subset) if not flags.no_gt: - expected_time = get_groundtruth(n_workers, problems, dataset_hash, flags.check_gt_only, flags.max_as_limit, flags.max_data_limit, flags.max_stack_limit) + expected_time = get_groundtruth(n_workers, problems, dataset_hash, flags.check_gt_only, flags.max_as_limit, flags.max_data_limit, flags.max_stack_limit, flags.min_time_limit) else: expected_time = {task_id: None for task_id in problems} gt_pass_rate = np.mean([1 if v is not None else 0 for k, v in expected_time.items() if k in problems]) + failed_tasks = [k for k, v in expected_time.items() if v is None and k in problems] if os.path.isfile(result_path): 
print(f"Load from previous results from {result_path}") @@ -148,6 +150,10 @@ def evaluate(flags): cprint(f"Groundtruth pass rate: {gt_pass_rate:.3f}", "green") else: cprint(f"Groundtruth pass rate: {gt_pass_rate:.3f}\nPlease be cautious!", "red") + + if len(failed_tasks) > 0: + cprint(f"Failed tasks: {failed_tasks}", "red") + return results = { @@ -259,6 +265,9 @@ def stucking_checker(): cprint(f"Groundtruth pass rate: {gt_pass_rate:.3f}", "green") else: cprint(f"Groundtruth pass rate: {gt_pass_rate:.3f}\nPlease be cautious!", "red") + + if len(failed_tasks) > 0: + cprint(f"Failed tasks: {failed_tasks}", "red") for k, v in pass_at_k.items(): cprint(f"{k}:\t{v:.3f}", "green") @@ -281,33 +290,34 @@ def stucking_checker(): if not os.path.isfile(result_path): with open(result_path, "w") as f: json.dump(results, f, indent=2) - - pass_at_k_path = result_path.replace("_eval_results.json", "_pass_at_k.json") - pass_at_k["model"] = os.path.basename(flags.samples).split("--bigcodebench-")[0] - pass_at_k["calibrated"] = "sanitized-calibrated" in flags.samples - pass_at_k["subset"] = flags.subset - - def save_pass_at_k(): - with open(pass_at_k_path, "w") as f: - json.dump(pass_at_k, f, indent=2) - - if os.path.isfile(pass_at_k_path): - saved_pass_at_k = json.load(open(pass_at_k_path, "r")) - # compare saved_pass_at_k with pass_at_k - for k in saved_pass_at_k.keys(): - if pass_at_k[k] != saved_pass_at_k[k]: - cprint(f"Warning: {k} is different from the saved one", "yellow") + + if flags.save_pass_rate: + pass_at_k_path = result_path.replace("_eval_results.json", "_pass_at_k.json") + pass_at_k["model"] = os.path.basename(flags.samples).split("--bigcodebench-")[0] + pass_at_k["calibrated"] = "sanitized-calibrated" in flags.samples + pass_at_k["subset"] = flags.subset + + def save_pass_at_k(): + with open(pass_at_k_path, "w") as f: + json.dump(pass_at_k, f, indent=2) + + if os.path.isfile(pass_at_k_path): + saved_pass_at_k = json.load(open(pass_at_k_path, "r")) + # compare 
saved_pass_at_k with pass_at_k + for k in saved_pass_at_k.keys(): + if pass_at_k[k] != saved_pass_at_k[k]: + cprint(f"Warning: {k} is different from the saved one", "yellow") + + # ask user whether to save the pass@k + decision = "" + while decision.lower() not in ["y", "n"]: + print(f"Save pass@k to {pass_at_k_path}? [Y/N]") + decision = input() + if decision.lower() == "y": + save_pass_at_k() - # ask user whether to save the pass@k - decision = "" - while decision.lower() not in ["y", "n"]: - print(f"Save pass@k to {pass_at_k_path}? [Y/N]") - decision = input() - if decision.lower() == "y": + else: save_pass_at_k() - - else: - save_pass_at_k() def main(): @@ -315,18 +325,19 @@ def main(): parser.add_argument( "--split", required=True, type=str, choices=["complete", "instruct"] ) - parser.add_argument("--subset", default="full", type=str, choices=["full", "hard"]) + parser.add_argument("--subset", default="hard", type=str, choices=["full", "hard"]) parser.add_argument("--samples", required=True, type=str) + parser.add_argument("--save_pass_rate", action="store_true") parser.add_argument("--parallel", default=None, type=int) parser.add_argument("--min-time-limit", default=1, type=float) - parser.add_argument("--max-as-limit", default=128*1024, type=int) - parser.add_argument("--max-data-limit", default=4*1024, type=int) - parser.add_argument("--max-stack-limit", default=5, type=int) + parser.add_argument("--max-as-limit", default=30*1024, type=int) + parser.add_argument("--max-data-limit", default=30*1024, type=int) + parser.add_argument("--max-stack-limit", default=10, type=int) parser.add_argument( - "--check-gt-only", action="store_true", help="Check the groundtruth" + "--check-gt-only", action="store_true", help="Check the ground truth" ) parser.add_argument( - "--no-gt", action="store_true", help="Check the groundtruth" + "--no-gt", action="store_true", help="Skip the ground truth" ) args = parser.parse_args() diff --git a/bigcodebench/gen/util/__init__.py 
b/bigcodebench/gen/util/__init__.py index f8f6238..d8088ad 100644 --- a/bigcodebench/gen/util/__init__.py +++ b/bigcodebench/gen/util/__init__.py @@ -54,8 +54,10 @@ def trusted_exec(code, test_code, task_id, max_as_limit, max_data_limit, max_sta start = time.time() with safe_environment(), swallow_io(), time_limit(seconds=TIMEOUT_LIMIT): suite.run(test_result) - - if len(test_result.failures + test_result.errors) > 0: + + errors = test_result.failures + test_result.errors + if len(errors) > 0: + print(errors) times.value = -1 else: times.value = time.time() - start @@ -83,8 +85,9 @@ def trusted_check( max_as_limit: float, max_data_limit: float, max_stack_limit: float, + min_time_limit: float = 10, ): - timeout = os.getenv("BIGCODEBENCH_TIMEOUT_PER_TASK", TIMEOUT_LIMIT) + 1 + timeout = max(os.getenv("BIGCODEBENCH_TIMEOUT_PER_TASK", TIMEOUT_LIMIT), min_time_limit) + 1 # shared memory objects times = Value("d", -1) manager = Manager() diff --git a/bigcodebench/inspect.py b/bigcodebench/inspect.py index b06f5bd..50c7e01 100755 --- a/bigcodebench/inspect.py +++ b/bigcodebench/inspect.py @@ -14,15 +14,17 @@ def inspection(args): -- completion.py: prompt + completion -- execution_trace.txt: execution trace """ - path = os.path.join("inspect", args.eval_results.split("/")[-1].replace(".json", "")) + path = os.path.join(args.save_path, args.eval_results.split("/")[-1].replace(".json", "")) if args.in_place: shutil.rmtree(path, ignore_errors=True) if not os.path.exists(path): os.makedirs(path) - problems = get_bigcodebench() + problems = get_bigcodebench(subset=args.subset) eval_results = json.load(open(args.eval_results, "r")) for task_id, results in eval_results["eval"].items(): + if task_id not in problems: + continue if all(result["status"] == "pass" for result in results): continue task_path = os.path.join(path, task_id) @@ -30,7 +32,7 @@ def inspection(args): os.makedirs(task_path) task_id_data = problems[task_id] with open(os.path.join(task_path, "ground_truth.py"), 
"w") as f: - f.write(task_id_data[f"{args.subset}_prompt"] + "\n\n" + task_id_data["canonical_solution"]) + f.write(task_id_data[f"{args.split}_prompt"] + "\n\n" + task_id_data["canonical_solution"]) # write test with open(os.path.join(task_path, "test_case.py"), "w") as f: @@ -48,9 +50,13 @@ def inspection(args): f.write("="*50 + "\n") def main(): parser = argparse.ArgumentParser() - parser.add_argument("--eval-results", required=True, type=str) - parser.add_argument("--subset", required=True, type=str) - parser.add_argument("--in-place", action="store_true") + parser.add_argument("--eval_results", required=True, type=str) + parser.add_argument( + "--split", required=True, type=str, choices=["complete", "instruct"] + ) + parser.add_argument("--subset", default="hard", type=str, choices=["full", "hard"]) + parser.add_argument("--save_path", default="inspect", type=str) + parser.add_argument("--in_place", action="store_true") args = parser.parse_args() inspection(args) diff --git a/release_docker.sh b/release_docker.sh index 3c26ed1..3b9d104 100755 --- a/release_docker.sh +++ b/release_docker.sh @@ -28,4 +28,9 @@ docker push bigcodebench/bigcodebench-evaluate:latest docker build -f Docker/Generate.Dockerfile . -t bigcodebench/bigcodebench-generate:$version docker tag bigcodebench/bigcodebench-generate:$version bigcodebench/bigcodebench-generate:latest docker push bigcodebench/bigcodebench-generate:$version -docker push bigcodebench/bigcodebench-generate:latest \ No newline at end of file +docker push bigcodebench/bigcodebench-generate:latest + +docker build -f Docker/Gradio.Dockerfile . -t bigcodebench/bigcodebench-gradio:$version +docker tag bigcodebench/bigcodebench-gradio:$version bigcodebench/bigcodebench-gradio:latest +docker push bigcodebench/bigcodebench-gradio:$version +docker push bigcodebench/bigcodebench-gradio:latest \ No newline at end of file