Commit c108d0b

update papers
1 parent 8121f92 commit c108d0b

2 files changed: +12 -3 lines changed

_bibliography/papers.bib

Lines changed: 4 additions & 3 deletions
@@ -1,6 +1,7 @@
 ---
 ---
 @misc{manh2024codemmlumultitaskbenchmarkassessing,
+abbr={ICLR'25},
 title={CodeMMLU: A Multi-Task Benchmark for Assessing Code Understanding Capabilities of CodeLLMs},
 author={Dung Nguyen Manh and Thang Phan Chau and Nam Le Hai and Thong T. Doan and Nam V. Nguyen and Quang Pham and Nghi D. Q. Bui},
 abstract={Recent advancements in Code Large Language Models (CodeLLMs) have predominantly focused on open-ended code generation tasks, often neglecting the critical aspect of code understanding and comprehension. To bridge this gap, we present CodeMMLU, a comprehensive multiple-choice question-answer benchmark designed to evaluate the depth of software and code understanding in LLMs. CodeMMLU includes over 10,000 questions sourced from diverse domains, encompassing tasks such as code analysis, defect detection, and software engineering principles across multiple programming languages. Unlike traditional benchmarks, CodeMMLU assesses models’ ability to reason about code rather than merely generate it, providing deeper insights into their grasp of complex software concepts and systems. Our extensive evaluation reveals that even state-of-the-art models face significant challenges with CodeMMLU, highlighting deficiencies in comprehension beyond code generation. By underscoring the crucial relationship between code understanding and effective generation, CodeMMLU serves as a vital resource for advancing AI-assisted software development, ultimately aiming to create more reliable and capable coding assistants.},
@@ -15,10 +16,10 @@ @misc{manh2024codemmlumultitaskbenchmarkassessing
 code={https://github.com/FSoft-AI4Code/CodeMMLU},
 selected={true},
 preview={codemmlu-preview.png},
-abbr={ArXiv.preprint},
 }

 @misc{hai2024impactscontextsrepositorylevelcode,
+abbr={NAACL'25},
 title={On the Impacts of Contexts on Repository-Level Code Generation},
 author={Nam Le Hai and Dung Manh Nguyen and Nghi D. Q. Bui},
 abstract={CodeLLMs have gained widespread adoption for code generation tasks, yet their capacity to handle repository-level code generation with complex contextual dependencies remains underexplored. Our work underscores the critical importance of leveraging repository-level contexts to generate executable and functionally correct code. We present RepoExec, a novel benchmark designed to evaluate repository-level code generation, with a focus on three key aspects: executability, functional correctness through comprehensive test case generation, and accurate utilization of cross-file contexts. Our study examines a controlled scenario where developers specify essential code dependencies (contexts), challenging models to integrate them effectively. Additionally, we introduce an instruction-tuned dataset that enhances CodeLLMs' ability to leverage dependencies, along with a new metric, Dependency Invocation Rate (DIR), to quantify context utilization. Experimental results reveal that while pretrained LLMs demonstrate superior performance in terms of correctness, instruction-tuned models excel in context utilization and debugging capabilities. RepoExec offers a comprehensive evaluation framework for assessing code functionality and alignment with developer intent, thereby advancing the development of more reliable CodeLLMs for real-world applications. The dataset and source code are available at this https URL.},
@@ -29,16 +30,16 @@ @misc{hai2024impactscontextsrepositorylevelcode
 pdf={2406.11927v3.pdf},
 url={https://arxiv.org/abs/2406.11927},
 bibtex_show={true},
+selected={true},
 arxiv={2406.11927},
-abbr={ArXiv.preprint},
 code={https://github.com/FSoft-AI4Code/RepoExec}
 }

 @misc{manh2023vault,
 abbr={EMNLP'23},
 title={The Vault: A Comprehensive Multilingual Dataset for Advancing Code Understanding and Generation},
-abstract={We present The Vault, an open-source, large-scale code-text dataset designed to enhance the training of code-focused large language models (LLMs). Existing open-source datasets for training code-based LLMs often face challenges in terms of size, quality (due to noisy signals), and format (only containing code function and text explanation pairings). The Vault overcomes these limitations by providing 40 million code-text pairs across 10 popular programming languages, thorough cleaning for 10+ prevalent issues, and various levels of code-text pairings, including class, function, and line levels. Researchers and practitioners can utilize The Vault for training diverse code-focused LLMs or incorporate the provided data cleaning methods and scripts to improve their datasets. By employing The Vault as the training dataset for code-centric LLMs, we anticipate significant advancements in code understanding and generation tasks, fostering progress in both artificial intelligence research and software development practices.},
 author={Dung Nguyen Manh and Nam Le Hai and Anh T. V. Dau and Anh Minh Nguyen and Khanh Nghiem and Jin Guo and Nghi D. Q. Bui},
+abstract={We present The Vault, an open-source, large-scale code-text dataset designed to enhance the training of code-focused large language models (LLMs). Existing open-source datasets for training code-based LLMs often face challenges in terms of size, quality (due to noisy signals), and format (only containing code function and text explanation pairings). The Vault overcomes these limitations by providing 40 million code-text pairs across 10 popular programming languages, thorough cleaning for 10+ prevalent issues, and various levels of code-text pairings, including class, function, and line levels. Researchers and practitioners can utilize The Vault for training diverse code-focused LLMs or incorporate the provided data cleaning methods and scripts to improve their datasets. By employing The Vault as the training dataset for code-centric LLMs, we anticipate significant advancements in code understanding and generation tasks, fostering progress in both artificial intelligence research and software development practices.},
 year={2023},
 eprint={2305.06156},
 archivePrefix={arXiv},
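The abbr values introduced here (ICLR'25, NAACL'25, alongside the existing EMNLP'23) appear to act as lookup keys into _data/venues.yml, which supplies the url and color for the venue badge on the publications page, so the renamed badges only render correctly if matching keys exist in that file. Below is a minimal sketch, not code from this repository, that lists every abbr the bibliography will try to resolve; the file path and the regex-based parse are assumptions.

```python
import re
from pathlib import Path


def collect_abbrs(bib_path="_bibliography/papers.bib"):
    """Return the set of abbr={...} values found in the BibTeX file.

    Hypothetical helper: a regex scan is enough for flat abbr fields like
    the ones in this diff; a full parser (e.g. bibtexparser) would be more
    robust for nested braces.
    """
    text = Path(bib_path).read_text(encoding="utf-8")
    return set(re.findall(r"abbr\s*=\s*\{([^}]*)\}", text))


if __name__ == "__main__":
    # From the entries visible in this diff: EMNLP'23, ICLR'25, NAACL'25.
    for abbr in sorted(collect_abbrs()):
        print(abbr)
```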

_data/venues.yml

Lines changed: 8 additions & 0 deletions
@@ -12,3 +12,11 @@
 "ArXiv.preprint":
   url: https://arxiv.org/
   color: "#A03232"
+
+"ICLR’25":
+  url: https://iclr.cc/Conferences/2025
+  color: "#B7E892"
+
+"NAACL’25":
+  url: https://2025.naacl.org/
+  color: "#B7E892"
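Since the badge lookup is an exact string match against these keys, it is worth checking that every abbr in papers.bib has a counterpart here; note that a straight apostrophe (ICLR'25) and a curly one (ICLR’25) are different strings and will not match. A hedged consistency-check sketch, assuming PyYAML is installed and the default al-folio-style file locations; none of this code is part of the commit.

```python
import re
from pathlib import Path

import yaml  # PyYAML, assumed available (pip install pyyaml)


def collect_abbrs(bib_path="_bibliography/papers.bib"):
    # Same regex-based helper as in the earlier sketch.
    text = Path(bib_path).read_text(encoding="utf-8")
    return set(re.findall(r"abbr\s*=\s*\{([^}]*)\}", text))


def check_venue_keys(venues_path="_data/venues.yml"):
    # Venue names defined in venues.yml map each abbr to a url and color.
    venues = yaml.safe_load(Path(venues_path).read_text(encoding="utf-8")) or {}
    missing = collect_abbrs() - set(venues)
    for abbr in sorted(missing):
        print(f"abbr {abbr!r} has no matching key in {venues_path}")
    return not missing


if __name__ == "__main__":
    raise SystemExit(0 if check_venue_keys() else 1)
```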
