@inproceedings{awasthi-etal-2023-bootstrapping,
title = "Bootstrapping Multilingual Semantic Parsers using Large Language Models",
author = "Awasthi, Abhijeet and
Gupta, Nitish and
Samanta, Bidisha and
Dave, Shachi and
Sarawagi, Sunita and
Talukdar, Partha",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-main.180/",
doi = "10.18653/v1/2023.eacl-main.180",
pages = "2455--2467",
abstract = "Despite cross-lingual generalization demonstrated by pre-trained multilingual models, the translate-train paradigm of transferring English datasets across multiple languages remains to be a key mechanism for training task-specific multilingual models. However, for many low-resource languages, the availability of a reliable translation service entails significant amounts of costly human-annotated translation pairs. Further, translation services may continue to be brittle due to domain mismatch between task-specific input text and general-purpose text used for training translation models. For multilingual semantic parsing, we demonstrate the effectiveness and flexibility offered by large language models (LLMs) for translating English datasets into several languages via few-shot prompting. Through extensive comparisons on two public datasets, MTOP and MASSIVE, spanning 50 languages and several domains, we show that our method of translating data using LLMs outperforms a strong translate-train baseline on 41 out of 50 languages. We study the key design choices that enable more effective multilingual data translation via prompted LLMs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="awasthi-etal-2023-bootstrapping">
<titleInfo>
<title>Bootstrapping Multilingual Semantic Parsers using Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abhijeet</namePart>
<namePart type="family">Awasthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nitish</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bidisha</namePart>
<namePart type="family">Samanta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shachi</namePart>
<namePart type="family">Dave</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunita</namePart>
<namePart type="family">Sarawagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Partha</namePart>
<namePart type="family">Talukdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Despite cross-lingual generalization demonstrated by pre-trained multilingual models, the translate-train paradigm of transferring English datasets across multiple languages remains to be a key mechanism for training task-specific multilingual models. However, for many low-resource languages, the availability of a reliable translation service entails significant amounts of costly human-annotated translation pairs. Further, translation services may continue to be brittle due to domain mismatch between task-specific input text and general-purpose text used for training translation models. For multilingual semantic parsing, we demonstrate the effectiveness and flexibility offered by large language models (LLMs) for translating English datasets into several languages via few-shot prompting. Through extensive comparisons on two public datasets, MTOP and MASSIVE, spanning 50 languages and several domains, we show that our method of translating data using LLMs outperforms a strong translate-train baseline on 41 out of 50 languages. We study the key design choices that enable more effective multilingual data translation via prompted LLMs.</abstract>
<identifier type="citekey">awasthi-etal-2023-bootstrapping</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-main.180</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-main.180/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>2455</start>
<end>2467</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bootstrapping Multilingual Semantic Parsers using Large Language Models
%A Awasthi, Abhijeet
%A Gupta, Nitish
%A Samanta, Bidisha
%A Dave, Shachi
%A Sarawagi, Sunita
%A Talukdar, Partha
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F awasthi-etal-2023-bootstrapping
%X Despite cross-lingual generalization demonstrated by pre-trained multilingual models, the translate-train paradigm of transferring English datasets across multiple languages remains to be a key mechanism for training task-specific multilingual models. However, for many low-resource languages, the availability of a reliable translation service entails significant amounts of costly human-annotated translation pairs. Further, translation services may continue to be brittle due to domain mismatch between task-specific input text and general-purpose text used for training translation models. For multilingual semantic parsing, we demonstrate the effectiveness and flexibility offered by large language models (LLMs) for translating English datasets into several languages via few-shot prompting. Through extensive comparisons on two public datasets, MTOP and MASSIVE, spanning 50 languages and several domains, we show that our method of translating data using LLMs outperforms a strong translate-train baseline on 41 out of 50 languages. We study the key design choices that enable more effective multilingual data translation via prompted LLMs.
%R 10.18653/v1/2023.eacl-main.180
%U https://aclanthology.org/2023.eacl-main.180/
%U https://doi.org/10.18653/v1/2023.eacl-main.180
%P 2455-2467
Markdown (Informal)
[Bootstrapping Multilingual Semantic Parsers using Large Language Models](https://aclanthology.org/2023.eacl-main.180/) (Awasthi et al., EACL 2023)
ACL
- Abhijeet Awasthi, Nitish Gupta, Bidisha Samanta, Shachi Dave, Sunita Sarawagi, and Partha Talukdar. 2023. Bootstrapping Multilingual Semantic Parsers using Large Language Models. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pages 2455–2467, Dubrovnik, Croatia. Association for Computational Linguistics.