@inproceedings{veerendranath-etal-2022-c3po,
title = "{C3PO}: {A} Lightweight Copying Mechanism for Translating Pseudocode to Code",
author = "Veerendranath, Vishruth and
Masti, Vibha and
Anagani, Prajwal and
Hr, Mamatha",
editor = "Hanqi, Yan and
Zonghan, Yang and
Ruder, Sebastian and
Xiaojun, Wan",
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing: Student Research Workshop",
month = nov,
year = "2022",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.aacl-srw.7",
pages = "47--53",
abstract = "Writing computer programs is a skill that remains inaccessible to most due to the barrier of programming language (PL) syntax. While large language models (LLMs) have been proposed to translate natural language pseudocode to PL code, they are costly in terms of data and compute. We propose a lightweight alternative to LLMs that exploits the property of code wherein most tokens can be simply copied from the pseudocode. We divide the problem into three phases: Copy, Generate, and Combine. In the Copy Phase, a binary classifier is employed to determine and mask the pseudocode tokens that can be directly copied into the code. In the Generate Phase, a Sequence-to-Sequence model is used to generate the masked PL code equivalent. In the Combine Phase, the generated sequence is combined with the tokens that the Copy Phase had masked. We show that our C3PO models achieve similar performance to non-C3PO models while reducing the computational cost of training as well as the vocabulary sizes.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="veerendranath-etal-2022-c3po">
<titleInfo>
<title>C3PO: A Lightweight Copying Mechanism for Translating Pseudocode to Code</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vishruth</namePart>
<namePart type="family">Veerendranath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vibha</namePart>
<namePart type="family">Masti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prajwal</namePart>
<namePart type="family">Anagani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mamatha</namePart>
<namePart type="family">Hr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yan</namePart>
<namePart type="family">Hanqi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Zonghan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Ruder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wan</namePart>
<namePart type="family">Xiaojun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Writing computer programs is a skill that remains inaccessible to most due to the barrier of programming language (PL) syntax. While large language models (LLMs) have been proposed to translate natural language pseudocode to PL code, they are costly in terms of data and compute. We propose a lightweight alternative to LLMs that exploits the property of code wherein most tokens can be simply copied from the pseudocode. We divide the problem into three phases: Copy, Generate, and Combine. In the Copy Phase, a binary classifier is employed to determine and mask the pseudocode tokens that can be directly copied into the code. In the Generate Phase, a Sequence-to-Sequence model is used to generate the masked PL code equivalent. In the Combine Phase, the generated sequence is combined with the tokens that the Copy Phase had masked. We show that our C3PO models achieve similar performance to non-C3PO models while reducing the computational cost of training as well as the vocabulary sizes.</abstract>
<identifier type="citekey">veerendranath-etal-2022-c3po</identifier>
<location>
<url>https://aclanthology.org/2022.aacl-srw.7</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>47</start>
<end>53</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T C3PO: A Lightweight Copying Mechanism for Translating Pseudocode to Code
%A Veerendranath, Vishruth
%A Masti, Vibha
%A Anagani, Prajwal
%A Hr, Mamatha
%Y Hanqi, Yan
%Y Zonghan, Yang
%Y Ruder, Sebastian
%Y Xiaojun, Wan
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing: Student Research Workshop
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online
%F veerendranath-etal-2022-c3po
%X Writing computer programs is a skill that remains inaccessible to most due to the barrier of programming language (PL) syntax. While large language models (LLMs) have been proposed to translate natural language pseudocode to PL code, they are costly in terms of data and compute. We propose a lightweight alternative to LLMs that exploits the property of code wherein most tokens can be simply copied from the pseudocode. We divide the problem into three phases: Copy, Generate, and Combine. In the Copy Phase, a binary classifier is employed to determine and mask the pseudocode tokens that can be directly copied into the code. In the Generate Phase, a Sequence-to-Sequence model is used to generate the masked PL code equivalent. In the Combine Phase, the generated sequence is combined with the tokens that the Copy Phase had masked. We show that our C3PO models achieve similar performance to non-C3PO models while reducing the computational cost of training as well as the vocabulary sizes.
%U https://aclanthology.org/2022.aacl-srw.7
%P 47-53
Markdown (Informal)
[C3PO: A Lightweight Copying Mechanism for Translating Pseudocode to Code](https://aclanthology.org/2022.aacl-srw.7) (Veerendranath et al., AACL-IJCNLP 2022)
ACL
- Vishruth Veerendranath, Vibha Masti, Prajwal Anagani, and Mamatha Hr. 2022. C3PO: A Lightweight Copying Mechanism for Translating Pseudocode to Code. In Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing: Student Research Workshop, pages 47–53, Online. Association for Computational Linguistics.