0% found this document useful (0 votes)
35 views2 pages

Translation App Code

Download as docx, pdf, or txt
Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1/ 2

import functools
import io
import os
import re
import string

import streamlit as st
from docx import Document
from transformers import MarianMTModel, MarianTokenizer

def extract_names(text: str) -> set:
    """
    Extract potential names from the given text.

    The text is split on whitespace and every word for which
    ``str.istitle()`` is true (for example ``John`` or ``"Paris,"``) is kept,
    with leading and trailing ASCII punctuation stripped. This is only a
    heuristic: any title-cased word, such as the first word of a sentence,
    is reported as a potential name.

    Args:
        text (str): Input text to extract names from.

    Returns:
        set: Set of potential names.
    """
    return {
        word.strip(string.punctuation)
        for word in text.split()
        if word.istitle()
    }

# Bounded so that switching between many target languages does not keep
# every loaded model in memory at once.
@functools.lru_cache(maxsize=4)
def _load_translation_model(model_name):
    """Load the Marian model and tokenizer for *model_name*, memoized by name."""
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    return model, tokenizer


def translate_to_language(text, target_language, max_length=512):
    """
    Translate English text to the target language.

    The ``Helsinki-NLP/opus-mt-en-<target_language>`` model is used. The
    model and tokenizer are loaded once per model name and reused, so a
    document with many paragraphs does not reload them for every paragraph.

    Args:
        text (str): Input text to translate.
        target_language (str): Target language code (e.g. "es", "fr").
        max_length (int): Upper bound on the number of generated tokens.
            The previous hard-coded value of 100 cut off longer paragraphs.

    Returns:
        str: Translated text, or "" when the input is empty or whitespace.
    """
    if not text.strip():
        return ""

    model_name = f"Helsinki-NLP/opus-mt-en-{target_language}"
    model, tokenizer = _load_translation_model(model_name)

    # truncation=True clips over-long input to the tokenizer's configured
    # maximum instead of passing an over-long sequence to the model.
    encoded = tokenizer(text, return_tensors="pt", truncation=True)

    # generate() returns one sequence per input row; there is a single row.
    translated_ids = model.generate(**encoded, max_length=max_length)[0]
    return tokenizer.decode(translated_ids, skip_special_tokens=True)

def _build_name_pattern(name_replacements):
    """Compile a regex matching any name to replace as a whole word, or None."""
    names = [name for name in name_replacements if name]
    if not names:
        return None
    # Longest first so that a longer name wins over a shorter one that is
    # its prefix when both could match at the same position.
    names.sort(key=len, reverse=True)
    alternatives = "|".join(re.escape(name) for name in names)
    # Lookarounds instead of \b so names that begin or end with a non-word
    # character are still matched only when not embedded in a larger word.
    return re.compile(rf"(?<!\w)(?:{alternatives})(?!\w)")


def translate_and_replace_names(input_docx, target_language,
                                name_replacements):
    """
    Translate and replace names in a DOCX file.

    For each paragraph of the input document, names are replaced first and
    the resulting text is then translated and appended to a new document.
    Only paragraph text is carried over; the output document is built from
    scratch with ``add_paragraph``.

    Names are replaced as whole words in a single pass, so replacing "Ann"
    leaves "Anna" untouched and a replacement value is never itself
    replaced by another entry of the mapping.

    Args:
        input_docx (BytesIO): Input DOCX file.
        target_language (str): Target language code.
        name_replacements (dict): Dictionary of name replacements
            (old name -> new name).

    Returns:
        Document: Translated and replaced DOCX document.
    """
    doc = Document(input_docx)
    translated_doc = Document()

    # Compiled once per document rather than once per paragraph.
    name_pattern = _build_name_pattern(name_replacements)

    def substitute(match):
        return name_replacements[match.group(0)]

    for para in doc.paragraphs:
        paragraph_text = para.text
        if name_pattern is not None:
            # A callable replacement keeps backslashes in new names literal.
            paragraph_text = name_pattern.sub(substitute, paragraph_text)
        translated_doc.add_paragraph(
            translate_to_language(paragraph_text, target_language)
        )

    return translated_doc

# Streamlit UI
st.title("DOCX Translation App")

# Dropdown for target language selection (add more languages as needed)
target_language = st.selectbox("Select Target Language", ["es", "fr"])

# Upload input DOCX file
uploaded_file = st.file_uploader("Upload Input DOCX", type=["docx"])

# Session-state key holding the most recent translation. Streamlit re-runs
# the whole script on every widget interaction and a button is only True
# during the run triggered by its click, so the result has to be stored
# here to still be available when the user clicks the download button.
TRANSLATED_STATE_KEY = "translated_docx"

if uploaded_file:
    doc = Document(uploaded_file)
    doc_text = "\n".join(para.text for para in doc.paragraphs)

    # Extract and display potential names from input DOCX.
    # Sorted so the option list has a stable, readable order.
    potential_names = extract_names(doc_text)
    st.sidebar.header("Potential Names from Demo Script")
    selected_names = st.sidebar.multiselect(
        "Select Names to Replace:", sorted(potential_names)
    )

    # Dictionary to store name replacements (old name -> new name);
    # names whose replacement field is left empty are skipped.
    name_replacement_dict = {}
    for selected_name in selected_names:
        new_name = st.sidebar.text_input(
            f"Replace '{selected_name}' with:", ""
        )
        if new_name:
            name_replacement_dict[selected_name] = new_name

    # Translate button: build the translated document in memory.
    if st.button("Translate and Download"):
        # The upload was already read above to extract names; rewind it
        # before handing it to the translator.
        uploaded_file.seek(0)
        translated_doc = translate_and_replace_names(
            uploaded_file, target_language, name_replacement_dict
        )
        docx_buffer = io.BytesIO()
        translated_doc.save(docx_buffer)
        st.session_state[TRANSLATED_STATE_KEY] = {
            "source": uploaded_file.name,
            "data": docx_buffer.getvalue(),
        }
        st.success("Translation and Name Replacement complete!")

    # Offer the download only for a translation of the currently uploaded
    # file, so a result from a previous upload is never served by mistake.
    translated = st.session_state.get(TRANSLATED_STATE_KEY)
    if translated and translated["source"] == uploaded_file.name:
        # Input field for custom filename
        custom_filename = st.text_input(
            "Enter the filename:", "translated_output.docx"
        )

        # Download button: `data` carries the document bytes and
        # `file_name` the name suggested to the browser. Nothing is
        # written to the server's disk.
        st.download_button(
            "Download Translated DOCX",
            data=translated["data"],
            file_name=custom_filename or "translated_output.docx",
            mime=(
                "application/vnd.openxmlformats-officedocument"
                ".wordprocessingml.document"
            ),
            key='download_button',
        )

You might also like