0% found this document useful (0 votes)
58 views · 2 pages

Translation App Code

This document defines functions for translating text and documents between languages. It extracts names from text, translates text to a target language, and replaces names in translated documents. It then builds a Streamlit app with these functions to allow users to upload documents, select a target language, replace names, translate the document, and download the translated output.

Uploaded by

rakeshslrocky
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
58 views · 2 pages

Translation App Code

This document defines functions for translating text and documents between languages. It extracts names from text, translates text to a target language, and replaces names in translated documents. It then builds a Streamlit app with these functions to allow users to upload documents, select a target language, replace names, translate the document, and download the translated output.

Uploaded by

rakeshslrocky
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 2

import functools
import io
import os
import re
import string

import streamlit as st
from docx import Document
from transformers import MarianMTModel, MarianTokenizer

def extract_names(text):
    """
    Extract potential names from the given text.

    The text is split on whitespace. A word is treated as a potential name
    when ``str.istitle()`` is true for it (e.g. ``"John"`` or ``"John,"``,
    but not ``"USA"`` or ``"john"``). Leading and trailing ASCII punctuation
    is stripped from each kept word before it is stored.

    Args:
        text (str): Input text to extract names from.

    Returns:
        set: Set of potential names.
    """
    return {
        word.strip(string.punctuation)
        for word in text.split()
        if word.istitle()
    }

@functools.lru_cache(maxsize=8)
def _load_translator(model_name):
    """Load the Marian tokenizer and model for *model_name*, memoized per name."""
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return tokenizer, model


def translate_to_language(text, target_language, max_length=512):
    """
    Translate the given text to the target language.

    The model used is ``Helsinki-NLP/opus-mt-en-{target_language}``. The
    tokenizer/model pair is loaded once per model name and reused on later
    calls, so translating many paragraphs does not reload the weights for
    each one.

    Args:
        text (str): Input text to translate.
        target_language (str): Target language code.
        max_length (int): Upper bound passed to ``model.generate`` for the
            length of the generated translation.

    Returns:
        str: Translated text, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    # Blank input needs no model at all; skip the (expensive) load.
    if not text.strip():
        return ""

    model_name = f"Helsinki-NLP/opus-mt-en-{target_language}"
    tokenizer, model = _load_translator(model_name)

    # truncation=True caps the input at the tokenizer's configured maximum
    # length; calling the tokenizer (rather than .encode) also yields the
    # attention mask that generate() expects.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    translated_ids = model.generate(**inputs, max_length=max_length)[0]

    return tokenizer.decode(translated_ids, skip_special_tokens=True)

def _compile_name_pattern(name_replacements):
    """Build one regex matching any non-empty key of *name_replacements* as a whole word, or None."""
    names = [name for name in name_replacements if name]
    if not names:
        return None
    # Longest first so that "Johnson" is preferred over "John" when both are keys.
    names.sort(key=len, reverse=True)
    alternation = "|".join(re.escape(name) for name in names)
    # Lookarounds instead of \b so keys that start or end with a non-word
    # character are still matched correctly.
    return re.compile(r"(?<!\w)(?:" + alternation + r")(?!\w)")


def translate_and_replace_names(input_docx, target_language,
                                name_replacements):
    """
    Translate and replace names in a DOCX file.

    For every paragraph of the input document, the names listed in
    ``name_replacements`` are substituted first and the resulting text is
    then translated. Only paragraph text is carried over: each input
    paragraph becomes one new paragraph in a freshly created document.

    Names are replaced in a single pass and only where they appear as whole
    words, so a key such as ``"John"`` does not alter ``"Johnson"`` and the
    output of one replacement is never rewritten by another. Empty keys are
    ignored.

    Args:
        input_docx (BytesIO): Input DOCX file.
        target_language (str): Target language code.
        name_replacements (dict): Dictionary of name replacements.

    Returns:
        Document: Translated and replaced DOCX document.
    """
    doc = Document(input_docx)
    translated_doc = Document()

    # Compile once per document rather than once per paragraph.
    name_pattern = _compile_name_pattern(name_replacements)

    def _swap(match):
        # A callable replacement keeps backslashes in new names literal.
        return name_replacements[match.group(0)]

    for para in doc.paragraphs:
        paragraph_text = para.text
        if name_pattern is not None:
            paragraph_text = name_pattern.sub(_swap, paragraph_text)
        translated_doc.add_paragraph(
            translate_to_language(paragraph_text, target_language)
        )

    return translated_doc

# Streamlit UI
#
# Flow: pick a target language, upload a DOCX, optionally choose names to
# replace in the sidebar, translate, then download the result.
st.title("DOCX Translation App")

# Dropdown for target language selection. Add more languages as needed.
target_language = st.selectbox("Select Target Language", ["es", "fr"])

# Upload input DOCX file
uploaded_file = st.file_uploader("Upload Input DOCX", type=["docx"])

if uploaded_file:
    doc = Document(uploaded_file)
    doc_text = "\n".join(para.text for para in doc.paragraphs)

    # Extract and display potential names from input DOCX. Sorting gives the
    # multiselect a stable option order across reruns (a set has none).
    potential_names = sorted(extract_names(doc_text))
    st.sidebar.header("Potential Names from Demo Script")
    selected_names = st.sidebar.multiselect("Select Names to Replace:",
                                            potential_names)

    # Dictionary to store name replacements
    name_replacement_dict = {}
    for selected_name in selected_names:
        new_name = st.sidebar.text_input(f"Replace '{selected_name}' with:", "")
        if new_name:
            name_replacement_dict[selected_name] = new_name

    # Translate button. The result is kept in session state because a button
    # is only True during the rerun triggered by its own click; anything
    # nested under it would vanish as soon as another widget is used.
    if st.button("Translate and Download"):
        # The upload was already read above; rewind before parsing it again.
        uploaded_file.seek(0)
        translated_doc = translate_and_replace_names(uploaded_file,
                                                     target_language,
                                                     name_replacement_dict)
        # Serialize in memory instead of writing to the server's disk.
        output_buffer = io.BytesIO()
        translated_doc.save(output_buffer)
        st.session_state["translated_docx"] = output_buffer.getvalue()
        st.session_state["translated_source"] = uploaded_file.name
        st.success("Translation and Name Replacement complete!")

    # Offer the download only for a translation of the currently uploaded file.
    has_translation = (
        "translated_docx" in st.session_state
        and "translated_source" in st.session_state
        and st.session_state["translated_source"] == uploaded_file.name
    )
    if has_translation:
        # Input field for custom filename
        custom_filename = st.text_input("Enter the filename:",
                                        "translated_output.docx")

        # Download button: send the document bytes, not the filename string.
        st.download_button(
            "Download Translated DOCX",
            data=st.session_state["translated_docx"],
            file_name=custom_filename or "translated_output.docx",
            mime="application/vnd.openxmlformats-officedocument"
                 ".wordprocessingml.document",
            key='download_button',
        )

You might also like