Vietnamese Text Normalizer

Normalize Vietnamese text using soe-vinorm (Client-side with PyScript)

# PyScript runtime configuration (TOML). NOTE(review): in the HTML page this
# presumably belongs in the <py-config> element, not in the Python script;
# it is kept here verbatim so no configuration is lost.
#
#   packages = ["micropip"]
#
#   [[runtimes]]
#   src = "https://cdn.jsdelivr.net/pyodide/v0.24.1/full/pyodide.js"
#   name = "pyodide-0.24.1"
#   lang = "python"

import asyncio
import json

from js import document, console
from pyodide.ffi import create_proxy

# Global state
normalizer = None          # SoeNormalizer instance once setup has succeeded
setup_complete = False     # True after packages are installed and normalizer built
setup_in_progress = False  # True while one setup_packages() call is running


def update_status(message: str) -> None:
    """Write *message* to the ``statusMessage`` element (if present) and log it."""
    status_elem = document.getElementById("statusMessage")
    if status_elem:
        status_elem.textContent = message
    console.log(f"Status: {message}")


async def setup_packages() -> bool:
    """Install ``soe-vinorm`` via micropip and create the global normalizer.

    Only one installation runs at a time: a call made while another one is in
    progress polls until that one finishes and then reports its outcome.

    Returns:
        True when the normalizer is ready; False when this call only waited
        for a concurrent setup that ended without success.

    Raises:
        Exception: if installing the package or constructing the normalizer
            fails (the original error is chained as ``__cause__``).
    """
    global normalizer, setup_complete, setup_in_progress

    if setup_complete:
        return True

    if setup_in_progress:
        # Wait for the setup started elsewhere to complete.
        while setup_in_progress:
            await asyncio.sleep(0.1)
        return setup_complete

    setup_in_progress = True
    update_status("Installing Python packages...")

    try:
        import micropip

        update_status("Installing soe-vinorm and dependencies (this may take a while)...")

        # Try to install packages
        # Note: onnxruntime might not be available in Pyodide
        packages_to_install = [
            "soe-vinorm",
        ]
        await micropip.install(packages_to_install)

        update_status("Packages installed! Initializing normalizer...")

        from soe_vinorm import SoeNormalizer

        normalizer = SoeNormalizer()
        setup_complete = True
        update_status("Ready!")
        return True
    except Exception as e:
        error_msg = f"Failed to setup: {str(e)}"
        update_status(error_msg)
        console.error(error_msg)
        raise Exception(f"Package installation failed: {str(e)}") from e
    finally:
        # Always clear the flag (including on cancellation); otherwise any
        # concurrent caller polling above would spin forever.
        setup_in_progress = False


def normalize_text(text: str) -> str:
    """Return *text* normalized by the global normalizer.

    Raises:
        Exception: if the normalizer has not been initialized yet, or if
            *text* is empty or whitespace-only.
    """
    if normalizer is None:
        raise Exception("Normalizer not initialized. Please wait for packages to load.")

    if not text or not text.strip():
        raise Exception("Text must be non-empty")

    return normalizer.normalize(text)


async def handle_submit(event) -> None:
    """Handle a submit of the normalize form: validate, normalize, show result.

    Reads ``inputText``, toggles the result/error/loading containers, and
    re-enables the submit button when processing ends.
    """
    # No effect once the event dispatch has finished (the registered listener
    # already cancels the default synchronously); kept for direct callers.
    event.preventDefault()

    input_elem = document.getElementById("inputText")
    result_container = document.getElementById("resultContainer")
    error_container = document.getElementById("errorContainer")
    loading_container = document.getElementById("loadingContainer")
    result_text = document.getElementById("resultText")
    error_message = document.getElementById("errorMessage")
    submit_btn = document.getElementById("submitBtn")
    loading_msg = document.getElementById("loadingMessage")

    input_text = input_elem.value.strip()

    if not input_text:
        error_message.textContent = "Please enter some text to normalize."
        error_container.style.display = "block"
        result_container.style.display = "none"
        return

    # Hide previous results
    result_container.style.display = "none"
    error_container.style.display = "none"
    loading_container.style.display = "block"
    submit_btn.disabled = True
    loading_msg.textContent = "Processing..."

    try:
        # Initialize if needed
        if not setup_complete:
            loading_msg.textContent = "Loading Python packages (first time only)..."
            await setup_packages()

        loading_msg.textContent = "Normalizing text..."

        # Normalize
        normalized = normalize_text(input_text)

        # Show result
        result_text.textContent = normalized
        result_container.style.display = "block"
        loading_container.style.display = "none"
        error_container.style.display = "none"
    except Exception as e:
        error_msg = str(e)
        error_message.textContent = f"Error: {error_msg}"
        error_container.style.display = "block"
        result_container.style.display = "none"
        loading_container.style.display = "none"
        console.error(f"Normalization error: {error_msg}")
    finally:
        submit_btn.disabled = False


# Initialize on page load
async def init() -> None:
    """Pre-load the packages in the background and report the outcome."""
    update_status("Initializing PyScript...")

    # Pre-load packages in background
    try:
        await setup_packages()
        update_status("Ready to normalize text!")
    except Exception as e:
        update_status(f"Initialization error: {str(e)}")
        console.error(f"Init error: {str(e)}")


def _on_submit(event) -> None:
    """Synchronous submit listener: cancel the default, then schedule the work."""
    # preventDefault() only works while the event is still being dispatched.
    # A task created by ensure_future() starts later, so cancelling inside the
    # coroutine would be too late and the browser would submit the form.
    event.preventDefault()
    asyncio.ensure_future(handle_submit(event))


# Attach event listener
form = document.getElementById("normalizeForm")
# A bare Python callable passed to JS is wrapped in a temporary proxy that
# Pyodide destroys when addEventListener returns; create_proxy keeps the
# handler alive for the lifetime of the page.
_submit_proxy = create_proxy(_on_submit)
form.addEventListener("submit", _submit_proxy)

# Start initialization
asyncio.ensure_future(init())