From 853dc37c2570f7379084c5bd76d6f72381645fe4 Mon Sep 17 00:00:00 2001 From: Saswatsusmoy Date: Wed, 30 Apr 2025 16:18:27 +0530 Subject: [PATCH] Voice Based Banking - Implemented voice based banking with mock database and mock users. - Added test-cases and detailed documentation for setting up the project. - Removed previous implementation of selenium based automation for better compatibility (Will add regulated selenium based automation in future PRs) - Added a demo UI using HTML CSS JS for better usability and testing purposes --- voice_driven_banking/.gitignore | 49 ++ voice_driven_banking/Procfile | 1 + voice_driven_banking/README.md | 193 ++++- voice_driven_banking/RENDER_DEPLOYMENT.md | 48 ++ voice_driven_banking/app.py | 193 +++++ voice_driven_banking/config.json | 58 -- .../config/intent_patterns.json | 104 +++ voice_driven_banking/demo.md | 201 +++++ voice_driven_banking/environment.yml | 25 + .../models/intent_recognition.py | 267 +++++++ .../models/speech_recognition.py | 182 +++++ .../models/voice_biometrics.py | 104 +++ voice_driven_banking/render.yaml | 9 + voice_driven_banking/requirements.txt | 27 + voice_driven_banking/run.bat | 13 + voice_driven_banking/run.sh | 15 + voice_driven_banking/selenium_automation.py | 269 ------- .../services/banking_service.py | 187 +++++ voice_driven_banking/services/user_service.py | 104 +++ voice_driven_banking/static/css/style.css | 648 ++++++++++++++++ voice_driven_banking/static/css/toast.css | 61 ++ voice_driven_banking/static/js/app.js | 689 ++++++++++++++++++ voice_driven_banking/templates/index.html | 153 ++++ voice_driven_banking/test.py | 120 +++ voice_driven_banking/test_hindi_numbers.py | 33 + voice_driven_banking/test_hindi_transfers.py | 29 + voice_driven_banking/update_user_data.py | 147 ++++ .../voice_banking_test_suite.py | 209 ------ voice_driven_banking/voice_simulator.py | 108 --- 29 files changed, 3584 insertions(+), 662 deletions(-) create mode 100644 voice_driven_banking/.gitignore create mode 
100644 voice_driven_banking/Procfile create mode 100644 voice_driven_banking/RENDER_DEPLOYMENT.md create mode 100644 voice_driven_banking/app.py delete mode 100644 voice_driven_banking/config.json create mode 100644 voice_driven_banking/config/intent_patterns.json create mode 100644 voice_driven_banking/demo.md create mode 100644 voice_driven_banking/environment.yml create mode 100644 voice_driven_banking/models/intent_recognition.py create mode 100644 voice_driven_banking/models/speech_recognition.py create mode 100644 voice_driven_banking/models/voice_biometrics.py create mode 100644 voice_driven_banking/render.yaml create mode 100644 voice_driven_banking/requirements.txt create mode 100644 voice_driven_banking/run.bat create mode 100644 voice_driven_banking/run.sh delete mode 100644 voice_driven_banking/selenium_automation.py create mode 100644 voice_driven_banking/services/banking_service.py create mode 100644 voice_driven_banking/services/user_service.py create mode 100644 voice_driven_banking/static/css/style.css create mode 100644 voice_driven_banking/static/css/toast.css create mode 100644 voice_driven_banking/static/js/app.js create mode 100644 voice_driven_banking/templates/index.html create mode 100644 voice_driven_banking/test.py create mode 100644 voice_driven_banking/test_hindi_numbers.py create mode 100644 voice_driven_banking/test_hindi_transfers.py create mode 100644 voice_driven_banking/update_user_data.py delete mode 100644 voice_driven_banking/voice_banking_test_suite.py delete mode 100644 voice_driven_banking/voice_simulator.py diff --git a/voice_driven_banking/.gitignore b/voice_driven_banking/.gitignore new file mode 100644 index 00000000..6af3edd9 --- /dev/null +++ b/voice_driven_banking/.gitignore @@ -0,0 +1,49 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual Environment +venv/ 
+ENV/ + +# Flask +instance/ +.webassets-cache + +# Project specific +uploads/ +data/ +voice_prints/ + +# IDEs and editors +.idea/ +.vscode/ +*.swp +*.sublime-workspace + +# OS specific files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db diff --git a/voice_driven_banking/Procfile b/voice_driven_banking/Procfile new file mode 100644 index 00000000..ca6e941c --- /dev/null +++ b/voice_driven_banking/Procfile @@ -0,0 +1 @@ +web: gunicorn app:app diff --git a/voice_driven_banking/README.md b/voice_driven_banking/README.md index 8febe6d3..ecbc9e24 100644 --- a/voice_driven_banking/README.md +++ b/voice_driven_banking/README.md @@ -1,28 +1,185 @@ -# Voice Banking Automated Testing Framework +# Voice-Driven Banking via LAMs -This framework provides automated testing of voice commands for banking interfaces using Selenium and a voice command simulator. +A proof-of-concept for a voice-based banking platform supporting multiple languages including low-resource languages and dialects. -## Features +## Project Overview -- Simulates voice commands with realistic variations and confidence scores -- Automates testing of banking interfaces through Selenium -- Configurable command sets and locators -- Detailed test reports with success rates and recognized text +This project demonstrates how Large Acoustic Models (LAMs) can be utilized to create an inclusive, voice-controlled banking system that works with languages that are typically underserved by mainstream voice recognition technologies. 
-## Prerequisites +### Key Features -- Python 3.7+ -- Chrome browser -- ChromeDriver matching your Chrome version -- Selenium and other required packages (see requirements.txt) +- **Multilingual Speech Recognition**: Supports English, Hindi, and Tamil (extendable to other languages) +- **Intent Recognition**: Identifies banking operations such as balance checks, money transfers, and transaction history requests +- **Voice Biometrics**: Simple voice authentication system for security +- **Banking Operations**: Basic simulation of banking functionality +- **Responsive UI**: Web interface for interacting with the voice banking system + +## Technology Stack + +- **Backend**: Python, Flask +- **Speech Recognition**: SpeechRecognition, Wav2Vec2 (for low-resource languages) +- **NLP**: spaCy, regex-based intent detection +- **Voice Biometrics**: Gaussian Mixture Models with MFCC features +- **Frontend**: HTML, CSS, JavaScript +- **Data Storage**: Simple JSON-based storage (for demonstration purposes) ## Installation -1. Clone this repository -2. Install dependencies: `pip install -r requirements.txt` -3. Update `config.json` with your test environment details +### Prerequisites + +- Miniconda or Anaconda +- A modern web browser + +### Setup with Miniconda + +1. Install Miniconda: + - [Download Miniconda](https://docs.conda.io/en/latest/miniconda.html) + - Follow the installation instructions for your operating system + +2. Clone the repository: + ``` + git clone <repository-url> + cd "GSoC'25 Mifos POC" + ``` + +3. Create and activate the conda environment: + ``` + conda env create -f environment.yml + conda activate voice-banking + ``` + +4. Download required language models for spaCy: + ``` + python -m spacy download en_core_web_sm + python -m spacy download xx_ent_wiki_sm + ``` + +5. (Alternative) If you prefer using pip instead of conda environment: + ``` + pip install -r requirements.txt + ``` + +## Running the Application + +1. Start the Flask server: + ``` + python app.py + ``` + +2. 
Open your web browser and navigate to: + ``` + http://127.0.0.1:5000 + ``` + +3. Register a new account or log in with the demo accounts: + - Username: `johndoe`, Password: `password123` (English) + - Username: `janesmith`, Password: `password456` (Hindi) + +## Usage Guide + +### Voice Commands + +The system supports the following banking operations: + +1. **Check Balance** + - English: "What is my balance?", "Check my account balance" + - Hindi: "मेरा बैलेंस क्या है", "मेरा बैलेंस दिखाएं" + - Tamil: "என் இருப்பு என்ன", "என் கணக்கு இருப்பு காட்டு" + +2. **Transfer Money** + - English: "Transfer 100 dollars to Jane", "Send 50 to John" + - Hindi: "जेन को 100 रुपये ट्रांसफर करें" + - Tamil: "ஜேனுக்கு 100 ரூபாய் அனுப்பு" + +3. **Transaction History** + - English: "Show my recent transactions", "Show my transaction history" + - Hindi: "मेरे हाल के लेनदेन दिखाएं" + - Tamil: "என் சமீபத்திய பரிவர்த்தனைகளைக் காட்டு" + +### Voice Authentication + +Upon first use, the system will automatically enroll your voice. For subsequent uses, it will authenticate your voice against the stored voiceprint. In this proof-of-concept, authentication thresholds are set low for ease of demonstration. 
+ +## Project Structure + +``` +/ +├── app.py # Main Flask application +├── requirements.txt # Python dependencies +├── README.md # Project documentation +├── /config/ +│ └── intent_patterns.json # Language-specific patterns for intent recognition +├── /data/ +│ ├── mock_db.json # Mock banking data (auto-generated) +│ ├── users.json # User data (auto-generated) +│ └── /voice_prints/ # Voice authentication models (auto-generated) +├── /models/ +│ ├── speech_recognition.py # Speech-to-text conversion +│ ├── intent_recognition.py # Banking intent detection +│ └── voice_biometrics.py # Voice authentication logic +├── /services/ +│ ├── banking_service.py # Banking operations +│ └── user_service.py # User management +├── /static/ +│ ├── /css/ +│ │ └── style.css # Frontend styling +│ └── /js/ +│ └── app.js # Frontend logic +├── /templates/ +│ └── index.html # Main application page +└── /uploads/ + └── /audio/ # Temporary storage for audio files +``` + +## Technical Implementation + +### Speech Recognition + +- For English and well-supported languages, we use Google's Speech Recognition API +- For low-resource languages like Hindi and Tamil, we employ fine-tuned versions of Wav2Vec2 models + +### Intent Recognition + +Intent recognition uses a combination of: +- Regular expression pattern matching based on language-specific patterns +- Simple NLP processing using spaCy to handle variations + +### Voice Biometrics + +The voice authentication system: +- Extracts MFCC features from audio samples +- Uses Gaussian Mixture Models (GMMs) to create voice prints +- Computes likelihood scores for authentication decisions + +### Banking Simulation + +The banking functionality: +- Uses a simple JSON file as a mock database +- Supports account balance queries +- Processes simulated money transfers +- Returns transaction history + +## Limitations and Future Work + +This project is a proof-of-concept with the following limitations: + +1. 
**Speech Recognition**: Uses pre-trained models rather than custom-trained LAMs for low-resource languages +2. **Voice Authentication**: Uses basic GMM modeling rather than more sophisticated deep learning approaches +3. **Banking Integration**: Simulates banking operations rather than connecting to actual banking systems +4. **Security**: Implements basic security measures; a production system would need more robust security +5. **Offline Support**: Currently requires internet for some speech recognition; a full implementation would support offline operation + +Future work would focus on: +- Training custom LAMs for targeted low-resource languages +- Improving voice biometrics with anti-spoofing measures +- Adding more banking operations +- Creating native mobile applications +- Supporting offline operation for areas with limited connectivity + +## License + +[Specify license information] -## Usage +## Contact -Run the main test suite: -Or run individual components: +[Your contact information] diff --git a/voice_driven_banking/RENDER_DEPLOYMENT.md b/voice_driven_banking/RENDER_DEPLOYMENT.md new file mode 100644 index 00000000..b7d3dec7 --- /dev/null +++ b/voice_driven_banking/RENDER_DEPLOYMENT.md @@ -0,0 +1,48 @@ +# Deploying Voice Banking System on Render + +This guide explains how to deploy the Voice-Driven Banking System to Render.com. + +## Deployment Options + +### Option 1: Manual Deployment + +1. Create a new Web Service on Render +2. Connect your GitHub repository +3. Use the following settings: + - **Environment**: Python + - **Build Command**: `pip install -r requirements.txt` + - **Start Command**: `gunicorn app:app` +4. Click "Create Web Service" + +### Option 2: Blueprint Deployment + +1. Fork this repository to your GitHub account +2. Go to Render Dashboard: https://dashboard.render.com/ +3. Click "New" and select "Blueprint" +4. Connect your GitHub repository +5. 
Render will automatically detect the `render.yaml` file and set up the service + +## Environment Variables + +If your application uses any API keys or sensitive information, add them as environment variables in the Render dashboard: + +1. Go to your web service in the Render dashboard +2. Click on "Environment" tab +3. Add your environment variables (e.g. API keys) + +## Persistent Storage (Optional) + +If you need persistent storage for user data or voice prints: + +1. Create a Render Disk +2. Attach it to your service +3. Update your code to use the disk path + +## Custom Domain (Optional) + +To use a custom domain: + +1. Go to your web service in the Render dashboard +2. Click on "Settings" tab +3. Scroll to "Custom Domains" section +4. Add your domain and follow the DNS configuration instructions diff --git a/voice_driven_banking/app.py b/voice_driven_banking/app.py new file mode 100644 index 00000000..12e99e86 --- /dev/null +++ b/voice_driven_banking/app.py @@ -0,0 +1,193 @@ +from flask import Flask, request, jsonify, render_template +import os +from werkzeug.utils import secure_filename +import json +from models.speech_recognition import recognize_speech +from models.intent_recognition import extract_intent, preprocess_text +from models.voice_biometrics import authenticate_voice, enroll_user_voice +from services.banking_service import process_banking_request +from services.user_service import get_user_by_id, authenticate_user, create_user, update_user_language +from datetime import datetime + +app = Flask(__name__) +app.config['UPLOAD_FOLDER'] = 'uploads/audio' +os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True) + +# Ensure data directory exists +data_dir = os.path.join(os.path.dirname(__file__), 'data') +os.makedirs(data_dir, exist_ok=True) + +@app.route('/') +def index(): + return render_template('index.html') + +@app.route('/api/process-voice', methods=['POST']) +def process_voice(): + if 'audio' not in request.files: + return jsonify({'error': 'No 
audio file provided'}), 400 + + audio_file = request.files['audio'] + if not audio_file or not audio_file.filename: + return jsonify({'error': 'Invalid audio file'}), 400 + + user_id = request.form.get('user_id') + if not user_id: + return jsonify({'error': 'User ID is required'}), 400 + + language = request.form.get('language', 'en-US') # Default to English + + # Save audio file temporarily with a unique name to avoid conflicts + original_filename = secure_filename(audio_file.filename) + filename = f"{os.path.splitext(original_filename)[0]}_{os.urandom(4).hex()}{os.path.splitext(original_filename)[1]}" + filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + + try: + audio_file.save(filepath) + + if not os.path.exists(filepath) or os.path.getsize(filepath) == 0: + return jsonify({'error': 'Failed to save audio file or file is empty'}), 500 + + # Step 1: Authenticate voice + auth_result = authenticate_voice(filepath, user_id) + if not auth_result['authenticated']: + return jsonify({'error': 'Voice authentication failed'}), 401 + + # Step 2: Speech recognition + text = recognize_speech(filepath, language) + + # Check if there was a speech recognition error + if text and text.startswith('Error processing speech:'): + return jsonify({'error': text}), 500 + + # Preprocess text for display + preprocessed_text = preprocess_text(text) + + # Step 3: Intent recognition + intent_data = extract_intent(text, language) + + # Step 4: Process banking request + user = get_user_by_id(user_id) + response = process_banking_request(intent_data, user) + + return jsonify({ + 'recognized_text': text, + 'preprocessed_text': preprocessed_text, + 'intent': intent_data, + 'response': response + }) + + except Exception as e: + return jsonify({'error': str(e)}), 500 + + finally: + # Clean up the temporary file + if os.path.exists(filepath): + try: + os.remove(filepath) + except Exception as e: + app.logger.error(f"Failed to remove temporary file {filepath}: {str(e)}") + +# New routes 
for user authentication and management +@app.route('/api/login', methods=['POST']) +def login(): + data = request.json + username = data.get('username') + password = data.get('password') + + if not username or not password: + return jsonify({'success': False, 'message': 'Username and password required'}), 400 + + user = authenticate_user(username, password) + + if user: + # Remove password hash before sending to client + user_data = {k: v for k, v in user.items() if k != 'password_hash'} + return jsonify({'success': True, 'user': user_data}) + else: + return jsonify({'success': False, 'message': 'Invalid username or password'}), 401 + +@app.route('/api/register', methods=['POST']) +def register(): + data = request.json + username = data.get('username') + password = data.get('password') + name = data.get('name') + email = data.get('email') + phone = data.get('phone') + language = data.get('language', 'en-US') + + if not all([username, password, name, email, phone]): + return jsonify({'success': False, 'message': 'All fields are required'}), 400 + + result = create_user(username, password, name, email, phone, language) + + if result['success']: + return jsonify({'success': True, 'user_id': result['user_id']}) + else: + return jsonify({'success': False, 'message': result['message']}), 400 + +@app.route('/api/update-language', methods=['POST']) +def update_language(): + data = request.json + user_id = data.get('user_id') + language = data.get('language') + + if not user_id or not language: + return jsonify({'success': False, 'message': 'User ID and language required'}), 400 + + result = update_user_language(user_id, language) + return jsonify(result) + +@app.route('/api/enroll-voice', methods=['POST']) +def enroll_voice(): + if 'audio' not in request.files: + return jsonify({'error': 'No audio file provided'}), 400 + + audio_file = request.files['audio'] + user_id = request.form.get('user_id') + + if not user_id: + return jsonify({'success': False, 'message': 'User ID 
required'}), 400 + + # Save audio file temporarily + filename = secure_filename(audio_file.filename) + filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename) + audio_file.save(filepath) + + try: + result = enroll_user_voice(filepath, user_id) + return jsonify(result) + except Exception as e: + return jsonify({'success': False, 'message': str(e)}), 500 + finally: + # Clean up the temporary file + if os.path.exists(filepath): + os.remove(filepath) + +# Add a health check endpoint +@app.route('/api/health', methods=['GET']) +def health_check(): + """Health check endpoint to verify the server is running.""" + status = { + 'status': 'ok', + 'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + 'services': { + 'voice_recognition': True, + 'database': os.path.exists(os.path.join(os.path.dirname(__file__), 'data')) + } + } + return jsonify(status) + +# Add error handlers +@app.errorhandler(404) +def not_found(error): + return jsonify({'error': 'Not found'}), 404 + +@app.errorhandler(500) +def server_error(error): + return jsonify({'error': 'Internal server error'}), 500 + +if __name__ == '__main__': + # Use environment variable PORT if available (Render will set this) + port = int(os.environ.get('PORT', 5000)) + app.run(host='0.0.0.0', port=port, debug=False) diff --git a/voice_driven_banking/config.json b/voice_driven_banking/config.json deleted file mode 100644 index eaa9b39a..00000000 --- a/voice_driven_banking/config.json +++ /dev/null @@ -1,58 +0,0 @@ - -{ - "banking_url": "https://demo.mifos.io", - "username": "mifos", - "password": "password", - "language": "en-US", - "confidence_range": [0.85, 0.98], - "download_dir": "test_results", - "test_variations": true, - "record_video": false, - "commands": [ - { - "name": "balance_inquiry", - "voice_command": "What is my account balance", - "variations": [ - "Check my balance", - "Show me my current balance", - "How much money do I have" - ], - "locators": { - "input_field": 
"//input[@id='voice-command-input']", - "submit_button": "//button[@id='voice-submit']", - "result_container": "//div[contains(@class, 'balance-display')]" - }, - "success_indicators": ["current balance", "available balance", "$"] - }, - { - "name": "fund_transfer", - "voice_command": "Transfer 50 dollars to John Doe", - "variations": [ - "Send 50 dollars to John", - "Pay John Doe 50 dollars", - "Move 50 dollars to John's account" - ], - "locators": { - "input_field": "//input[@id='voice-command-input']", - "submit_button": "//button[@id='voice-submit']", - "result_container": "//div[contains(@class, 'transfer-result')]" - }, - "success_indicators": ["transfer successful", "transaction completed", "confirmation"] - }, - { - "name": "transaction_history", - "voice_command": "Show my recent transactions", - "variations": [ - "List my recent transactions", - "Show transaction history", - "What are my recent transactions" - ], - "locators": { - "input_field": "//input[@id='voice-command-input']", - "submit_button": "//button[@id='voice-submit']", - "result_container": "//div[contains(@class, 'transaction-list')]" - }, - "success_indicators": ["transaction", "date", "amount"] - } - ] - } \ No newline at end of file diff --git a/voice_driven_banking/config/intent_patterns.json b/voice_driven_banking/config/intent_patterns.json new file mode 100644 index 00000000..e0451256 --- /dev/null +++ b/voice_driven_banking/config/intent_patterns.json @@ -0,0 +1,104 @@ +{ + "en-US": { + "check_balance": { + "patterns": [ + "what(('s)|( is)) my balance", + "what(('s)|( is)) my (bank|account) balance", + "check (my )?(bank |account )?balance", + "how much (money )?(do i have|is in my account)", + "account balance", + "bank balance", + "show me (my )?(account |bank )?balance", + "balance (inquiry|check|information)", + "tell me (my )?balance" + ] + }, + "transfer_money": { + "patterns": [ + "transfer .+ to .+", + "send .+ to .+", + "pay .+ to .+", + "i want to (transfer|send) .+ to .+" 
+ ] + }, + "transaction_history": { + "patterns": [ + "show .+ transactions", + "recent transactions", + "(show|get|view) (my )?transaction history", + "what are my recent transactions", + "show me (my )?(recent )?transactions" + ] + } + }, + "hi-IN": { + "check_balance": { + "patterns": [ + "मेरा बैलेंस क्या है", + "बैलेंस चेक करें", + "मेरे खाते में कितना पैसा है", + "खाता शेष", + "मेरा बैलेंस दिखाएं", + "बैंक बैलेंस मेरा बताओ", + "मेरा बैंक बैलेंस बताओ", + "मेरा बैलेंस बताओ", + "बैंक बैलेंस क्या है", + "बैलेंस दिखाओ", + "मेरा बैंक बैलेंस क्या है", + "मुझे मेरा बैलेंस बताओ", + "बैलेंस कितना है" + ] + }, + "transfer_money": { + "patterns": [ + ".+ को .+ ट्रांसफर करें", + ".+ को .+ भेजें", + ".+ को .+ भुगतान करें", + "मुझे .+ को .+ ट्रांसफर करना है", + ".+ को .+ (भेजिए|भेजना|भेज दो|भेज दें)", + ".+ को .+ रुपया (भेजें|भेजिए|भेजना|भेज दो|भेज दें)", + ".+ को .+ रुपये (भेजें|भेजिए|भेजना|भेज दो|भेज दें)", + ".+ को (सौ|एक सौ) रुपये भेजिए", + ".+ को (सौ|एक सौ) रुपया भेजिए", + ".+ को .+ रुपये (भेज|ट्रांसफर कर) दीजिए" + ] + }, + "transaction_history": { + "patterns": [ + ".+ लेनदेन दिखाएं", + "हाल के लेनदेन", + "मेरा लेनदेन इतिहास दिखाएं", + "मेरे हालिया लेनदेन क्या हैं", + "मेरे लेनदेन दिखाएं" + ] + } + }, + "ta-IN": { + "check_balance": { + "patterns": [ + "என் இருப்பு என்ன", + "என் கணக்கு இருப்பு காட்டு", + "என் பணம் எவ்வளவு உள்ளது", + "கணக்கு இருப்பு", + "இருப்பு நிலை காட்டு" + ] + }, + "transfer_money": { + "patterns": [ + ".+ க்கு .+ அனுப்பு", + ".+ க்கு .+ பரிமாற்றம் செய்", + ".+ க்கு .+ செலுத்து", + "நான் .+ க்கு .+ அனுப்ப வேண்டும்" + ] + }, + "transaction_history": { + "patterns": [ + ".+ பரிவர்த்தனைகளைக் காட்டு", + "சமீபத்திய பரிவர்த்தனைகள்", + "என் பரிவர்த்தனை வரலாற்றைக் காட்டு", + "என் சமீபத்திய பரிவர்த்தனைகள் என்ன", + "என் பரிவர்த்தனைகளைக் காட்டு" + ] + } + } +} diff --git a/voice_driven_banking/demo.md b/voice_driven_banking/demo.md new file mode 100644 index 00000000..11004a45 --- /dev/null +++ b/voice_driven_banking/demo.md @@ -0,0 +1,201 @@ +# Voice-Driven 
Banking via LAMs: Demonstration Guide + +This guide provides a step-by-step demonstration of the Voice-Driven Banking proof-of-concept, showcasing its multilingual capabilities, voice recognition features, and banking functionalities. + +## Table of Contents +- [Setup](#setup) +- [Running the Application](#running-the-application) +- [User Authentication](#user-authentication) +- [Voice Banking Demo](#voice-banking-demo) +- [Multilingual Support](#multilingual-support) +- [Voice Biometrics](#voice-biometrics) +- [Troubleshooting](#troubleshooting) + +## Setup + +### Prerequisites +- Python 3.9 or higher +- Miniconda or Anaconda (recommended) +- Modern web browser with microphone access +- Internet connection + +### Installation + +1. **Clone the repository**: + ``` + git clone + cd GSoC'25 Mifos POC + ``` + +2. **Set up the environment** (choose one method): + + **Using Conda** (recommended): + ``` + conda env create -f environment.yml + conda activate voice-banking + ``` + + **Using pip**: + ``` + pip install -r requirements.txt + ``` + +3. **Download language models**: + ``` + python -m spacy download en_core_web_sm + python -m spacy download xx_ent_wiki_sm + ``` + +## Running the Application + +1. **Start the server**: + + On Windows: + ``` + run.bat + ``` + + On macOS/Linux: + ``` + chmod +x run.sh + ./run.sh + ``` + + Or manually: + ``` + python app.py + ``` + +2. **Access the web interface** by opening a browser and navigating to: + ``` + http://127.0.0.1:5000 + ``` + +3. **Allow microphone access** when prompted by your browser. + +## User Authentication + +### Demo Accounts +Use these pre-configured accounts for quick testing: + +| Username | Password | Language | +|----------|----------|----------| +| johndoe | password123 | English (en-US) | +| janesmith | password456 | Hindi (hi-IN) | +| jacobbrown | password789 | Tamil (ta-IN) | + +### Login Process +1. On the login screen, enter the username and password +2. Click the "Login" button +3. 
Upon successful login, you'll be redirected to the banking interface + +![Login Screen](images/login_screen.png) *(Screenshot description: Login form with username and password fields)* + +### Registration Process +1. Click "Register here" on the login screen +2. Fill in all the required details: + - Username (unique identifier) + - Password + - Full Name + - Email + - Phone + - Preferred Language (English, Hindi, or Tamil) +3. Click "Register" to create your account +4. After successful registration, you'll be redirected to the login page + +![Registration Screen](images/registration_screen.png) *(Screenshot description: Registration form with all fields)* + +## Voice Banking Demo + +### Checking Account Balance + +1. **Set up**: Log in to your account and ensure your microphone is working +2. **Select Language**: Choose your preferred language from the dropdown +3. **Start Recording**: Click the "Start Recording" button with the microphone icon +4. **Speak Command**: Say one of the following phrases (based on selected language): + - English: "What is my account balance?" or "Check my balance" + - Hindi: "मेरा बैलेंस क्या है" or "मेरा बैलेंस दिखाएं" + - Tamil: "என் இருப்பு என்ன" or "என் கணக்கு இருப்பு காட்டு" +5. **Wait for Processing**: The system will process your speech (this may take a few seconds) +6. **View Results**: You'll see: + - Recognized text (what the system heard) + - Preprocessed text (after normalization) + - Detected intent (Balance Check) + - Response with your account balances + +![Balance Check Demo](images/balance_check.png) *(Screenshot description: Interface showing recognized speech and account balance information)* + +### Transferring Money + +1. **Start Recording**: Click the microphone button +2. **Speak Command**: Say something like: + - English: "Transfer 100 dollars to Jane" or "Send 50 to John" + - Hindi: "जेन को 100 रुपये ट्रांसफर करें" + - Tamil: "ஜேனுக்கு 100 ரூபாய் அனுப்பு" +3. 
**View Results**: The system will display: + - The recognized command + - The transfer details (amount and recipient) + - Confirmation of the transaction + - Updated account balance + +![Money Transfer Demo](images/transfer_demo.png) *(Screenshot description: Interface showing transfer command recognition and confirmation)* + +### Viewing Transaction History + +1. **Start Recording**: Click the microphone button +2. **Speak Command**: Say something like: + - English: "Show my recent transactions" or "Show transaction history" + - Hindi: "मेरे हाल के लेनदेन दिखाएं" + - Tamil: "என் சமீபத்திய பரிவர்த்தனைகளைக் காட்டு" +3. **View Results**: The system will display: + - Your recent transactions in a table format + - Date, type, amount, and description for each transaction + +![Transaction History Demo](images/transaction_history.png) *(Screenshot description: Interface showing transaction history table)* + +## Multilingual Support + +### Changing Languages + +1. Select your preferred language from the dropdown menu in the banking interface +2. The example phrases will update to show commands in the selected language +3. Speak in the selected language for optimal recognition + +### Language Support Details + +| Language | Code | Speech Recognition | Banking Commands | +|----------|------|-------------------|------------------| +| English | en-US | Google Speech API | Full support | +| Hindi | hi-IN | Wav2Vec2 (Harveen Chadha) | Full support | +| Tamil | ta-IN | Wav2Vec2 (Harveen Chadha) | Full support | + +### Demonstration Video + +![Language Switching](images/language_demo.gif) *(GIF description: Demonstration of switching languages and speaking commands)* + +## Voice Biometrics + +The POC includes a simplified voice biometrics system for authentication. + +### First-time Use + +1. On first voice interaction, the system will automatically enroll your voice +2. A voice print is created and stored for future authentication + +### Authentication Process + +1. 
For subsequent voice commands, your voice is automatically verified +2. The authentication happens seamlessly before processing your commands +3. If authentication fails, you'll see an error message + +> **Note**: For the POC, authentication thresholds are set low to facilitate demonstrations. In a production environment, more strict thresholds would be applied. + +## Troubleshooting + +### Common Issues and Solutions + +#### Microphone Not Working +- Ensure your browser has permission to access the microphone +- Check if your microphone is working in other applications +- Try using a different browser (Chrome recommended) + diff --git a/voice_driven_banking/environment.yml b/voice_driven_banking/environment.yml new file mode 100644 index 00000000..60e26694 --- /dev/null +++ b/voice_driven_banking/environment.yml @@ -0,0 +1,25 @@ +name: voice-banking +channels: + - conda-forge + - pytorch + - defaults +dependencies: + - python=3.9 + - pip=22.0 + - flask=2.2.3 + - numpy=1.24.3 + - scikit-learn=1.2.2 + - pytorch=2.0.0 + - torchaudio=2.0.0 + - librosa=0.10.0 + - pydub=0.25.1 + - requests=2.28.2 + - ffmpeg=4.4 + - soundfile=0.12.1 + - pip: + - SpeechRecognition==3.10.0 + - transformers==4.28.1 + - python-dotenv==1.0.0 + - PyAudio==0.2.13 + - spacy==3.5.2 + - werkzeug==2.2.3 diff --git a/voice_driven_banking/models/intent_recognition.py b/voice_driven_banking/models/intent_recognition.py new file mode 100644 index 00000000..8baed314 --- /dev/null +++ b/voice_driven_banking/models/intent_recognition.py @@ -0,0 +1,267 @@ +import re +import json +from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer +import os +import spacy +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +# Load intent configuration +INTENT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), '../config/intent_patterns.json') + +with 
# ---- voice_driven_banking/models/intent_recognition.py (continued) ----
# NOTE(review): this arrived as a flattened patch; the module-level config
# load is reconstructed with a guard so the module can be imported even
# when the JSON config is absent (the original crashed at import time).
logger = logging.getLogger(__name__)

INTENT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), '../config/intent_patterns.json')

try:
    with open(INTENT_CONFIG_PATH, 'r', encoding='utf-8') as f:
        INTENT_PATTERNS = json.load(f)
except (OSError, json.JSONDecodeError) as exc:
    logger.error("Could not load intent patterns from %s: %s", INTENT_CONFIG_PATH, exc)
    INTENT_PATTERNS = {}

# Supported languages mapped to their spaCy model names.
LANGUAGE_MODELS = {
    'en-US': 'en_core_web_sm',
    'hi-IN': 'xx_ent_wiki_sm',  # spaCy's multilingual model, used for Hindi
    # Add more language models as needed
}

# Lazily-populated cache of loaded spaCy pipelines, keyed by language code.
nlp_models = {}

# Hindi number words mapped to their integer values (units, tens, and the
# multipliers sau/hazaar/lakh/crore used to build compound numbers).
HINDI_NUMBER_WORDS = {
    'एक': 1, 'दो': 2, 'तीन': 3, 'चार': 4, 'पांच': 5, 'छह': 6, 'सात': 7, 'आठ': 8, 'नौ': 9, 'दस': 10,
    'ग्यारह': 11, 'बारह': 12, 'तेरह': 13, 'चौदह': 14, 'पंद्रह': 15, 'सोलह': 16, 'सत्रह': 17, 'अठारह': 18, 'उन्नीस': 19, 'बीस': 20,
    'तीस': 30, 'चालीस': 40, 'पचास': 50, 'साठ': 60, 'सत्तर': 70, 'अस्सी': 80, 'नब्बे': 90,
    'सौ': 100, 'हजार': 1000, 'लाख': 100000, 'करोड़': 10000000
}


def preprocess_text(text):
    """Strip markup artifacts that speech-recognition output may contain.

    BUGFIX(review): the original called ``re.sub(r'', '', text)`` twice —
    the patterns were empty strings (evidently lost in transit), so the
    function was a no-op despite its docstring.  This reconstruction
    removes ``<...>``-style tags, which is what the docstring describes.
    TODO(review): confirm the exact tag set against the recognizer backend.
    """
    return re.sub(r'<[^>]*>', '', text)


def load_nlp_model(language):
    """Return the spaCy pipeline for *language*, loading and caching it on demand."""
    if language not in nlp_models:
        model_name = LANGUAGE_MODELS.get(language, LANGUAGE_MODELS.get('en-US'))
        try:
            nlp_models[language] = spacy.load(model_name)
        except OSError:
            # Model not installed: download on the fly.  Dev convenience
            # only — do NOT rely on this in production.
            spacy.cli.download(model_name)
            nlp_models[language] = spacy.load(model_name)
    return nlp_models[language]


def _extract_amount_digits(text, intent_data):
    """Record the first numeric literal in *text* as the transfer amount."""
    amount_matches = re.findall(r'(\d+(?:\.\d+)?)', text)
    if amount_matches:
        intent_data['parameters']['amount'] = float(amount_matches[0])


def extract_intent(text, language='en-US'):
    """Extract a banking intent from recognized speech text.

    Returns a dict with ``intent_type`` ('check_balance', 'transfer_money',
    'transaction_history', or 'unknown') and a ``parameters`` dict holding
    whatever was found (amount, recipient, period).
    """
    if not text:
        logger.warning("Empty text provided for intent extraction")
        return {'intent_type': 'unknown', 'parameters': {}}

    # Strip recognition artifacts, lowercase, and collapse whitespace.
    text = preprocess_text(text)
    normalized_text = ' '.join(text.lower().split())
    logger.info("Preprocessed text for intent matching: '%s'", normalized_text)

    # spaCy tokenization is optional: on any failure fall back to pure
    # pattern/keyword matching with doc=None.
    try:
        nlp = load_nlp_model(language)
        doc = nlp(normalized_text)
    except Exception as e:
        logger.error("Error processing text with NLP model: %s", str(e))
        doc = None

    intent_data = {'intent_type': 'unknown', 'parameters': {}}

    # BUGFIX(review): the original used INTENT_PATTERNS.get(language,
    # INTENT_PATTERNS.get('en-US')), which yields None (and then crashes)
    # when the config lacks both keys.  Guard with `or {}`.
    language_patterns = INTENT_PATTERNS.get(language) or INTENT_PATTERNS.get('en-US') or {}

    logger.info("Matching intent for: '%s'", normalized_text)

    # Pattern matching first.
    for intent, patterns in language_patterns.items():
        for pattern in patterns['patterns']:
            if language != 'en-US':
                # Non-Latin scripts: count overlapping words instead of an
                # exact regex; >= 50% of pattern words present is a match.
                # (Empty patterns are skipped — they used to match anything.)
                pattern_words = set(pattern.lower().split())
                common_words = pattern_words & set(normalized_text.split())
                if pattern_words and len(common_words) >= len(pattern_words) * 0.5:
                    logger.info("Flexible match for pattern '%s' in language %s", pattern, language)
                    intent_data['intent_type'] = intent
                    if intent == 'transfer_money':
                        # Digits first, then Hindi number words (which win).
                        _extract_amount_digits(normalized_text, intent_data)
                        if language == 'hi-IN':
                            extract_hindi_parameters(normalized_text, intent_data)
                    return intent_data

            # Traditional regex matching (primary path for English).
            if re.search(pattern, normalized_text):
                logger.info("Matched pattern '%s' for intent '%s'", pattern, intent)
                intent_data['intent_type'] = intent

                if intent == 'transfer_money':
                    _extract_amount_digits(normalized_text, intent_data)
                    if language == 'hi-IN':
                        extract_hindi_parameters(normalized_text, intent_data)
                    if language == 'en-US':
                        # Simplified recipient extraction for the demo.
                        recipient_matches = re.findall(r'to\s+(\w+)', normalized_text)
                        if recipient_matches:
                            intent_data['parameters']['recipient'] = recipient_matches[0]
                elif intent == 'transaction_history':
                    if 'last month' in normalized_text:
                        intent_data['parameters']['period'] = 'last_month'
                    elif 'last week' in normalized_text:
                        intent_data['parameters']['period'] = 'last_week'
                    else:
                        intent_data['parameters']['period'] = 'recent'
                # check_balance needs no extra parameters.
                return intent_data

    # No pattern matched: fall back to per-language keyword voting.
    if doc is not None:
        keywords = {
            'en-US': {
                'check_balance': ['balance', 'money', 'account', 'bank', 'have', 'much'],
                'transfer_money': ['transfer', 'send', 'pay', 'give'],
                'transaction_history': ['transaction', 'history', 'recent', 'activity']
            },
            'hi-IN': {
                'check_balance': ['बैलेंस', 'पैसा', 'खाता', 'बैंक', 'शेष', 'बताओ', 'दिखाओ', 'कितना'],
                'transfer_money': ['भेजो', 'ट्रांसफर', 'भुगतान', 'दो', 'भेजें', 'भेजिए', 'रुपया', 'रुपये', 'को'],
                'transaction_history': ['लेनदेन', 'इतिहास', 'हाल', 'गतिविधि']
            },
            'ta-IN': {
                'check_balance': ['இருப்பு', 'பணம்', 'கணக்கு', 'வங்கி', 'காட்டு'],
                'transfer_money': ['அனுப்பு', 'பரிமாற்றம்', 'செலுத்து'],
                'transaction_history': ['பரிவர்த்தனை', 'வரலாறு', 'சமீபத்திய']
            }
        }

        lang_keywords = keywords.get(language, keywords['en-US'])
        text_tokens = [token.text for token in doc]

        # Vote: count keyword hits per intent, take the highest scorer.
        intent_scores = {
            intent: sum(1 for word in text_tokens if word in kw_list)
            for intent, kw_list in lang_keywords.items()
        }

        if any(intent_scores.values()):
            max_intent = max(intent_scores, key=intent_scores.get)
            logger.info("Matched intent '%s' via keyword count: %s", max_intent, intent_scores)
            intent_data['intent_type'] = max_intent
            if max_intent == 'transfer_money' and language == 'hi-IN':
                extract_hindi_parameters(normalized_text, intent_data)

    logger.info("Final detected intent: %s", intent_data['intent_type'])
    logger.info("Extracted parameters: %s", intent_data['parameters'])
    return intent_data


def extract_hindi_parameters(text, intent_data):
    """Extract the transfer amount and recipient from Hindi *text* in place.

    Amount resolution order: Hindi number words (including compounds such
    as "दो सौ" = 200), then plain digits as a fallback.
    """
    # Recipient: the word immediately preceding "को" (dative marker).
    ko_matches = re.findall(r'([\u0900-\u097F\w]+)\s+को', text)
    if ko_matches:
        intent_data['parameters']['recipient'] = ko_matches[0]
        logger.info("Found Hindi recipient: %s", ko_matches[0])

    # Collect every Hindi number word with its position in the sentence.
    found_numbers = []
    for i, word in enumerate(text.split()):
        if word in HINDI_NUMBER_WORDS:
            found_numbers.append((i, word, HINDI_NUMBER_WORDS[word]))

    if found_numbers:
        if len(found_numbers) == 1:
            intent_data['parameters']['amount'] = float(found_numbers[0][2])
            logger.info("Found Hindi number word: %s = %s", found_numbers[0][1], found_numbers[0][2])
        else:
            # Compound numbers: a value < 100 followed by a multiplier
            # (सौ/हजार/...) multiplies it; completed groups are summed.
            found_numbers.sort(key=lambda x: x[0])  # order of appearance
            total = 0
            current_value = 0

            for i, (pos, word, value) in enumerate(found_numbers):
                if value >= 100:  # multiplier: सौ (100), हजार (1000), ...
                    current_value = value if current_value == 0 else current_value * value
                    if i == len(found_numbers) - 1 or found_numbers[i + 1][2] < 100:
                        total += current_value
                        current_value = 0
                else:
                    if i < len(found_numbers) - 1 and found_numbers[i + 1][2] >= 100:
                        current_value = value  # held for the next multiplier
                    else:
                        total += value

            if current_value > 0:
                total += current_value

            intent_data['parameters']['amount'] = float(total)
            number_words = ' '.join(word for _, word, _ in found_numbers)
            logger.info("Found compound Hindi number: %s = %s", number_words, total)

    # Fallback: plain digits when no number word supplied an amount.
    if 'amount' not in intent_data['parameters']:
        amount_matches = re.findall(r'(\d+(?:\.\d+)?)', text)
        if amount_matches:
            intent_data['parameters']['amount'] = float(amount_matches[0])
            logger.info("Found numeric amount: %s", amount_matches[0])
# ---- voice_driven_banking/models/speech_recognition.py ----
import speech_recognition as sr
from pydub import AudioSegment
import os
import torch
import librosa
import numpy as np
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import logging
import tempfile

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Language code -> pretrained wav2vec2 checkpoint id.
LANGUAGE_MODELS = {
    'en-US': 'Harveenchadha/vakyansh-wav2vec2-indian-english-enm-700',
    'hi-IN': 'Harveenchadha/vakyansh-wav2vec2-hindi-him-4200',
    'ta-IN': 'Harveenchadha/vakyansh-wav2vec2-tamil-tam-250',
    'default': 'facebook/wav2vec2-large-xlsr-53'  # multilingual fallback
}

# Cache of (processor, model) tuples keyed by checkpoint id, so models are
# loaded at most once per process.
_model_cache = {}


def convert_audio_format(audio_path):
    """Convert *audio_path* to WAV, trying several backends in turn.

    Returns the path of a readable WAV file (possibly the input itself),
    or raises an Exception describing why every method failed.

    BUGFIX(review): the original bound ``y, sr = librosa.load(...)`` in
    method 4, which made ``sr`` a local name for the whole function, so the
    earlier ``sr.AudioFile``/``sr.Recognizer`` references raised
    UnboundLocalError (silently swallowed, disabling WAV validation).
    The librosa result is now bound to ``sample_rate``.
    """
    try:
        # Basic sanity checks before attempting any decode.
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")
        if os.path.getsize(audio_path) == 0:
            raise ValueError(f"Audio file is empty: {audio_path}")

        # Fast path: an already-valid WAV needs no conversion.
        if audio_path.lower().endswith('.wav'):
            try:
                with sr.AudioFile(audio_path) as source:
                    # Raises if the file is not a valid WAV.
                    sr.Recognizer().record(source)
                return audio_path
            except Exception as e:
                logger.warning(f"Existing WAV file is not valid, will try to convert: {str(e)}")
                # Fall through to conversion.

        # Unique output filename next to the input file.
        base_dir = os.path.dirname(audio_path)
        base_name = os.path.splitext(os.path.basename(audio_path))[0]
        wav_path = os.path.join(base_dir, f"{base_name}_converted_{os.urandom(4).hex()}.wav")

        try:
            # Method 1: pydub with the explicit container format.
            file_ext = os.path.splitext(audio_path)[1].lower().replace('.', '')
            if file_ext:
                try:
                    AudioSegment.from_file(audio_path, format=file_ext).export(wav_path, format='wav')
                    logger.info(f"Successfully converted audio using pydub with format {file_ext}")
                    return wav_path
                except Exception as e1:
                    logger.warning(f"Failed to convert with explicit format {file_ext}: {str(e1)}")

            # Method 2: pydub format auto-detection.
            try:
                AudioSegment.from_file(audio_path).export(wav_path, format='wav')
                logger.info("Successfully converted audio using pydub auto-detection")
                return wav_path
            except Exception as e2:
                logger.warning(f"Failed to convert with pydub auto-detection: {str(e2)}")

            # Method 3: direct ffmpeg invocation (16 kHz mono 16-bit PCM).
            try:
                import subprocess
                logger.info("Attempting conversion with direct ffmpeg call")
                result = subprocess.run(
                    ['ffmpeg', '-i', audio_path, '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', wav_path],
                    capture_output=True, text=True, check=False
                )
                if os.path.exists(wav_path) and os.path.getsize(wav_path) > 0:
                    logger.info("Successfully converted audio using ffmpeg")
                    return wav_path
                logger.warning(f"FFMPEG conversion failed: {result.stderr}")
            except Exception as e3:
                logger.warning(f"Failed to convert with ffmpeg: {str(e3)}")

            # Method 4: librosa decode + soundfile write.
            try:
                waveform, sample_rate = librosa.load(audio_path, sr=None)
                import soundfile as sf
                sf.write(wav_path, waveform, sample_rate)
                logger.info("Successfully converted audio using librosa")
                return wav_path
            except Exception as e4:
                logger.warning(f"Failed to convert with librosa: {str(e4)}")

            raise Exception("All conversion methods failed. Cannot process this audio format.")

        except Exception as e:
            # Remove any partial output before propagating the failure.
            if os.path.exists(wav_path):
                try:
                    os.remove(wav_path)
                except OSError:
                    pass
            raise e

    except Exception as e:
        logger.error(f"Error in audio conversion: {str(e)}")
        raise Exception(f"Audio conversion failed: {str(e)}")


def get_model_and_processor(language):
    """Return (processor, model) for *language*, or (None, None) when only
    the generic multilingual fallback checkpoint is available.

    BUGFIX(review): the original compared ``model_name == 'default'`` —
    impossible, because ``model_name`` is always a checkpoint id — so the
    fallback branch was dead code.  Compare against the fallback checkpoint
    itself instead.
    """
    model_name = LANGUAGE_MODELS.get(language, LANGUAGE_MODELS['default'])

    if model_name == LANGUAGE_MODELS['default']:
        return None, None

    if model_name in _model_cache:
        return _model_cache[model_name]

    logger.info(f"Loading model {model_name} for language {language}")
    processor = Wav2Vec2Processor.from_pretrained(model_name)
    model = Wav2Vec2ForCTC.from_pretrained(model_name)

    # Cache the loaded model for subsequent calls.
    _model_cache[model_name] = (processor, model)
    return processor, model


def recognize_speech(audio_path, language='en-US'):
    """Transcribe *audio_path*, choosing a backend by language.

    English / unsupported languages go to the Google Web Speech API; the
    other configured languages use a specialised wav2vec2 checkpoint.
    Returns the transcript, or a human-readable error string on failure.
    """
    wav_path = None  # explicit init instead of the original `in locals()` probe
    try:
        wav_path = convert_audio_format(audio_path)

        processor = model = None
        if language != 'en-US' and language in LANGUAGE_MODELS:
            # May legitimately be (None, None) when only the generic
            # fallback checkpoint exists — handled below via Google.
            processor, model = get_model_and_processor(language)

        if processor is None or model is None:
            recognizer = sr.Recognizer()
            with sr.AudioFile(wav_path) as source:
                audio_data = recognizer.record(source)
            try:
                text = recognizer.recognize_google(audio_data, language=language)
                return text
            except sr.UnknownValueError:
                return "Speech recognition could not understand audio"
            except sr.RequestError:
                return "Could not request results from speech recognition service"

        # Specialised wav2vec2 path for low-resource languages.
        speech_array, sampling_rate = librosa.load(wav_path, sr=16000)
        inputs = processor(speech_array, sampling_rate=16000, return_tensors="pt", padding=True)

        with torch.no_grad():
            logits = model(inputs.input_values).logits

        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)
        return transcription[0]

    except Exception as e:
        logger.error(f"Error in speech recognition: {str(e)}")
        return f"Error processing speech: {str(e)}"
    finally:
        # Remove the temporary converted file, if one was produced.
        if wav_path is not None and wav_path != audio_path and os.path.exists(wav_path):
            try:
                os.remove(wav_path)
            except OSError:
                pass


# ---- voice_driven_banking/models/voice_biometrics.py (module head) ----
import pickle
from sklearn.mixture import GaussianMixture
from services.user_service import get_user_by_id

# Directory where per-user voice prints are stored.
VOICE_PRINTS_DIR = os.path.join(os.path.dirname(__file__), '../data/voice_prints')
# ---- voice_driven_banking/models/voice_biometrics.py (continued) ----
# Directory where per-user voice prints are stored; created on import.
VOICE_PRINTS_DIR = os.path.join(os.path.dirname(__file__), '../data/voice_prints')
os.makedirs(VOICE_PRINTS_DIR, exist_ok=True)


def extract_voice_features(audio_path):
    """Return per-frame MFCC features, shape (n_frames, 13), for biometrics."""
    import librosa  # local import: heavy dependency, only needed here

    y, sample_rate = librosa.load(audio_path, sr=None)

    # 13 Mel-Frequency Cepstral Coefficients per frame.
    mfccs = librosa.feature.mfcc(y=y, sr=sample_rate, n_mfcc=13)

    # Per-coefficient standardisation.  BUGFIX(review): guard zero
    # variance (e.g. digital silence), which previously produced NaN/inf.
    mean = np.mean(mfccs, axis=1, keepdims=True)
    std = np.std(mfccs, axis=1, keepdims=True)
    std[std == 0] = 1.0
    mfccs = (mfccs - mean) / std

    return mfccs.T  # transpose: sklearn expects (n_samples, n_features)


def get_voice_print_path(user_id):
    """Return the on-disk path of *user_id*'s serialized voice print."""
    return os.path.join(VOICE_PRINTS_DIR, f"user_{user_id}_voiceprint.pkl")


def enroll_user_voice(audio_path, user_id):
    """Create and persist a voice print (diagonal GMM) from one sample.

    A production system would require multiple enrollment samples.
    """
    from sklearn.mixture import GaussianMixture  # local import: optional dep

    features = extract_voice_features(audio_path)

    gmm = GaussianMixture(n_components=16, covariance_type='diag', max_iter=200)
    gmm.fit(features)

    with open(get_voice_print_path(user_id), 'wb') as f:
        pickle.dump(gmm, f)

    return {'success': True, 'message': 'Voice enrolled successfully'}


def authenticate_voice(audio_path, user_id, threshold=None):
    """Authenticate *user_id* from an audio sample.

    Always returns a dict carrying an ``authenticated`` key.  POC caveats:
    single enrollment sample, permissive default threshold, no
    anti-spoofing.

    BUGFIX(review): on first contact the original returned the raw
    enrollment dict, which has no ``authenticated`` key, so callers keying
    on it saw an inconsistent contract.  The enrollment result is now
    augmented to keep the return shape uniform.
    """
    voice_print_path = get_voice_print_path(user_id)

    if not os.path.exists(voice_print_path):
        # Demo behaviour: first contact doubles as enrollment.
        result = enroll_user_voice(audio_path, user_id)
        result.update({'authenticated': True, 'enrolled': True, 'user_id': user_id})
        return result

    # Load the user's stored voice model.
    try:
        with open(voice_print_path, 'rb') as f:
            gmm = pickle.load(f)
    except (pickle.PickleError, IOError) as e:
        return {
            'authenticated': False,
            'error': f"Error loading voice model: {str(e)}",
            'user_id': user_id
        }

    # Extract features from the presented audio.
    try:
        features = extract_voice_features(audio_path)
    except Exception as e:
        return {
            'authenticated': False,
            'error': f"Error extracting voice features: {str(e)}",
            'user_id': user_id
        }

    # Average per-sample log-likelihood under the user's GMM.
    score = gmm.score(features)

    if threshold is None:
        # Very permissive for the POC; tune against real test data.
        threshold = -80

    return {
        'authenticated': score > threshold,
        'confidence': score,
        'threshold': threshold,
        'user_id': user_id
    }


# ---- patch residue preserved as comments (voice_driven_banking/render.yaml) ----
# services:
#   - type: web
#     name: voice-banking
#     env: python
#     buildCommand: pip install -r requirements.txt
#     startCommand: gunicorn app:app
#     envVars:
#       - key: PYTHON_VERSION
#         value: 3.9
b/voice_driven_banking/requirements.txt @@ -0,0 +1,27 @@ +# Core dependencies first +numpy==2.1.3 +python-dotenv==1.1.0 +requests==2.32.3 + +# Audio processing libraries +soundfile==0.13.1 +PyAudio==0.2.14 +pydub==0.25.1 +ffmpeg-python==0.2.0 +librosa==0.11.0 + +# Machine learning and NLP libraries +scikit-learn==1.6.1 +torch==2.6.0 +torchaudio==2.6.0 +spaCy==3.8.4 +transformers==4.50.2 +SpeechRecognition==3.14.2 + +# Web frameworks and servers +werkzeug==2.2.3 +flask +flask-cors==5.0.1 +fastapi==0.115.11 +uvicorn==0.34.0 +gunicorn diff --git a/voice_driven_banking/run.bat b/voice_driven_banking/run.bat new file mode 100644 index 00000000..55896161 --- /dev/null +++ b/voice_driven_banking/run.bat @@ -0,0 +1,13 @@ +@echo off +echo Starting Voice Banking Application... +echo. +echo Activating conda environment... +call conda activate voice-banking || ( + echo Failed to activate conda environment. + echo Please ensure Miniconda is installed and the environment is created with: + echo conda env create -f environment.yml + exit /b 1 +) +echo. +echo Starting Flask server... +python app.py diff --git a/voice_driven_banking/run.sh b/voice_driven_banking/run.sh new file mode 100644 index 00000000..4ceec974 --- /dev/null +++ b/voice_driven_banking/run.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +echo "Starting Voice Banking Application..." +echo "" +echo "Activating conda environment..." +source $(conda info --base)/etc/profile.d/conda.sh +conda activate voice-banking || { + echo "Failed to activate conda environment." + echo "Please ensure Miniconda is installed and the environment is created with:" + echo "conda env create -f environment.yml" + exit 1 +} +echo "" +echo "Starting Flask server..." 
+python app.py diff --git a/voice_driven_banking/selenium_automation.py b/voice_driven_banking/selenium_automation.py deleted file mode 100644 index 9ec5fa68..00000000 --- a/voice_driven_banking/selenium_automation.py +++ /dev/null @@ -1,269 +0,0 @@ -import os -import time -import zipfile -import json -import random -from selenium import webdriver -from selenium.webdriver.chrome.service import Service -from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC -from selenium.common.exceptions import TimeoutException, NoSuchElementException - -class VoiceBankingAutomation: - def __init__(self, download_dir=None): - # Set up download directory - self.download_dir = os.path.abspath(download_dir or "downloads") - if not os.path.exists(self.download_dir): - os.makedirs(self.download_dir) - - # Set up Chrome options - self.chrome_options = webdriver.ChromeOptions() - prefs = { - "download.default_directory": self.download_dir, - "download.prompt_for_download": False, - "download.directory_upgrade": True, - "safebrowsing.enabled": True - } - self.chrome_options.add_experimental_option("prefs", prefs) - self.driver = None - - # Define test voice commands and their expected results - self.test_commands = { - "balance_inquiry": { - "command": "What is my account balance", - "locators": { - "input_field": "//input[@id='voice-command-input']", - "submit_button": "//button[@id='voice-submit']", - "result_container": "//div[contains(@class, 'balance-display')]" - }, - "success_indicators": ["current balance", "available balance", "$"] - }, - "fund_transfer": { - "command": "Transfer $50 to John Doe", - "locators": { - "input_field": "//input[@id='voice-command-input']", - "submit_button": "//button[@id='voice-submit']", - "result_container": "//div[contains(@class, 'transfer-result')]" - }, - "success_indicators": ["transfer successful", "transaction completed", 
"confirmation"] - }, - "transaction_history": { - "command": "Show my recent transactions", - "locators": { - "input_field": "//input[@id='voice-command-input']", - "submit_button": "//button[@id='voice-submit']", - "result_container": "//div[contains(@class, 'transaction-list')]" - }, - "success_indicators": ["transaction", "date", "amount"] - } - } - - def start_driver(self): - """Initialize and start the Chrome driver""" - self.driver = webdriver.Chrome(options=self.chrome_options) - - def clone_repository(self, repo_url): - """Clone a GitHub repository by downloading and extracting the ZIP""" - if not self.driver: - self.start_driver() - - try: - # Navigate to the GitHub repository page - self.driver.get(repo_url) - - # Wait until the "Code" button is clickable and click it - wait = WebDriverWait(self.driver, 10) - code_button = wait.until(EC.element_to_be_clickable((By.XPATH, - "//button[@data-variant='primary']//span[contains(@class, 'prc-Button-Label-pTQ')]"))) - code_button.click() - - # Click on download ZIP - download_zip = wait.until(EC.element_to_be_clickable((By.XPATH, - "//span[contains(text(), 'Download ZIP')]"))) - download_zip.click() - - # Wait for the ZIP file to appear in the download folder - zip_filename = None - timeout = 60 # seconds - start_time = time.time() - while time.time() - start_time < timeout: - for filename in os.listdir(self.download_dir): - if filename.endswith(".zip"): - zip_filename = filename - break - if zip_filename: - break - time.sleep(1) - - if not zip_filename: - print("Download timed out or ZIP file not found.") - return None - - zip_path = os.path.join(self.download_dir, zip_filename) - print(f"Downloaded file: {zip_path}") - - # Unzip the downloaded archive into a subfolder called "extracted" - extract_dir = os.path.join(self.download_dir, "extracted") - if not os.path.exists(extract_dir): - os.makedirs(extract_dir) - with zipfile.ZipFile(zip_path, 'r') as zip_ref: - zip_ref.extractall(extract_dir) - 
print(f"Extracted ZIP contents to: {extract_dir}") - - return extract_dir - - except Exception as e: - print(f"Error cloning repository: {e}") - return None - - def navigate_to_banking_interface(self, url): - """Navigate to the specified banking interface URL""" - if not self.driver: - self.start_driver() - - try: - self.driver.get(url) - print(f"Navigated to banking interface: {url}") - return True - except Exception as e: - print(f"Error navigating to banking interface: {e}") - return False - - def login(self, username, password): - """Log in to the banking interface""" - try: - # Wait for the username field to be visible and enter username - username_field = WebDriverWait(self.driver, 10).until( - EC.visibility_of_element_located((By.ID, "username"))) - username_field.clear() - username_field.send_keys(username) - - # Enter password - password_field = self.driver.find_element(By.ID, "password") - password_field.clear() - password_field.send_keys(password) - - # Click login button - login_button = self.driver.find_element(By.XPATH, "//button[@type='submit']") - login_button.click() - - # Wait for login to complete - WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'dashboard')]"))) - - print("Successfully logged in") - return True - except (TimeoutException, NoSuchElementException) as e: - print(f"Login failed: {e}") - return False - - def execute_voice_command(self, command_type): - """Execute a specific voice command and validate the result""" - if command_type not in self.test_commands: - print(f"Unknown command type: {command_type}") - return False - - command_data = self.test_commands[command_type] - - try: - # Find and fill the voice command input field - input_field = WebDriverWait(self.driver, 10).until( - EC.visibility_of_element_located((By.XPATH, command_data["locators"]["input_field"]))) - input_field.clear() - input_field.send_keys(command_data["command"]) - - # Click the submit button - 
submit_button = self.driver.find_element(By.XPATH, command_data["locators"]["submit_button"]) - submit_button.click() - - # Wait for the result container to appear - result_container = WebDriverWait(self.driver, 10).until( - EC.visibility_of_element_located((By.XPATH, command_data["locators"]["result_container"]))) - - # Validate the result contains expected success indicators - result_text = result_container.text.lower() - success = any(indicator.lower() in result_text for indicator in command_data["success_indicators"]) - - if success: - print(f"Voice command '{command_type}' executed successfully") - return True - else: - print(f"Voice command '{command_type}' failed validation") - return False - - except Exception as e: - print(f"Error executing voice command: {e}") - return False - - def run_test_suite(self, banking_url, username, password): - """Run a full test suite of voice commands""" - results = {} - - # Navigate to the banking interface and login - if not self.navigate_to_banking_interface(banking_url): - return {"error": "Failed to navigate to banking interface"} - - if not self.login(username, password): - return {"error": "Failed to login to banking interface"} - - # Execute each voice command test - for command_type in self.test_commands.keys(): - results[command_type] = self.execute_voice_command(command_type) - time.sleep(2) # Brief pause between commands - - # Generate summary - success_count = sum(1 for result in results.values() if result) - total_count = len(results) - results["summary"] = { - "success_count": success_count, - "total_count": total_count, - "success_rate": f"{(success_count / total_count) * 100:.2f}%" - } - - return results - - def save_test_results(self, results, filename="voice_test_results.json"): - """Save test results to a JSON file""" - output_path = os.path.join(self.download_dir, filename) - with open(output_path, 'w') as f: - json.dump(results, f, indent=4) - print(f"Test results saved to {output_path}") - - def 
import json
import os
import random
from datetime import datetime, timedelta

# Path to mock database (created lazily on first access)
DB_PATH = os.path.join(os.path.dirname(__file__), '../data/mock_db.json')

def load_mock_db():
    """Load the mock database, seeding it with sample data on first use.

    Returns:
        dict: the whole database, shaped {'users': {user_id: {...}}}.
    """
    if not os.path.exists(DB_PATH):
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)

        # Seed with two demo users so the POC works out of the box.
        mock_db = {
            'users': {
                '1': {
                    'id': '1',
                    'name': 'John Doe',
                    'accounts': {
                        'savings': {
                            'account_id': 'SAV12345',
                            'balance': 5000.00,
                            'currency': 'USD'
                        },
                        'checking': {
                            'account_id': 'CHK67890',
                            'balance': 1200.50,
                            'currency': 'USD'
                        }
                    },
                    'transactions': generate_mock_transactions('1')
                },
                '2': {
                    'id': '2',
                    'name': 'Jane Smith',
                    'accounts': {
                        'savings': {
                            'account_id': 'SAV54321',
                            'balance': 8500.75,
                            'currency': 'USD'
                        }
                    },
                    'transactions': generate_mock_transactions('2')
                }
            }
        }

        save_mock_db(mock_db)
        # The dict we just serialized is what a re-read would return,
        # so hand it back directly instead of reopening the file.
        return mock_db

    with open(DB_PATH, 'r') as f:
        return json.load(f)

def save_mock_db(db):
    """Persist the mock database to disk."""
    with open(DB_PATH, 'w') as f:
        json.dump(db, f, indent=2)

def generate_mock_transactions(user_id, count=10):
    """Generate a mock transaction history for demo purposes.

    Args:
        user_id: id embedded in each generated transaction id.
        count: number of transactions to create.

    Returns:
        list[dict]: transactions sorted by date, newest first.
    """
    transaction_types = ['deposit', 'withdrawal', 'transfer_in', 'transfer_out', 'payment']
    transactions = []

    for i in range(count):
        transaction_type = random.choice(transaction_types)
        amount = round(random.uniform(10, 500), 2)
        date = (datetime.now() - timedelta(days=random.randint(1, 30))).strftime('%Y-%m-%d')

        transaction = {
            'transaction_id': f'T{user_id}{i}',
            'type': transaction_type,
            'amount': amount,
            'date': date,
            'description': f'{transaction_type.replace("_", " ").title()} of ${amount}'
        }

        # Transfers additionally carry a fake counterparty name.
        if transaction_type in ['transfer_in', 'transfer_out']:
            transaction['counterparty'] = f'User{random.randint(1, 5)}'

        transactions.append(transaction)

    return sorted(transactions, key=lambda x: x['date'], reverse=True)

def process_banking_request(intent_data, user):
    """
    Process banking requests based on the intent.
    This is a simplified version for the POC.

    Args:
        intent_data: dict with 'intent_type' and 'parameters' keys.
        user: dict with at least an 'id' key identifying the caller.

    Returns:
        dict: response with 'success' plus intent-specific keys, or an
        'error' message on failure.
    """
    intent_type = intent_data['intent_type']
    parameters = intent_data['parameters']

    # Load the mock database
    db = load_mock_db()
    user_data = db['users'].get(user['id'])

    if not user_data:
        # Carry the success flag like every other error path does,
        # so callers can check a single field.
        return {'error': 'User not found', 'success': False}

    response = {
        'success': True,
        'intent_type': intent_type,
    }

    if intent_type == 'check_balance':
        response['accounts'] = user_data['accounts']
        # Build a readable "savings: 5000.0 USD, checking: ..." summary.
        balances = [
            f"{acc_type}: {acc_data['balance']} {acc_data['currency']}"
            for acc_type, acc_data in user_data['accounts'].items()
        ]
        response['message'] = "Your current balances are: " + ", ".join(balances)

    elif intent_type == 'transfer_money':
        if 'amount' not in parameters:
            return {'error': 'Amount not specified', 'success': False}

        # The amount may arrive as a string from the speech pipeline;
        # coerce and validate before touching any balances.
        try:
            amount = float(parameters['amount'])
        except (TypeError, ValueError):
            return {'error': 'Amount is not a valid number', 'success': False}
        if amount <= 0:
            return {'error': 'Amount must be positive', 'success': False}

        recipient = parameters.get('recipient', 'unknown')

        # Find the recipient by partial, case-insensitive name match,
        # excluding the sender so "transfer to John" never matches John himself.
        recipient_id = None
        for uid, udata in db['users'].items():
            if uid != user['id'] and recipient.lower() in udata['name'].lower():
                recipient_id = uid
                break

        if not recipient_id:
            return {'error': f'Recipient {recipient} not found', 'success': False}

        # Check if sufficient funds (from first available account)
        source_account = next(iter(user_data['accounts'].values()))
        if source_account['balance'] < amount:
            return {'error': 'Insufficient funds', 'success': False}

        # Update balances (recipient is credited on their first account).
        recipient_data = db['users'][recipient_id]
        target_account = next(iter(recipient_data['accounts'].values()))
        source_account['balance'] -= amount
        target_account['balance'] += amount

        # Add transaction records (same id on both sides links the pair).
        timestamp = datetime.now().strftime('%Y-%m-%d')
        tx_id = f"TX{timestamp.replace('-', '')}{random.randint(1000, 9999)}"

        # Add to sender's transactions
        user_data['transactions'].insert(0, {
            'transaction_id': tx_id,
            'type': 'transfer_out',
            'amount': amount,
            'date': timestamp,
            'description': f'Transfer to {recipient}',
            'counterparty': recipient
        })

        # Add to recipient's transactions
        recipient_data['transactions'].insert(0, {
            'transaction_id': tx_id,
            'type': 'transfer_in',
            'amount': amount,
            'date': timestamp,
            'description': f'Transfer from {user_data["name"]}',
            'counterparty': user_data['name']
        })

        save_mock_db(db)
        response['message'] = f"Successfully transferred {amount} to {recipient}"
        response['new_balance'] = source_account['balance']

    elif intent_type == 'transaction_history':
        period = parameters.get('period', 'recent')
        transactions = user_data['transactions']

        # ISO 'YYYY-MM-DD' dates compare correctly as strings.
        if period == 'last_week':
            one_week_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
            transactions = [t for t in transactions if t['date'] >= one_week_ago]
        elif period == 'last_month':
            one_month_ago = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
            transactions = [t for t in transactions if t['date'] >= one_month_ago]

        response['transactions'] = transactions[:5]  # Limit to 5 for demo
        response['message'] = "Here are your recent transactions"

    else:
        response = {
            'success': False,
            'message': "I'm sorry, I don't understand that banking request."
        }

    return response
import json
import os
from werkzeug.security import generate_password_hash, check_password_hash

# Path to users database file (created lazily on first access)
USERS_DB_PATH = os.path.join(os.path.dirname(__file__), '../data/users.json')

def _save_users_db(users):
    """Write the users mapping to disk (shared by every mutating helper)."""
    with open(USERS_DB_PATH, 'w') as f:
        json.dump(users, f, indent=2)

def load_users_db():
    """Load the user database, seeding it with two demo users if missing.

    Returns:
        dict: users keyed by id string.
    """
    if not os.path.exists(USERS_DB_PATH):
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(USERS_DB_PATH), exist_ok=True)

        # Create sample users
        users = {
            '1': {
                'id': '1',
                'username': 'johndoe',
                'password_hash': generate_password_hash('password123'),
                'name': 'John Doe',
                'email': 'john@example.com',
                'phone': '+1234567890',
                'language': 'en-US'
            },
            '2': {
                'id': '2',
                'username': 'janesmith',
                'password_hash': generate_password_hash('password456'),
                'name': 'Jane Smith',
                'email': 'jane@example.com',
                'phone': '+0987654321',
                'language': 'hi-IN'
            }
        }

        _save_users_db(users)
        # The dict we just serialized is what a re-read would return.
        return users

    with open(USERS_DB_PATH, 'r') as f:
        return json.load(f)

def get_user_by_id(user_id):
    """Return the user record for user_id (int or str), or None."""
    return load_users_db().get(str(user_id))

def get_user_by_username(username):
    """Return the user with an exact username match, or None."""
    for user in load_users_db().values():
        if user['username'] == username:
            return user
    return None

def authenticate_user(username, password):
    """Return the user record if the credentials are valid, else None."""
    user = get_user_by_username(username)
    if user and check_password_hash(user['password_hash'], password):
        return user
    return None

def create_user(username, password, name, email, phone, language='en-US'):
    """Create a new user; fails if the username is already taken.

    Returns:
        dict: {'success': True, 'user_id': ...} on success, otherwise
        {'success': False, 'message': ...}.
    """
    users = load_users_db()

    # Check if username exists
    if any(u['username'] == username for u in users.values()):
        return {'success': False, 'message': 'Username already exists'}

    # Next id = max existing numeric id + 1 (ids are stored as strings).
    new_user_id = str(max(int(uid) for uid in users.keys()) + 1) if users else '1'
    users[new_user_id] = {
        'id': new_user_id,
        'username': username,
        'password_hash': generate_password_hash(password),
        'name': name,
        'email': email,
        'phone': phone,
        'language': language
    }

    _save_users_db(users)
    return {'success': True, 'user_id': new_user_id}

def update_user_language(user_id, language):
    """Update user's preferred language."""
    users = load_users_db()
    key = str(user_id)
    if key not in users:
        return {'success': False, 'message': 'User not found'}

    users[key]['language'] = language
    _save_users_db(users)
    return {'success': True}
0.04); + --border-radius: 0.375rem; + --transition: all 0.3s ease; +} + +* { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +html { + font-size: 16px; +} + +body { + font-family: 'Roboto', -apple-system, BlinkMacSystemFont, 'Segoe UI', Oxygen, Ubuntu, Cantarell, sans-serif; + background-color: var(--light-color); + color: var(--gray-800); + line-height: 1.6; + min-height: 100vh; + position: relative; + overflow-x: hidden; +} + +.container { + max-width: 1200px; + width: 100%; + margin: 0 auto; + padding: 20px; +} + +header { + display: flex; + justify-content: space-between; + align-items: center; + padding: 20px 0; + margin-bottom: 30px; + border-bottom: 1px solid var(--gray-200); +} + +h1 { + color: var(--primary-color); + font-size: 2rem; + font-weight: 700; +} + +h2 { + color: var(--gray-800); + font-size: 1.5rem; + margin-bottom: 20px; + font-weight: 600; +} + +h3 { + color: var(--gray-700); + font-size: 1.25rem; + margin-bottom: 15px; + font-weight: 500; +} + +h4 { + color: var(--gray-700); + font-size: 1.125rem; + margin-bottom: 10px; + font-weight: 500; +} + +.hidden { + display: none !important; +} + +/* Form Styling */ +.form-group { + margin-bottom: 20px; +} + +label { + display: block; + margin-bottom: 8px; + font-weight: 500; + color: var(--gray-700); +} + +input, select { + width: 100%; + padding: 12px; + border: 1px solid var(--gray-300); + border-radius: var(--border-radius); + font-size: 16px; + transition: var(--transition); + background-color: white; +} + +input:focus, select:focus { + outline: none; + border-color: var(--primary-color); + box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.2); +} + +.btn { + display: inline-flex; + align-items: center; + justify-content: center; + padding: 12px 24px; + background-color: var(--primary-color); + color: white; + border: none; + border-radius: var(--border-radius); + cursor: pointer; + font-size: 16px; + font-weight: 500; + text-align: center; + transition: var(--transition); + box-shadow: 
var(--shadow-sm); + text-transform: uppercase; + letter-spacing: 0.025em; + position: relative; + overflow: hidden; +} + +.btn:hover { + background-color: var(--primary-dark); + transform: translateY(-1px); + box-shadow: var(--shadow-md); +} + +.btn:active { + transform: translateY(0); + box-shadow: var(--shadow-sm); +} + +.btn:focus { + outline: none; + box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.4); +} + +.btn:disabled { + opacity: 0.7; + cursor: not-allowed; + transform: none; +} + +.btn::after { + content: ''; + display: block; + position: absolute; + width: 100%; + height: 100%; + top: 0; + left: 0; + pointer-events: none; + background-image: radial-gradient(circle, #fff 10%, transparent 10.01%); + background-repeat: no-repeat; + background-position: 50%; + transform: scale(10, 10); + opacity: 0; + transition: transform 0.5s, opacity 0.5s; +} + +.btn:active::after { + transform: scale(0, 0); + opacity: 0.3; + transition: 0s; +} + +.primary-btn { + background-color: var(--primary-color); +} + +.primary-btn:hover { + background-color: var(--primary-dark); +} + +.secondary-btn { + background-color: var(--secondary-color); +} + +.secondary-btn:hover { + background-color: var(--secondary-dark); +} + +.accent-btn { + background-color: var(--accent-color); +} + +.accent-btn:hover { + background-color: var(--accent-dark); +} + +.outline-btn { + background-color: transparent; + color: var(--primary-color); + border: 2px solid var(--primary-color); +} + +.outline-btn:hover { + background-color: var(--primary-color); + color: white; +} + +/* Card Styling */ +.card { + background-color: white; + border-radius: var(--border-radius); + box-shadow: var(--shadow); + padding: 24px; + margin-bottom: 20px; + transition: var(--transition); +} + +.card:hover { + box-shadow: var(--shadow-md); +} + +/* Voice Interface */ +.voice-interface { + background-color: white; + padding: 30px; + border-radius: var(--border-radius); + box-shadow: var(--shadow-md); + margin-top: 20px; +} + 
+.examples { + background-color: var(--gray-100); + padding: 15px 20px; + border-radius: var(--border-radius); + margin: 20px 0; + border-left: 4px solid var(--secondary-color); +} + +.examples ul { + padding-left: 20px; +} + +.examples li { + margin-bottom: 10px; +} + +.voice-controls { + display: flex; + flex-direction: column; + align-items: center; + margin: 40px 0; +} + +.record-btn { + display: flex; + align-items: center; + justify-content: center; + background-color: var(--secondary-color); + border-radius: 50px; + padding: 16px 40px; + min-width: 200px; + position: relative; + transform-style: preserve-3d; + transition: transform 0.3s ease, box-shadow 0.3s ease; +} + +.record-btn:hover { + background-color: var(--secondary-dark); + transform: translateY(-3px); +} + +.record-btn:active { + transform: translateY(0); +} + +.mic-icon { + font-size: 24px; + margin-right: 12px; + transition: transform 0.3s ease; +} + +.record-btn:hover .mic-icon { + animation: pulse 1.5s infinite; +} + +.record-btn.recording { + background-color: var(--accent-color); + animation: recordPulse 1.5s infinite; +} + +.record-btn.recording .mic-icon { + animation: micPulse 1.5s infinite; +} + +@keyframes pulse { + 0% { transform: scale(1); } + 50% { transform: scale(1.1); } + 100% { transform: scale(1); } +} + +@keyframes recordPulse { + 0% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4); } + 70% { box-shadow: 0 0 0 10px rgba(239, 68, 68, 0); } + 100% { box-shadow: 0 0 0 0 rgba(239, 68, 68, 0); } +} + +@keyframes micPulse { + 0% { transform: scale(1); } + 10% { transform: scale(1.2); } + 20% { transform: scale(0.9); } + 30% { transform: scale(1.1); } + 40% { transform: scale(0.95); } + 50% { transform: scale(1); } + 100% { transform: scale(1); } +} + +#recording-indicator { + margin-top: 16px; + color: var(--accent-color); + font-weight: bold; + display: flex; + align-items: center; + gap: 8px; +} + +#recording-indicator::before { + content: ''; + display: inline-block; + width: 12px; + 
height: 12px; + background-color: var(--accent-color); + border-radius: 50%; + animation: blink 1s infinite; +} + +@keyframes blink { + 0% { opacity: 0; } + 50% { opacity: 1; } + 100% { opacity: 0; } +} + +.processing-indicator { + margin-top: 16px; + color: var(--secondary-color); + font-weight: 500; + text-align: center; + display: flex; + align-items: center; + justify-content: center; + gap: 10px; +} + +.processing-indicator::before { + content: ''; + display: inline-block; + width: 20px; + height: 20px; + border: 3px solid var(--gray-300); + border-top: 3px solid var(--secondary-color); + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} + +.error-message { + background-color: #fee2e2; + border-left: 4px solid var(--error-color); + padding: 16px; + margin-bottom: 20px; + border-radius: var(--border-radius); +} + +.error-message p { + margin: 5px 0; +} + +.error-message strong { + color: var(--error-color); +} + +.success-message { + background-color: #d1fae5; + border-left: 4px solid var(--success-color); + padding: 16px; + margin-bottom: 20px; + border-radius: var(--border-radius); +} + +.success-message strong { + color: var(--success-color); +} + +/* Results Section */ +.result-card { + background-color: white; + border-radius: var(--border-radius); + padding: 24px; + box-shadow: var(--shadow-md); + margin-top: 30px; + animation: fadeIn 0.5s ease; + border-top: 4px solid var(--secondary-color); +} + +@keyframes fadeIn { + from { opacity: 0; transform: translateY(20px); } + to { opacity: 1; transform: translateY(0); } +} + +#response-message { + background-color: var(--gray-100); + padding: 16px; + border-radius: var(--border-radius); + margin-bottom: 20px; + font-size: 1.1rem; +} + +/* Table Styling */ +table { + width: 100%; + border-collapse: collapse; + margin-top: 16px; + border-radius: var(--border-radius); + overflow: hidden; + box-shadow: 
var(--shadow-sm);
}

table th, table td {
    padding: 12px 16px;
    text-align: left;
    border-bottom: 1px solid var(--gray-200);
}

table th {
    background-color: var(--gray-100);
    font-weight: 500;
    color: var(--gray-800);
}

table tr:nth-child(even) {
    /* --gray-50 is never declared in :root, so the bare var() made this
       declaration invalid and the zebra stripe disappeared; the fallback
       keeps a subtle stripe while still honoring --gray-50 if it is
       added later. */
    background-color: var(--gray-50, #f9fafb);
}

table tr:last-child td {
    border-bottom: none;
}

table tr:hover {
    background-color: var(--gray-100);
}

/* Language Selector */
.language-selector {
    margin-bottom: 24px;
    display: flex;
    align-items: center;
    background-color: white;
    padding: 16px;
    border-radius: var(--border-radius);
    box-shadow: var(--shadow-sm);
}

.language-selector label {
    margin-right: 16px;
    margin-bottom: 0;
    white-space: nowrap;
}

.language-selector select {
    width: auto;
    min-width: 150px;
}

/* Auth sections */
#login-section, #register-section {
    background-color: white;
    border-radius: var(--border-radius);
    padding: 32px;
    box-shadow: var(--shadow-md);
    max-width: 500px;
    margin: 0 auto;
    border-top: 4px solid var(--primary-color);
}

#login-section h2, #register-section h2 {
    color: var(--primary-color);
    text-align: center;
    margin-bottom: 24px;
}

#login-section p, #register-section p {
    text-align: center;
    margin-top: 24px;
    color: var(--gray-600);
}

#login-section a, #register-section a {
    color: var(--primary-color);
    text-decoration: none;
    font-weight: 500;
    transition: var(--transition);
}

#login-section a:hover, #register-section a:hover {
    color: var(--primary-dark);
    text-decoration: underline;
}

#user-info {
    display: flex;
    align-items: center;
    gap: 16px;
}

#logout-btn {
    padding: 6px 16px;
    font-size: 14px;
    background-color: var(--gray-200);
    color: var(--gray-700);
}

#logout-btn:hover {
    background-color: var(--gray-300);
}

/* Balance and transaction styling */
#balance-container, #transaction-container {
    margin-top: 24px;
    animation: fadeIn 0.5s ease;
}

+.balance-item { + background-color: var(--gray-100); + padding: 12px 16px; + border-radius: var(--border-radius); + margin-bottom: 10px; + border-left: 3px solid var(--secondary-color); + display: flex; + justify-content: space-between; + align-items: center; +} + +.balance-item small { + color: var(--gray-500); + margin-left: 8px; +} + +/* Responsive styles */ +@media (max-width: 768px) { + header { + flex-direction: column; + text-align: center; + } + + h1 { + margin-bottom: 16px; + font-size: 1.75rem; + } + + .language-selector { + flex-direction: column; + align-items: flex-start; + } + + .language-selector label { + margin-bottom: 8px; + } + + .language-selector select { + width: 100%; + } + + .record-btn { + width: 100%; + max-width: 300px; + } + + table { + display: block; + overflow-x: auto; + } +} + +@media (max-width: 480px) { + .container { + padding: 16px; + } + + #login-section, #register-section { + padding: 20px; + } + + .voice-interface { + padding: 20px; + } + + .record-btn { + padding: 12px 24px; + } +} + +/* Accessibility improvements */ +.btn:focus, input:focus, select:focus { + outline: 3px solid rgba(79, 70, 229, 0.4); + outline-offset: 2px; +} + +/* Loading state for buttons */ +.btn.loading { + position: relative; + color: transparent !important; + pointer-events: none; +} + +.btn.loading::after { + content: ''; + position: absolute; + width: 20px; + height: 20px; + top: calc(50% - 10px); + left: calc(50% - 10px); + border: 2px solid rgba(255, 255, 255, 0.5); + border-top-color: white; + border-radius: 50%; + animation: spin 1s infinite linear; + box-sizing: border-box; +} diff --git a/voice_driven_banking/static/css/toast.css b/voice_driven_banking/static/css/toast.css new file mode 100644 index 00000000..b7f2fe21 --- /dev/null +++ b/voice_driven_banking/static/css/toast.css @@ -0,0 +1,61 @@ +/* Toast notifications */ +.toast { + position: fixed; + bottom: 20px; + right: 20px; + padding: 12px 20px; + border-radius: var(--border-radius); + 
color: white; + font-weight: 500; + box-shadow: 0 3px 10px rgba(0,0,0,0.15); + transform: translateY(100px); + opacity: 0; + transition: all 0.3s ease; + z-index: 1000; + max-width: 350px; + display: flex; + align-items: center; + gap: 10px; +} + +.toast.success { + background-color: var(--success-color); +} + +.toast.error { + background-color: var(--error-color); +} + +.toast.warning { + background-color: var(--warning-color); +} + +.toast.info { + background-color: var(--secondary-color); +} + +.toast.show { + transform: translateY(0); + opacity: 1; +} + +.toast::before { + font-family: 'FontAwesome'; + font-size: 18px; +} + +.toast.success::before { + content: '\f00c'; +} + +.toast.error::before { + content: '\f00d'; +} + +.toast.warning::before { + content: '\f071'; +} + +.toast.info::before { + content: '\f129'; +} diff --git a/voice_driven_banking/static/js/app.js b/voice_driven_banking/static/js/app.js new file mode 100644 index 00000000..5b129fcb --- /dev/null +++ b/voice_driven_banking/static/js/app.js @@ -0,0 +1,689 @@ +document.addEventListener('DOMContentLoaded', function() { + // DOM Elements + const loginSection = document.getElementById('login-section'); + const registerSection = document.getElementById('register-section'); + const bankingSection = document.getElementById('banking-section'); + const userInfo = document.getElementById('user-info'); + const usernameSpan = document.getElementById('username'); + + const loginForm = document.getElementById('login-form'); + const loginButton = document.getElementById('login-button'); + const registerForm = document.getElementById('register-form'); + const registerButton = document.getElementById('register-button'); + const showRegisterLink = document.getElementById('show-register'); + const showLoginLink = document.getElementById('show-login'); + + const logoutBtn = document.getElementById('logout-btn'); + const recordBtn = document.getElementById('record-btn'); + const recordText = 
document.getElementById('record-text'); + const recordingIndicator = document.getElementById('recording-indicator'); + const languageSelect = document.getElementById('language-select'); + const examplePhrases = document.getElementById('example-phrases'); + + const resultSection = document.getElementById('result-section'); + const recognizedText = document.getElementById('recognized-text'); + const preprocessedText = document.getElementById('preprocessed-text'); + const detectedIntent = document.getElementById('detected-intent'); + const responseMessage = document.getElementById('response-message'); + const balanceContainer = document.getElementById('balance-container'); + const balanceInfo = document.getElementById('balance-info'); + const transactionContainer = document.getElementById('transaction-container'); + const transactionsBody = document.getElementById('transactions-body'); + + // App state + let isRecording = false; + let mediaRecorder; + let audioChunks = []; + let currentUser = JSON.parse(localStorage.getItem('user')); + let isProcessing = false; // Track if we're currently processing a request + + // Debounce function to prevent multiple rapid button clicks + function debounce(func, wait) { + let timeout; + return function executedFunction(...args) { + const later = () => { + clearTimeout(timeout); + func(...args); + }; + clearTimeout(timeout); + timeout = setTimeout(later, wait); + }; + } + + // Set button state + function setButtonState(button, isLoading) { + if (isLoading) { + button.disabled = true; + button.classList.add('loading'); + } else { + button.disabled = false; + button.classList.remove('loading'); + } + } + + // Show toast message + function showToast(message, type = 'success') { + // Remove any existing toast + const existingToast = document.querySelector('.toast'); + if (existingToast) { + existingToast.remove(); + } + + // Create new toast + const toast = document.createElement('div'); + toast.className = `toast ${type}`; + 
toast.textContent = message; + document.body.appendChild(toast); + + // Animate in + setTimeout(() => { + toast.classList.add('show'); + }, 10); + + // Automatically remove after 3 seconds + setTimeout(() => { + toast.classList.remove('show'); + setTimeout(() => { + toast.remove(); + }, 300); + }, 3000); + } + + // Check if user is logged in + if (currentUser) { + showBankingInterface(); + } + + // Event listeners for auth + showRegisterLink.addEventListener('click', function(e) { + e.preventDefault(); + // Smooth transition + loginSection.style.opacity = 0; + setTimeout(() => { + loginSection.classList.add('hidden'); + registerSection.classList.remove('hidden'); + setTimeout(() => { + registerSection.style.opacity = 1; + }, 10); + }, 300); + }); + + showLoginLink.addEventListener('click', function(e) { + e.preventDefault(); + // Smooth transition + registerSection.style.opacity = 0; + setTimeout(() => { + registerSection.classList.add('hidden'); + loginSection.classList.remove('hidden'); + setTimeout(() => { + loginSection.style.opacity = 1; + }, 10); + }, 300); + }); + + loginForm.addEventListener('submit', function(e) { + e.preventDefault(); + + // Prevent multiple submissions + if (isProcessing) return; + isProcessing = true; + + const username = document.getElementById('username-input').value; + const password = document.getElementById('password-input').value; + + // Validate + if (!username || !password) { + showToast('Please enter both username and password', 'error'); + isProcessing = false; + return; + } + + // Visual feedback + setButtonState(loginButton, true); + + // Send login request + fetch('/api/login', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ username, password }) + }) + .then(response => response.json()) + .then(data => { + if (data.success) { + currentUser = data.user; + localStorage.setItem('user', JSON.stringify(currentUser)); + showBankingInterface(); + loginForm.reset(); + 
showToast(`Welcome back, ${currentUser.name}!`); + } else { + showToast(data.message || 'Login failed. Please check your credentials.', 'error'); + } + }) + .catch(error => { + console.error('Error:', error); + showToast('An error occurred during login. Please try again.', 'error'); + }) + .finally(() => { + isProcessing = false; + setButtonState(loginButton, false); + }); + }); + + registerForm.addEventListener('submit', function(e) { + e.preventDefault(); + + // Prevent multiple submissions + if (isProcessing) return; + isProcessing = true; + + const userData = { + username: document.getElementById('reg-username').value, + password: document.getElementById('reg-password').value, + name: document.getElementById('reg-name').value, + email: document.getElementById('reg-email').value, + phone: document.getElementById('reg-phone').value, + language: document.getElementById('reg-language').value + }; + + // Basic validation + for (const [key, value] of Object.entries(userData)) { + if (!value) { + showToast(`Please complete all fields (${key} is missing)`, 'error'); + isProcessing = false; + return; + } + } + + // Visual feedback + setButtonState(registerButton, true); + + // Send registration request + fetch('/api/register', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify(userData) + }) + .then(response => response.json()) + .then(data => { + if (data.success) { + showToast('Registration successful! Please log in.'); + registerForm.reset(); + // Smooth transition + registerSection.style.opacity = 0; + setTimeout(() => { + registerSection.classList.add('hidden'); + loginSection.classList.remove('hidden'); + setTimeout(() => { + loginSection.style.opacity = 1; + }, 10); + }, 300); + } else { + showToast(data.message || 'Registration failed. Please try again.', 'error'); + } + }) + .catch(error => { + console.error('Error:', error); + showToast('An error occurred during registration. 
Please try again.', 'error'); + }) + .finally(() => { + isProcessing = false; + setButtonState(registerButton, false); + }); + }); + + logoutBtn.addEventListener('click', function() { + // Prevent double clicks + if (this.disabled) return; + + this.disabled = true; + localStorage.removeItem('user'); + currentUser = null; + showToast('You have been logged out successfully'); + showLoginInterface(); + setTimeout(() => { + this.disabled = false; + }, 1000); + }); + + // Voice recording functionality with debounce + recordBtn.addEventListener('click', debounce(toggleRecording, 300)); + + function toggleRecording() { + // Prevent multiple clicks + if (isProcessing) return; + + if (isRecording) { + stopRecording(); + } else { + startRecording(); + } + } + + function startRecording() { + // Reset previous results + resultSection.classList.add('hidden'); + + // Prevent multiple recordings + if (isProcessing) return; + isProcessing = true; + + navigator.mediaDevices.getUserMedia({ audio: true }) + .then(stream => { + isRecording = true; + recordBtn.classList.add('recording'); + recordText.textContent = 'Stop Recording'; + recordingIndicator.classList.remove('hidden'); + + mediaRecorder = new MediaRecorder(stream); + audioChunks = []; + + mediaRecorder.ondataavailable = event => { + audioChunks.push(event.data); + }; + + mediaRecorder.onstop = processRecording; + + mediaRecorder.start(); + isProcessing = false; + + // Auto stop after 10 seconds + setTimeout(() => { + if (isRecording) { + stopRecording(); + } + }, 10000); + }) + .catch(error => { + console.error('Error accessing microphone:', error); + showToast('Could not access your microphone. 
Please check permissions and try again.', 'error'); + isProcessing = false; + }); + } + + function stopRecording() { + if (mediaRecorder && isRecording) { + mediaRecorder.stop(); + isRecording = false; + recordBtn.classList.remove('recording'); + recordText.textContent = 'Start Recording'; + recordingIndicator.classList.add('hidden'); + + // Stop all audio tracks + mediaRecorder.stream.getTracks().forEach(track => track.stop()); + } + } + + function processRecording() { + if (audioChunks.length === 0) { + showToast('No audio recorded. Please try again.', 'error'); + return; + } + + isProcessing = true; + + const audioBlob = new Blob(audioChunks, { type: 'audio/wav' }); + + // Add visual feedback + recordBtn.disabled = true; + const processingIndicator = document.createElement('div'); + processingIndicator.textContent = 'Processing your audio...'; + processingIndicator.className = 'processing-indicator'; + recordBtn.parentNode.insertBefore(processingIndicator, recordBtn.nextSibling); + + // Create form data + const formData = new FormData(); + formData.append('audio', audioBlob, 'recording.wav'); + formData.append('user_id', currentUser.id); + formData.append('language', languageSelect.value); + + // Send to server with timeout handling + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30000); + + fetch('/api/process-voice', { + method: 'POST', + body: formData, + signal: controller.signal + }) + .then(response => { + clearTimeout(timeoutId); + if (!response.ok) { + return response.json().then(data => { + throw new Error(data.error || 'Server error'); + }); + } + return response.json(); + }) + .then(data => { + processingIndicator.remove(); + displayResults(data); + }) + .catch(error => { + processingIndicator.remove(); + console.error('Error processing audio:', error); + + // Show a user-friendly error message + let errorMessage = 'An error occurred while processing your voice command'; + + if (error.name === 
'AbortError') { + errorMessage = 'Request timed out. The server took too long to respond.'; + } else if (error.message) { + errorMessage = error.message; + } + + resultSection.classList.remove('hidden'); + recognizedText.textContent = 'Could not process speech'; + detectedIntent.textContent = 'None'; + responseMessage.innerHTML = + `
+

Error: ${errorMessage}

+

Please try again with a clearer voice recording, or try using a different browser or device.

+
`; + balanceContainer.classList.add('hidden'); + transactionContainer.classList.add('hidden'); + }) + .finally(() => { + recordBtn.disabled = false; + isProcessing = false; + }); + } + + function displayResults(data) { + if (data.error) { + showToast(data.error, 'error'); + return; + } + + // Scroll to results if needed + resultSection.scrollIntoView({ behavior: 'smooth', block: 'start' }); + + // Display the results section with animation + resultSection.classList.remove('hidden'); + resultSection.style.opacity = 0; + setTimeout(() => { + resultSection.style.opacity = 1; + }, 10); + + // Update recognized text + recognizedText.textContent = data.recognized_text || 'No speech detected'; + + // Update preprocessed text + preprocessedText.textContent = data.preprocessed_text || data.recognized_text || 'No text to process'; + + // Update detected intent + detectedIntent.textContent = formatIntentName(data.intent.intent_type); + + // Update response message + if (data.response.success === false) { + responseMessage.innerHTML = ` +
+

${data.response.message || 'Could not complete your request'}

+
`; + } else { + responseMessage.textContent = data.response.message; + } + + // Handle specific intent displays + if (data.intent.intent_type === 'check_balance' && data.response.accounts) { + displayBalances(data.response.accounts); + } else { + balanceContainer.classList.add('hidden'); + } + + if (data.intent.intent_type === 'transaction_history' && data.response.transactions) { + displayTransactions(data.response.transactions); + } else { + transactionContainer.classList.add('hidden'); + } + } + + function displayBalances(accounts) { + balanceContainer.classList.remove('hidden'); + balanceInfo.innerHTML = ''; + + for (const [accountType, account] of Object.entries(accounts)) { + const balanceElement = document.createElement('div'); + balanceElement.className = 'balance-item'; + + // Format currency for better display + const formattedBalance = new Intl.NumberFormat('en-US', { + style: 'currency', + currency: account.currency + }).format(account.balance); + + balanceElement.innerHTML = ` +
+ ${accountType.charAt(0).toUpperCase() + accountType.slice(1)}: + (Account ID: ${account.account_id}) +
+
+ ${formattedBalance} +
+ `; + balanceInfo.appendChild(balanceElement); + } + } + + function displayTransactions(transactions) { + transactionContainer.classList.remove('hidden'); + transactionsBody.innerHTML = ''; + + if (transactions.length === 0) { + const row = document.createElement('tr'); + row.innerHTML = ` + No recent transactions found + `; + transactionsBody.appendChild(row); + return; + } + + transactions.forEach(transaction => { + const row = document.createElement('tr'); + + // Format currency and date for better display + const formattedAmount = new Intl.NumberFormat('en-US', { + style: 'currency', + currency: 'USD' + }).format(transaction.amount); + + const date = new Date(transaction.date); + const formattedDate = new Intl.DateTimeFormat('en-US', { + year: 'numeric', + month: 'short', + day: 'numeric' + }).format(date); + + row.innerHTML = ` + ${formattedDate} + ${formatTransactionType(transaction.type)} + ${formattedAmount} + ${transaction.description} + `; + transactionsBody.appendChild(row); + }); + } + + function formatIntentName(intent) { + if (!intent) return 'Unknown'; + return intent.split('_') + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + function formatTransactionType(type) { + if (!type) return 'Unknown'; + return type.split('_') + .map(word => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + // Language change handler with debounce + languageSelect.addEventListener('change', debounce(function() { + const language = this.value; + + // Update example phrases based on language + updateExamplePhrases(language); + + // Save user preference + if (currentUser) { + fetch('/api/update-language', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + user_id: currentUser.id, + language: language + }) + }) + .then(response => response.json()) + .then(data => { + if (data.success) { + showToast(`Language preference updated to ${getLanguageName(language)}`); + // Update 
user object in local storage + currentUser.language = language; + localStorage.setItem('user', JSON.stringify(currentUser)); + } + }) + .catch(error => { + console.error('Error updating language:', error); + }); + } + }, 300)); + + function getLanguageName(code) { + const languages = { + 'en-US': 'English', + 'hi-IN': 'Hindi', + 'ta-IN': 'Tamil' + }; + return languages[code] || code; + } + + function updateExamplePhrases(language) { + examplePhrases.innerHTML = ''; + + // Start with fade-out animation + examplePhrases.style.opacity = 0; + + let phrases; + if (language === 'hi-IN') { + phrases = [ + '"मेरा बैलेंस क्या है?"', + '"जेन को 100 रुपये ट्रांसफर करें"', + '"मेरे हाल के लेनदेन दिखाएं"' + ]; + } else if (language === 'ta-IN') { + phrases = [ + '"என் இருப்பு என்ன?"', + '"ஜேனுக்கு 100 ரூபாய் அனுப்பு"', + '"என் சமீபத்திய பரிவர்த்தனைகளைக் காட்டு"' + ]; + } else { + // Default to English + phrases = [ + '"What is my account balance?"', + '"Transfer 100 dollars to Jane"', + '"Show my recent transactions"' + ]; + } + + // Create and append list items + phrases.forEach(phrase => { + const li = document.createElement('li'); + li.textContent = phrase; + examplePhrases.appendChild(li); + }); + + // Fade back in + setTimeout(() => { + examplePhrases.style.opacity = 1; + }, 300); + } + + function showBankingInterface() { + // First hide login/register + loginSection.classList.add('hidden'); + registerSection.classList.add('hidden'); + + // Then show banking with animation + bankingSection.style.opacity = 0; + bankingSection.classList.remove('hidden'); + userInfo.classList.remove('hidden'); + + setTimeout(() => { + bankingSection.style.opacity = 1; + }, 10); + + // Set username + usernameSpan.textContent = currentUser.name; + + // Set language preference + languageSelect.value = currentUser.language || 'en-US'; + updateExamplePhrases(currentUser.language || 'en-US'); + } + + function showLoginInterface() { + // Hide banking + bankingSection.classList.add('hidden'); + 
registerSection.classList.add('hidden'); + userInfo.classList.add('hidden'); + resultSection.classList.add('hidden'); + + // Show login with animation + loginSection.style.opacity = 0; + loginSection.classList.remove('hidden'); + + setTimeout(() => { + loginSection.style.opacity = 1; + }, 10); + } + + // Add CSS for toast notifications + const style = document.createElement('style'); + style.textContent = ` + .toast { + position: fixed; + bottom: 20px; + right: 20px; + padding: 12px 20px; + border-radius: 4px; + color: white; + font-weight: 500; + box-shadow: 0 3px 10px rgba(0,0,0,0.15); + transform: translateY(100px); + opacity: 0; + transition: all 0.3s ease; + z-index: 1000; + max-width: 350px; + } + + .toast.success { + background-color: #10b981; + } + + .toast.error { + background-color: #ef4444; + } + + .toast.show { + transform: translateY(0); + opacity: 1; + } + + /* Add smooth transitions for sections */ + #login-section, #register-section, #banking-section, #result-section, #example-phrases { + transition: opacity 0.3s ease; + } + `; + document.head.appendChild(style); + + // Initialize transition opacity + loginSection.style.opacity = 1; + registerSection.style.opacity = 0; + bankingSection.style.opacity = 0; +}); diff --git a/voice_driven_banking/templates/index.html b/voice_driven_banking/templates/index.html new file mode 100644 index 00000000..8e40f2c3 --- /dev/null +++ b/voice_driven_banking/templates/index.html @@ -0,0 +1,153 @@ + + + + + + Voice Banking Assistant + + + + + +
+
+

Voice-Driven Banking Assistant

+ +
+ +
+

Login to Your Account

+
+
+ + +
+
+ + +
+ +
+

Don't have an account? Register here

+
+ + + + +
+ + + + diff --git a/voice_driven_banking/test.py b/voice_driven_banking/test.py new file mode 100644 index 00000000..62fc4739 --- /dev/null +++ b/voice_driven_banking/test.py @@ -0,0 +1,120 @@ +""" +Test script for the Voice-Driven Banking via LAMs POC + +This script tests the core functionality of the system including: +- Speech recognition +- Intent detection +- Banking operations + +Usage: + python test.py + +Prerequisites: + - All dependencies installed (pip install -r requirements.txt) + - The mock database has been created (run the app once) +""" + +import os +import json +from models.speech_recognition import recognize_speech +from models.intent_recognition import extract_intent +from services.banking_service import process_banking_request +from services.user_service import get_user_by_id, load_users_db + +# Test data +TEST_AUDIO_DIR = 'test_data' +os.makedirs(TEST_AUDIO_DIR, exist_ok=True) + +def test_intent_recognition(): + """Test the intent recognition system with text input.""" + print("\n--- Testing Intent Recognition ---") + + test_phrases = { + 'en-US': [ + "What is my account balance?", + "Transfer 100 dollars to Jane", + "Show my recent transactions" + ], + 'hi-IN': [ + "मेरा बैलेंस क्या है", + "जेन को 100 रुपये ट्रांसफर करें", + "मेरे हाल के लेनदेन दिखाएं" + ], + 'sw': [ + "Salio langu ni nini", + "Tuma shilingi 100 kwa Jane", + "Nionyeshe miamala yangu ya hivi karibuni" + ] + } + + expected_intents = ['check_balance', 'transfer_money', 'transaction_history'] + + for language, phrases in test_phrases.items(): + print(f"\nLanguage: {language}") + for i, phrase in enumerate(phrases): + intent_data = extract_intent(phrase, language) + expected = expected_intents[i] + result = "✓" if intent_data['intent_type'] == expected else "✗" + print(f"{result} \"{phrase}\" → {intent_data['intent_type']}") + + # Print parameters for transfer_money intent + if intent_data['intent_type'] == 'transfer_money' and 'parameters' in intent_data: + print(f" Parameters: 
{json.dumps(intent_data['parameters'])}") + +def test_banking_operations(): + """Test banking operations with the mock database.""" + print("\n--- Testing Banking Operations ---") + + # Load a test user + users = load_users_db() + user = users.get('1') # John Doe + + if not user: + print("Error: Test user not found. Please run the app once to create mock data.") + return + + # Test balance check + balance_intent = {'intent_type': 'check_balance', 'parameters': {}} + balance_response = process_banking_request(balance_intent, user) + + print("\nBalance Check:") + print(f"Success: {balance_response['success']}") + print(f"Message: {balance_response['message']}") + + # Test transaction history + history_intent = {'intent_type': 'transaction_history', 'parameters': {'period': 'recent'}} + history_response = process_banking_request(history_intent, user) + + print("\nTransaction History:") + print(f"Success: {history_response['success']}") + print(f"Message: {history_response['message']}") + print(f"Transactions: {len(history_response.get('transactions', []))}") + + # Test money transfer (if Jane exists) + recipient_exists = any('jane' in u['name'].lower() for u in users.values()) + + if recipient_exists: + transfer_intent = { + 'intent_type': 'transfer_money', + 'parameters': {'amount': 50.0, 'recipient': 'Jane'} + } + transfer_response = process_banking_request(transfer_intent, user) + + print("\nMoney Transfer:") + print(f"Success: {transfer_response['success']}") + print(f"Message: {transfer_response['message']}") + +def run_tests(): + """Run all the tests.""" + print("=== Voice-Driven Banking System Tests ===") + + # Test intent recognition + test_intent_recognition() + + # Test banking operations + test_banking_operations() + + print("\nTests completed.") + +if __name__ == "__main__": + run_tests() diff --git a/voice_driven_banking/test_hindi_numbers.py b/voice_driven_banking/test_hindi_numbers.py new file mode 100644 index 00000000..31258f1a --- /dev/null +++ 
b/voice_driven_banking/test_hindi_numbers.py @@ -0,0 +1,33 @@ +""" +Test script for Hindi number parsing in banking commands + +This script tests the enhanced Hindi number parsing functionality +""" + +import json +from models.intent_recognition import extract_intent + +def test_hindi_numbers(): + """Test parsing different Hindi number formats in money transfer commands.""" + print("\n=== Testing Hindi Number Parsing ===\n") + + test_phrases = [ + "जॉन को सौ रुपये भेजिए", # 100 rupees to John + "राम को दो सौ रुपये भेजें", # 200 rupees to Ram + "सीता को एक हजार रुपया भेजें", # 1000 rupees to Sita + "अनिल को पांच सौ रुपये ट्रांसफर करें", # 500 rupees to Anil + "मोहन को पचास रुपये भेजिए", # 50 rupees to Mohan + "राधा को दो हजार पांच सौ भेजें", # 2500 rupees to Radha + "विकास को 100 रुपये भेजो", # 100 rupees (numeric) to Vikas + "संजय को एक सौ बीस रुपये भेज दो" # 120 rupees to Sanjay + ] + + for phrase in test_phrases: + intent_data = extract_intent(phrase, 'hi-IN') + print(f"Phrase: \"{phrase}\"") + print(f"Detected Intent: {intent_data['intent_type']}") + print(f"Parameters: {json.dumps(intent_data.get('parameters', {}), ensure_ascii=False)}") + print("-" * 50) + +if __name__ == "__main__": + test_hindi_numbers() diff --git a/voice_driven_banking/test_hindi_transfers.py b/voice_driven_banking/test_hindi_transfers.py new file mode 100644 index 00000000..99a0886a --- /dev/null +++ b/voice_driven_banking/test_hindi_transfers.py @@ -0,0 +1,29 @@ +""" +Test script for Hindi money transfer intent recognition + +This script tests the Hindi money transfer intent detection with various phrases +""" + +import json +from models.intent_recognition import extract_intent + +def test_hindi_transfers(): + """Test Hindi money transfer intent recognition with various phrases.""" + print("\n=== Testing Hindi Money Transfer Intent Recognition ===\n") + + test_phrases = [ + "जॉन को सौ रुपये भेजिए", # Send 100 rupees to John + "राम को दो सौ रुपये ट्रांसफर करें", # Transfer 200 rupees to 
Ram + "सीता को हजार रुपया भेज दीजिए", # Send 1000 rupees to Sita + "अनिल को पांच सौ भेजो" # Send 500 to Anil + ] + + for phrase in test_phrases: + intent_data = extract_intent(phrase, 'hi-IN') + print(f"Phrase: \"{phrase}\"") + print(f"Detected Intent: {intent_data['intent_type']}") + print(f"Parameters: {json.dumps(intent_data.get('parameters', {}), ensure_ascii=False)}") + print("-" * 50) + +if __name__ == "__main__": + test_hindi_transfers() diff --git a/voice_driven_banking/update_user_data.py b/voice_driven_banking/update_user_data.py new file mode 100644 index 00000000..11bd302b --- /dev/null +++ b/voice_driven_banking/update_user_data.py @@ -0,0 +1,147 @@ +""" +This script updates the existing JSON database files to ensure they contain +all three demo users (John, Jane, and Jacob) with their accounts and transactions. +Run this once after cloning the repository to ensure consistent demo data. +""" + +import os +import json +import sys +from werkzeug.security import generate_password_hash +from services.banking_service import generate_mock_transactions + +# Path to database files +DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') +USERS_DB_PATH = os.path.join(DATA_DIR, 'users.json') +MOCK_DB_PATH = os.path.join(DATA_DIR, 'mock_db.json') + +def ensure_data_directory(): + """Ensure data directory exists.""" + if not os.path.exists(DATA_DIR): + os.makedirs(DATA_DIR) + print(f"Created directory: {DATA_DIR}") + +def update_users_db(): + """Update or create the users database file.""" + # Define demo users + users = { + '1': { + 'id': '1', + 'username': 'johndoe', + 'password_hash': generate_password_hash('password123'), + 'name': 'John Doe', + 'email': 'john@example.com', + 'phone': '+1234567890', + 'language': 'en-US' + }, + '2': { + 'id': '2', + 'username': 'janesmith', + 'password_hash': generate_password_hash('password456'), + 'name': 'Jane Smith', + 'email': 'jane@example.com', + 'phone': '+0987654321', + 'language': 'hi-IN' + }, + '3': { + 'id': 
'3', + 'username': 'jacobbrown', + 'password_hash': generate_password_hash('password789'), + 'name': 'Jacob Brown', + 'email': 'jacob@example.com', + 'phone': '+1122334455', + 'language': 'ta-IN' + } + } + + # Write to file + with open(USERS_DB_PATH, 'w') as f: + json.dump(users, f, indent=2) + + print(f"Updated users database: {USERS_DB_PATH}") + return users + +def update_mock_db(): + """Update or create the mock banking database file.""" + # Define mock banking data + mock_db = { + 'users': { + '1': { + 'id': '1', + 'name': 'John Doe', + 'accounts': { + 'savings': { + 'account_id': 'SAV12345', + 'balance': 5000.00, + 'currency': 'USD' + }, + 'checking': { + 'account_id': 'CHK67890', + 'balance': 1200.50, + 'currency': 'USD' + } + }, + 'transactions': generate_mock_transactions('1') + }, + '2': { + 'id': '2', + 'name': 'Jane Smith', + 'accounts': { + 'savings': { + 'account_id': 'SAV54321', + 'balance': 8500.75, + 'currency': 'USD' + } + }, + 'transactions': generate_mock_transactions('2') + }, + '3': { + 'id': '3', + 'name': 'Jacob Brown', + 'accounts': { + 'savings': { + 'account_id': 'SAV98765', + 'balance': 3200.50, + 'currency': 'USD' + }, + 'investment': { + 'account_id': 'INV12345', + 'balance': 10000.00, + 'currency': 'USD' + } + }, + 'transactions': generate_mock_transactions('3') + } + } + } + + # Write to file + with open(MOCK_DB_PATH, 'w') as f: + json.dump(mock_db, f, indent=2) + + print(f"Updated mock database: {MOCK_DB_PATH}") + +def main(): + """Main function to update all database files.""" + try: + print("Starting database update...") + + # Ensure data directory exists + ensure_data_directory() + + # Update users database + update_users_db() + + # Update mock banking database + update_mock_db() + + print("Database update completed successfully!") + + except Exception as e: + print(f"Error updating databases: {str(e)}") + return 1 + + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git 
a/voice_driven_banking/voice_banking_test_suite.py b/voice_driven_banking/voice_banking_test_suite.py deleted file mode 100644 index 5f327f84..00000000 --- a/voice_driven_banking/voice_banking_test_suite.py +++ /dev/null @@ -1,209 +0,0 @@ -import os -import json -import time -from voice_simulator import VoiceCommandSimulator -from selenium_automation import VoiceBankingAutomation - -class VoiceBankingTestSuite: - """ - Integrates the voice simulator with the Selenium automation to create - a complete test suite for voice-driven banking applications. - """ - - def __init__(self, config_file=None): - # Load configuration from file or use defaults - self.config = self._load_config(config_file) - - # Initialize the voice simulator - self.voice_simulator = VoiceCommandSimulator( - language=self.config.get("language", "en-US"), - confidence_range=tuple(self.config.get("confidence_range", (0.85, 0.98))) - ) - - # Initialize the Selenium automation - self.automation = VoiceBankingAutomation( - download_dir=self.config.get("download_dir", "test_results") - ) - - # Initialize test results storage - self.results = { - "test_run_id": int(time.time()), - "date": time.strftime("%Y-%m-%d %H:%M:%S"), - "config": self.config, - "tests": [] - } - - def _load_config(self, config_file): - """Load configuration from a JSON file or return defaults""" - default_config = { - "banking_url": "https://demo.mifos.io", - "username": "mifos", - "password": "password", - "language": "en-US", - "confidence_range": [0.85, 0.98], - "download_dir": "test_results", - "commands": [ - { - "name": "balance_inquiry", - "voice_command": "What is my account balance", - "variations": [ - "Check my balance", - "Show me my current balance", - "How much money do I have" - ] - }, - { - "name": "fund_transfer", - "voice_command": "Transfer 50 dollars to John Doe", - "variations": [ - "Send 50 dollars to John", - "Pay John Doe 50 dollars", - "Move 50 dollars to John's account" - ] - }, - { - "name": 
"transaction_history", - "voice_command": "Show my recent transactions", - "variations": [ - "List my recent transactions", - "Show transaction history", - "What are my recent transactions" - ] - } - ] - } - - if config_file and os.path.exists(config_file): - try: - with open(config_file, 'r') as f: - user_config = json.load(f) - # Merge user config with defaults - for key, value in user_config.items(): - default_config[key] = value - except Exception as e: - print(f"Error loading config file: {e}") - print("Using default configuration") - - return default_config - - def run_tests(self): - """Run the complete test suite""" - try: - # Start the browser and navigate to the banking interface - self.automation.start_driver() - - # Navigate to banking website - if not self.automation.navigate_to_banking_interface(self.config["banking_url"]): - raise Exception("Failed to navigate to banking interface") - - # Login - if not self.automation.login(self.config["username"], self.config["password"]): - raise Exception("Failed to login to banking interface") - - # Run tests for each command - for command_config in self.config["commands"]: - command_name = command_config["name"] - print(f"\nTesting command: {command_name}") - - # Test the primary command - self._test_command(command_name, command_config["voice_command"]) - - # Test variations if enabled - if self.config.get("test_variations", True) and "variations" in command_config: - for i, variation in enumerate(command_config["variations"]): - print(f"Testing variation {i+1}") - self._test_command(f"{command_name}_variation_{i+1}", variation) - - # Save the results - self._save_results() - - return self.results - - except Exception as e: - print(f"Error running tests: {e}") - self.results["error"] = str(e) - self._save_results() - return self.results - - finally: - # Clean up - self.automation.close() - - def _test_command(self, command_name, command_text): - """Test a single voice command and record results""" - # Simulate 
voice command - print(f"Simulating voice command: '{command_text}'") - voice_result = self.voice_simulator.simulate_command(command_text) - - # Extract the recognized text - recognized_text = voice_result["results"][0]["alternatives"][0]["transcript"] - - # Find the matching command type - command_type = None - for cmd_config in self.config["commands"]: - if command_name.startswith(cmd_config["name"]): - command_type = cmd_config["name"] - break - - if not command_type: - print(f"Warning: Could not identify command type for '{command_name}'") - command_type = "unknown" - - # Execute the command in the banking interface - result = self.automation.execute_voice_command(command_type) - - # Record the test result - test_result = { - "command_name": command_name, - "original_command": command_text, - "recognized_command": recognized_text, - "confidence": voice_result["results"][0]["alternatives"][0]["confidence"], - "success": result, - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S") - } - - self.results["tests"].append(test_result) - - # Print result - status = "✅ PASSED" if result else "❌ FAILED" - print(f"{status} - Original: '{command_text}', Recognized: '{recognized_text}'") - - return result - - def _save_results(self): - """Save test results to a JSON file""" - # Calculate summary statistics - total_tests = len(self.results["tests"]) - successful_tests = sum(1 for test in self.results["tests"] if test["success"]) - - self.results["summary"] = { - "total_tests": total_tests, - "successful_tests": successful_tests, - "success_rate": f"{(successful_tests / total_tests) * 100:.2f}%" if total_tests > 0 else "0%" - } - - # Create output directory if it doesn't exist - output_dir = self.config.get("download_dir", "test_results") - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - # Save to file - output_path = os.path.join(output_dir, f"voice_test_results_{self.results['test_run_id']}.json") - with open(output_path, 'w') as f: - json.dump(self.results, f, 
indent=4) - - print(f"\nTest results saved to: {output_path}") - - # Generate summary report - print("\n==== Test Summary ====") - print(f"Total Tests: {total_tests}") - print(f"Successful Tests: {successful_tests}") - print(f"Success Rate: {self.results['summary']['success_rate']}") - -def main(): - # Run the test suite - test_suite = VoiceBankingTestSuite() - test_suite.run_tests() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/voice_driven_banking/voice_simulator.py b/voice_driven_banking/voice_simulator.py deleted file mode 100644 index ee05acdd..00000000 --- a/voice_driven_banking/voice_simulator.py +++ /dev/null @@ -1,108 +0,0 @@ -import json -import random -import time - -class VoiceCommandSimulator: - """ - Simulates voice commands by generating appropriately formatted - text that mimics what would come from a speech-to-text system. - """ - - def __init__(self, language="en-US", confidence_range=(0.85, 0.98)): - self.language = language - self.confidence_range = confidence_range - - # Common words that might be misheard - self.sound_alikes = { - "balance": ["balance", "ballance", "valance"], - "account": ["account", "a count", "a mount"], - "transfer": ["transfer", "transfers", "transferred"], - "fifty": ["fifty", "15", "15 t", "fifth"], - "dollars": ["dollars", "dollar", "dollers"], - "recent": ["recent", "resent", "reason"], - "transactions": ["transactions", "transaction", "trans actions"] - } - - def generate_voice_result(self, command_text): - """ - Generate a simulated voice recognition result from a command text - Returns a dict similar to what a speech-to-text API would return - """ - # Add some randomness to simulate actual speech recognition - words = command_text.split() - processed_words = [] - - for word in words: - # Check if this word has common sound-alikes - if word.lower() in self.sound_alikes: - # 20% chance to use a sound-alike instead - if random.random() < 0.2: - 
processed_words.append(random.choice(self.sound_alikes[word.lower()])) - else: - processed_words.append(word) - else: - processed_words.append(word) - - # Join words back into a string - recognized_text = " ".join(processed_words) - - # Generate a confidence score within the specified range - confidence = random.uniform(self.confidence_range[0], self.confidence_range[1]) - - # Create a result object similar to what speech-to-text APIs return - result = { - "results": [ - { - "alternatives": [ - { - "transcript": recognized_text, - "confidence": confidence - } - ], - "is_final": True - } - ], - "language": self.language, - "processing_time_ms": random.randint(100, 500) - } - - return result - - def simulate_command(self, command_text): - """ - Simulate the process of speaking a command, with realistic timing - Returns the simulated recognition result - """ - # Simulate the time it takes to speak the command - speaking_time = 0.1 * len(command_text.split()) # ~100ms per word - time.sleep(speaking_time) - - # Simulate processing delay - processing_delay = random.uniform(0.2, 0.8) # 200-800ms - time.sleep(processing_delay) - - # Generate and return the result - return self.generate_voice_result(command_text) - -def test_simulator(): - """Test the voice command simulator with sample commands""" - simulator = VoiceCommandSimulator() - - # Sample commands - commands = [ - "What is my account balance", - "Transfer 50 dollars to John Doe", - "Show my recent transactions" - ] - - # Simulate each command and print the result - for command in commands: - print(f"Original command: '{command}'") - result = simulator.simulate_command(command) - print(f"Simulated result: '{result['results'][0]['alternatives'][0]['transcript']}'") - print(f"Confidence: {result['results'][0]['alternatives'][0]['confidence']:.2f}") - print(f"Processing time: {result['processing_time_ms']}ms") - print("-" * 50) - -if __name__ == "__main__": - test_simulator() \ No newline at end of file