# ymemo/.env.example at main · dev-wei/ymemo · GitHub
# ======================================================================
# YMemo Environment Configuration Template
# ======================================================================
#
# This file contains all the environment variables that YMemo supports.
# Copy this file to `.env` and update the values with your actual credentials.
#
# WARNING: Never commit actual credentials to version control!
#
# Usage:
#   1. Copy this file: cp .env.example .env
#   2. Edit .env with your actual values
#   3. Ensure .env is in your .gitignore file
#
# ======================================================================

# ======================================================================
# PROVIDER SELECTION
# ======================================================================

# Primary transcription provider to use
# Options: aws, azure, whisper, google
# Note: the Whisper and Google sections of this file are labelled
#       "Future Provider"; aws and azure are the providers with full settings.
# Default: aws
TRANSCRIPTION_PROVIDER=aws

# Audio capture provider (where input audio comes from)
# Options: pyaudio, file
# Default: pyaudio
CAPTURE_PROVIDER=pyaudio

# ======================================================================
# AUDIO CONFIGURATION
# ======================================================================

# Audio quality preset (preferred over AUDIO_SAMPLE_RATE)
# Options: high (44,100 Hz CD-quality), average (16,000 Hz speech-optimized)
# Default: not set (uses AUDIO_SAMPLE_RATE instead)
# Uncomment to enable; when set, it takes precedence over AUDIO_SAMPLE_RATE.
# AUDIO_QUALITY=high

# Audio sample rate in Hz (overridden by AUDIO_QUALITY if set)
# Common values: 16000, 44100, 48000
# Default: 16000
AUDIO_SAMPLE_RATE=16000

# Audio chunk size for processing
# NOTE(review): the unit is not stated here (presumably frames per buffer) - confirm.
# Default: 1024
AUDIO_CHUNK_SIZE=1024

# Audio sample format
# Options: int16, int24, int32, float32
# Default: int16
AUDIO_FORMAT=int16

# ======================================================================
# AWS TRANSCRIBE CONFIGURATION
# ======================================================================

# AWS Credentials (Required for AWS provider)
# Get these from AWS IAM console: https://console.aws.amazon.com/iam/
# The values below are placeholders and must be replaced with real credentials.
AWS_ACCESS_KEY_ID=your_aws_access_key_here
AWS_SECRET_ACCESS_KEY=your_aws_secret_key_here

# AWS Region for Transcribe service
# Common regions: us-east-1, us-west-2, eu-west-1, ap-southeast-2
# Default: us-east-1
AWS_REGION=us-east-1

# AWS Profile name (alternative to access keys)
# Use if you have AWS CLI configured with named profiles
# When using a profile, comment out AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
# so the placeholder values are not exported alongside it.
# NOTE(review): confirm which one the application prefers when both are set.
# AWS_PROFILE=your_profile_name

# Language code for transcription
# Examples: en-US, es-ES, fr-FR, de-DE, ja-JP, zh-CN
# Default: en-US
AWS_LANGUAGE_CODE=en-US

# Maximum number of speakers for diarization
# Range: 2-10
# Default: 10
AWS_MAX_SPEAKERS=10

# AWS connection strategy (DEPRECATED - auto-detected based on device channels)
# Connection strategy is now automatically determined:
# - 1 channel device → single AWS connection
# - 2+ channel device → dual AWS connections for optimal transcription
# This setting is ignored and will be removed in a future version.
# AWS_CONNECTION_STRATEGY=auto

# Enable dual connection fallback
# NOTE(review): presumably only relevant for 2+ channel devices, where dual
# connections are used - confirm.
# Default: true
AWS_DUAL_FALLBACK_ENABLED=true

# Channel balance threshold for dual mode fallback
# Range: 0.0-1.0 (0.3 means 30% imbalance triggers fallback)
# Default: 0.3
AWS_CHANNEL_BALANCE_THRESHOLD=0.3

# Dual connection test mode (for development)
# Default: false
AWS_DUAL_CONNECTION_TEST_MODE=false

# ======================================================================
# AZURE SPEECH SERVICE CONFIGURATION
# ======================================================================

# Azure Speech API Key (Required for Azure provider)
# Get from Azure Portal > Cognitive Services > Speech Service
# The value below is a placeholder and must be replaced before using Azure.
AZURE_SPEECH_KEY=your_azure_speech_key_here

# Azure Speech Service Region
# Examples: eastus, westus2, westeurope, eastasia
# Default: eastus
AZURE_SPEECH_REGION=eastus

# Azure Speech Language
# Examples: en-US, es-ES, fr-FR, de-DE, ja-JP, zh-CN
# Default: en-US
AZURE_SPEECH_LANGUAGE=en-US

# Custom Azure Speech Service Endpoint (optional)
# Use if you have a custom speech endpoint
# AZURE_SPEECH_ENDPOINT=https://your-custom-endpoint.cognitiveservices.azure.com/

# Enable speaker diarization for Azure
# Default: false
AZURE_ENABLE_SPEAKER_DIARIZATION=false

# Maximum speakers for Azure diarization
# NOTE(review): presumably only used when AZURE_ENABLE_SPEAKER_DIARIZATION=true - confirm.
# Range: 1-4
# Default: 4
AZURE_MAX_SPEAKERS=4

# Azure Speech Service timeout (seconds)
# Default: 30
AZURE_SPEECH_TIMEOUT=30

# ======================================================================
# OPENAI WHISPER CONFIGURATION (Future Provider)
# ======================================================================

# Whisper model size
# Options: tiny, base, small, medium, large
# Default: base
WHISPER_MODEL_SIZE=base

# Device for Whisper processing
# Options: auto, cpu, cuda
# NOTE(review): "auto" presumably selects cuda when available, else cpu - confirm.
# Default: auto
WHISPER_DEVICE=auto

# ======================================================================
# GOOGLE CLOUD SPEECH CONFIGURATION (Future Provider)
# ======================================================================

# Path to Google Cloud service account credentials JSON
# GOOGLE_CREDENTIALS_PATH=/path/to/your/service-account-key.json

# ======================================================================
# PERFORMANCE & BEHAVIOR SETTINGS
# ======================================================================

# Maximum acceptable latency in milliseconds
# Default: 300
MAX_LATENCY_MS=300

# Enable partial (interim) results during transcription
# Default: true
ENABLE_PARTIAL_RESULTS=true

# How to handle partial results
# Options: replace, append, final_only
# NOTE(review): presumably only takes effect when ENABLE_PARTIAL_RESULTS=true - confirm.
# Default: replace
PARTIAL_RESULT_HANDLING=replace

# Timeout for partial results (seconds)
# Default: 2.0
PARTIAL_RESULT_TIMEOUT=2.0

# Confidence threshold for accepting results
# Range: 0.0-1.0 (0.0 accepts all results)
# Default: 0.0
CONFIDENCE_THRESHOLD=0.0

# ======================================================================
# AUDIO SAVING (Provider-Agnostic)
# ======================================================================

# Enable raw audio saving to WAV file
# Works with all transcription providers (AWS, Azure, Whisper, etc.)
# Default: false
SAVE_RAW_AUDIO=false

# Enable split audio saving (save left/right channels separately for stereo input)
# Works with all transcription providers - saves separate L/R channel files alongside main file
# Only applies when stereo input device is selected
# The usage examples in this file enable SAVE_RAW_AUDIO together with this flag.
# Default: false
SAVE_SPLIT_AUDIO=false

# Directory path to save audio files
# Default: debug_audio/
AUDIO_SAVE_PATH=debug_audio/

# Maximum recording duration to save (seconds)
# Default: 30
AUDIO_SAVE_DURATION=30

# ======================================================================
# DATABASE CONFIGURATION (Optional)
# ======================================================================

# PostgreSQL connection URL for database storage
# Format: postgresql://user:password@host:port/database
# Example: postgresql://postgres:mypassword@localhost:5432/ymemo
# Optional: kept commented out so that a freshly copied .env does not export a
# placeholder connection string. Uncomment and fill in real values to enable
# database storage.
# POSTGRES_URL=postgresql://user:password@host:5432/database_name

# ======================================================================
# LOGGING & DEBUGGING
# ======================================================================

# Application logging level
# Options: DEBUG, INFO, WARNING, ERROR, CRITICAL
# Default: INFO
LOG_LEVEL=INFO

# Test log level (used during development)
# NOTE(review): presumably applied instead of LOG_LEVEL while tests run - confirm.
# Options: DEBUG, INFO, WARNING, ERROR, CRITICAL
# Default: WARNING
TEST_LOG_LEVEL=WARNING

# ======================================================================
# DEVELOPMENT & TESTING (Optional)
# ======================================================================

# Skip AWS validation during development
# Default: false
SKIP_AWS_VALIDATION=false

# Enable service mocking for testing
# Default: false
MOCK_SERVICES=false

# Enable testing mode
# Default: false
TESTING=false

# The variables below are normally provided by the environment; they are listed
# for reference only and should not need to be set in .env.

# CI environment indicator (set automatically by CI systems)
# CI=true

# pytest environment indicator
# NOTE(review): PYTEST_RUNNING is not one of the variables pytest itself
# documents as setting; presumably it is set by the project's test setup - confirm.
# PYTEST_RUNNING=true

# Current pytest test (set automatically by pytest)
# PYTEST_CURRENT_TEST=test_name

# ======================================================================
# USAGE EXAMPLES
# ======================================================================

# Example 1: AWS Transcribe with English transcription
# TRANSCRIPTION_PROVIDER=aws
# AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
# AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
# AWS_REGION=us-east-1
# AWS_LANGUAGE_CODE=en-US
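# ENABLE_SPEAKER_DIARIZATION=true
# (NOTE: ENABLE_SPEAKER_DIARIZATION is not documented in the AWS section of this
#  file, which only lists AWS_MAX_SPEAKERS for diarization - verify it is supported.)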

# Example 2: Azure Speech Service with Spanish transcription
# TRANSCRIPTION_PROVIDER=azure
# AZURE_SPEECH_KEY=your_32_character_azure_key_here
# AZURE_SPEECH_REGION=eastus
# AZURE_SPEECH_LANGUAGE=es-ES
# AZURE_ENABLE_SPEAKER_DIARIZATION=true

# Example 3: Development setup with file input and audio saving
# TRANSCRIPTION_PROVIDER=aws
# CAPTURE_PROVIDER=file
# SAVE_RAW_AUDIO=true
# AUDIO_SAVE_PATH=./recordings/
# AUDIO_SAVE_DURATION=60
# LOG_LEVEL=DEBUG
# SKIP_AWS_VALIDATION=true
# MOCK_SERVICES=true

# Example 4: Azure with stereo audio recording and channel splitting
# TRANSCRIPTION_PROVIDER=azure
# AZURE_SPEECH_KEY=your_azure_key_here
# AZURE_SPEECH_REGION=eastus
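# AUDIO_CHANNELS=2
# (NOTE: AUDIO_CHANNELS is not documented in the AUDIO CONFIGURATION section of
#  this file; channel handling is described as following the selected device -
#  verify this variable is supported.)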
# SAVE_RAW_AUDIO=true
# SAVE_SPLIT_AUDIO=true
# AUDIO_SAVE_PATH=./meeting_recordings/

# Example 5: AWS with stereo device and split audio saving
# TRANSCRIPTION_PROVIDER=aws
# AWS_ACCESS_KEY_ID=your_access_key_here
# AWS_SECRET_ACCESS_KEY=your_secret_key_here
# SAVE_RAW_AUDIO=true
# SAVE_SPLIT_AUDIO=true
# AUDIO_SAVE_PATH=./debug_audio/

# ======================================================================
# SECURITY NOTES
# ======================================================================

# 1. Never commit this file with real credentials to version control
# 2. Keep your .env file in .gitignore
# 3. Use environment-specific .env files (.env.development, .env.production)
# 4. Regularly rotate your API keys and credentials
# 5. Use IAM roles and managed identities when possible
# 6. Monitor your cloud service usage and costs

# ======================================================================
# TROUBLESHOOTING
# ======================================================================

# If you encounter issues:
# 1. Check that all required variables for your provider are set
# 2. Verify your credentials are correct and have necessary permissions
# 3. Ensure your region settings match your service configurations
# 4. Check the YMemo logs for detailed error messages
# 5. Visit the project documentation: https://github.com/dev-wei/ymemo

# For more detailed configuration options, see:
# - src/config/audio_config.py (configuration loading)
# - src/config/provider_config.py (provider configurations)
# - README.md (setup instructions)