diff --git a/.dart_tool/package_config.json b/.dart_tool/package_config.json new file mode 100644 index 0000000..0654053 --- /dev/null +++ b/.dart_tool/package_config.json @@ -0,0 +1,194 @@ +{ + "configVersion": 2, + "packages": [ + { + "name": "args", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/args-2.3.1", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "async", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/async-2.8.2", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "boolean_selector", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/boolean_selector-2.1.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "characters", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/characters-1.2.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "charcode", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/charcode-1.3.1", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "cli_util", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/cli_util-0.3.5", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "clock", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/clock-1.1.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "collection", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/collection-1.16.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "fake_async", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/fake_async-1.3.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "ffi", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/ffi-1.2.1", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "ffigen", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/ffigen-6.0.0", + "packageUri": "lib/", + "languageVersion": "2.17" + }, + { + "name": "file", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/file-6.1.2", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "flutter", + "rootUri": "file:///D:/David/Documents/flutter/packages/flutter", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "flutter_test", + "rootUri": "file:///D:/David/Documents/flutter/packages/flutter_test", + "packageUri": "lib/", + "languageVersion": "2.17" + }, + { + "name": "glob", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/glob-2.1.0", + "packageUri": "lib/", + "languageVersion": "2.15" + }, + { + "name": "logging", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/logging-1.0.2", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "matcher", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/matcher-0.12.11", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "material_color_utilities", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/material_color_utilities-0.1.4", + "packageUri": "lib/", + "languageVersion": "2.13" + }, + { + "name": "meta", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/meta-1.7.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "path", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/path-1.8.1", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "quiver", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/quiver-3.1.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "sky_engine", + "rootUri": "file:///D:/David/Documents/flutter/bin/cache/pkg/sky_engine", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "source_span", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/source_span-1.8.2", + "packageUri": "lib/", + "languageVersion": "2.14" + }, + { + "name": "stack_trace", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/stack_trace-1.10.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "stream_channel", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/stream_channel-2.1.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "string_scanner", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/string_scanner-1.1.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "term_glyph", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/term_glyph-1.2.0", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "test_api", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/test_api-0.4.9", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "vector_math", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/vector_math-2.1.2", + "packageUri": "lib/", + "languageVersion": "2.14" + }, + { + "name": "yaml", + "rootUri": "file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/yaml-3.1.1", + "packageUri": "lib/", + "languageVersion": "2.12" + }, + { + "name": "deepspeech_flutter", + "rootUri": "../", + "packageUri": "lib/", + "languageVersion": "2.12" + } + ], + "generated": "2022-07-24T21:02:29.347950Z", + "generator": "pub", + "generatorVersion": "2.17.5" +} diff --git a/.dart_tool/package_config_subset b/.dart_tool/package_config_subset new file mode 100644 index 0000000..0cb41f6 --- /dev/null +++ b/.dart_tool/package_config_subset @@ -0,0 +1,125 @@ +deepspeech_flutter +2.12 +file:///D:/David/Documents/Proyectos%20David/mozilla-deepspeech-flutter/ +file:///D:/David/Documents/Proyectos%20David/mozilla-deepspeech-flutter/lib/ +args +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/args-2.3.1/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/args-2.3.1/lib/ +async +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/async-2.8.2/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/async-2.8.2/lib/ +boolean_selector +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/boolean_selector-2.1.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/boolean_selector-2.1.0/lib/ +characters +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/characters-1.2.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/characters-1.2.0/lib/ +charcode +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/charcode-1.3.1/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/charcode-1.3.1/lib/ +cli_util +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/cli_util-0.3.5/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/cli_util-0.3.5/lib/ +clock +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/clock-1.1.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/clock-1.1.0/lib/ +collection +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/collection-1.16.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/collection-1.16.0/lib/ +fake_async +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/fake_async-1.3.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/fake_async-1.3.0/lib/ +ffi +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/ffi-1.2.1/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/ffi-1.2.1/lib/ +ffigen +2.17 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/ffigen-6.0.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/ffigen-6.0.0/lib/ +file +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/file-6.1.2/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/file-6.1.2/lib/ +glob +2.15 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/glob-2.1.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/glob-2.1.0/lib/ +logging +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/logging-1.0.2/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/logging-1.0.2/lib/ +matcher +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/matcher-0.12.11/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/matcher-0.12.11/lib/ +material_color_utilities +2.13 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/material_color_utilities-0.1.4/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/material_color_utilities-0.1.4/lib/ +meta +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/meta-1.7.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/meta-1.7.0/lib/ +path +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/path-1.8.1/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/path-1.8.1/lib/ +quiver +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/quiver-3.1.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/quiver-3.1.0/lib/ +source_span +2.14 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/source_span-1.8.2/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/source_span-1.8.2/lib/ +stack_trace +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/stack_trace-1.10.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/stack_trace-1.10.0/lib/ +stream_channel +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/stream_channel-2.1.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/stream_channel-2.1.0/lib/ +string_scanner +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/string_scanner-1.1.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/string_scanner-1.1.0/lib/ +term_glyph +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/term_glyph-1.2.0/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/term_glyph-1.2.0/lib/ +test_api +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/test_api-0.4.9/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/test_api-0.4.9/lib/ +vector_math +2.14 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/vector_math-2.1.2/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/vector_math-2.1.2/lib/ +yaml +2.12 +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/yaml-3.1.1/ +file:///D:/David/Documents/flutter/.pub-cache/hosted/pub.dartlang.org/yaml-3.1.1/lib/ +sky_engine +2.12 +file:///D:/David/Documents/flutter/bin/cache/pkg/sky_engine/ +file:///D:/David/Documents/flutter/bin/cache/pkg/sky_engine/lib/ +flutter +2.12 +file:///D:/David/Documents/flutter/packages/flutter/ +file:///D:/David/Documents/flutter/packages/flutter/lib/ +flutter_test +2.17 +file:///D:/David/Documents/flutter/packages/flutter_test/ +file:///D:/David/Documents/flutter/packages/flutter_test/lib/ +2 diff --git a/.dart_tool/version b/.dart_tool/version new file mode 100644 index 0000000..b38ebbf --- /dev/null +++ b/.dart_tool/version @@ -0,0 +1 @@ +3.0.4 \ No newline at end of file diff --git a/deepspeech_flutter/example/pubspec.lock b/deepspeech_flutter/example/pubspec.lock index af61304..4daa87a 100644 --- a/deepspeech_flutter/example/pubspec.lock +++ b/deepspeech_flutter/example/pubspec.lock @@ -42,7 +42,7 @@ packages: name: collection url: "https://pub.dartlang.org" source: hosted - version: "1.15.0" + version: "1.16.0" cupertino_icons: dependency: "direct main" description: @@ -63,14 +63,14 @@ packages: name: fake_async url: "https://pub.dartlang.org" source: hosted - version: "1.2.0" + version: "1.3.0" ffi: dependency: transitive description: name: ffi url: "https://pub.dartlang.org" source: hosted - version: "1.2.1" + version: "2.0.1" file: dependency: transitive description: @@ -101,7 +101,7 @@ packages: name: material_color_utilities url: "https://pub.dartlang.org" source: hosted - version: "0.1.3" + version: "0.1.4" meta: dependency: transitive description: @@ -115,7 +115,7 @@ packages: name: path url: "https://pub.dartlang.org" source: hosted - version: "1.8.0" + version: "1.8.1" path_provider: dependency: "direct main" description: @@ -143,7 +143,7 @@ packages: name: path_provider_linux url: "https://pub.dartlang.org" source: hosted - version: "2.1.6" + version: "2.1.7" path_provider_macos: dependency: transitive description: @@ -164,7 +164,7 @@ packages: name: path_provider_windows url: "https://pub.dartlang.org" source: hosted - version: "2.0.6" + version: "2.1.0" platform: dependency: transitive description: @@ -197,7 +197,7 @@ packages: name: source_span url: "https://pub.dartlang.org" source: hosted - version: "1.8.1" + version: "1.8.2" stack_trace: dependency: transitive description: @@ -232,28 +232,21 @@ packages: name: test_api url: "https://pub.dartlang.org" source: hosted - version: "0.4.8" - typed_data: - dependency: transitive - description: - name: typed_data - url: "https://pub.dartlang.org" - source: hosted - version: "1.3.0" + version: "0.4.9" vector_math: dependency: transitive description: name: vector_math url: "https://pub.dartlang.org" source: hosted - version: "2.1.1" + version: "2.1.2" win32: dependency: transitive description: name: win32 url: "https://pub.dartlang.org" source: hosted - version: "2.5.2" + version: "2.7.0" xdg_directories: dependency: transitive description: @@ -262,5 +255,5 @@ packages: source: hosted version: "0.2.0+1" sdks: - dart: ">=2.15.0 <3.0.0" - flutter: ">=2.8.1" + dart: ">=2.17.0 <3.0.0" + flutter: ">=3.0.0" diff --git a/deepspeech_flutter/lib/deepspeech_flutter.dart b/deepspeech_flutter/lib/deepspeech_flutter.dart index b64ff25..3a49f7f 100644 --- a/deepspeech_flutter/lib/deepspeech_flutter.dart +++ b/deepspeech_flutter/lib/deepspeech_flutter.dart @@ -1,120 +1,640 @@ -import 'dart:ffi'; -import 'dart:io'; -import 'dart:typed_data'; - -import 'package:ffi/ffi.dart'; - -typedef DSVersion = Pointer Function(); -typedef DSNativeFreeStr = Void Function(Pointer); -typedef DSFreeStr = void Function(Pointer); -typedef CreateModel = Pointer Function(Pointer); -typedef NativeFreeModel = Void Function(Pointer); -typedef FreeModel = void Function(Pointer); -typedef NativeModelSampleRate = Uint64 Function(Pointer); -typedef ModelSampleRate = int Function(Pointer); -typedef NativeSpeechToText = Pointer Function(Pointer, Pointer, Uint64); -typedef SpeechToText = Pointer Function(Pointer, Pointer, int); -typedef NativeEnableScorer = Int32 Function(Pointer, Pointer); -typedef EnableScorer = int Function(Pointer, Pointer); -typedef NativeDisableScorer = Int32 Function(Pointer); -typedef DisableScorer = int Function(Pointer); -typedef NativeSetScorerAlphaBeta = Int32 Function(Pointer, Float, Float); -typedef SetScorerAlphaBeta = int Function(Pointer, double, double); +// AUTO GENERATED FILE, DO NOT EDIT. +// +// Generated by `package:ffigen`. +import 'dart:ffi' as ffi; -class DeepspeechFlutter { - factory DeepspeechFlutter() => _instance; - static final DeepspeechFlutter _instance = DeepspeechFlutter._internal(); +/// Dart bindings of the Mozilla Deepspeech library +class deepspech_FFI { + /// Holds the symbol lookup function. + final ffi.Pointer Function(String symbolName) + _lookup; - DeepspeechFlutter._internal() { - _deepspeech = Platform.isAndroid ? DynamicLibrary.open("libdeepspeechlibc.so") : DynamicLibrary.process(); + /// The symbols are looked up in [dynamicLibrary]. + deepspech_FFI(ffi.DynamicLibrary dynamicLibrary) + : _lookup = dynamicLibrary.lookup; - _dsVersion = _deepspeech.lookupFunction('deepspeech_verison'); - _dsFreeStr = _deepspeech.lookupFunction('deepspeech_free_str'); - _dsCreateModel = _deepspeech.lookupFunction('create_model'); - _dsFreeModel = _deepspeech.lookupFunction('free_model'); - _dsModelSampleRate = _deepspeech.lookupFunction('model_sample_rate'); - _dsSpeechToText = _deepspeech.lookupFunction('speech_to_text'); - _dsEnableScorer = _deepspeech.lookupFunction('enable_external_scorer'); - _dsDisableScorer = _deepspeech.lookupFunction('disable_external_scorer'); - _dsSetScorerAlphaBeta = - _deepspeech.lookupFunction('set_scorer_alpha_beta'); + /// The symbols are looked up with [lookup]. + deepspech_FFI.fromLookup( + ffi.Pointer Function(String symbolName) + lookup) + : _lookup = lookup; + + /// @brief An object providing an interface to a trained DeepSpeech model. + /// + /// @param aModelPath The path to the frozen model graph. + /// @param[out] retval a ModelState pointer + /// + /// @return Zero on success, non-zero on failure. + int DS_CreateModel( + ffi.Pointer aModelPath, + ffi.Pointer> retval, + ) { + return _DS_CreateModel( + aModelPath, + retval, + ); + } + + late final _DS_CreateModelPtr = _lookup< + ffi.NativeFunction< + ffi.Int Function(ffi.Pointer, + ffi.Pointer>)>>('DS_CreateModel'); + late final _DS_CreateModel = _DS_CreateModelPtr.asFunction< + int Function( + ffi.Pointer, ffi.Pointer>)>(); + + /// @brief Get beam width value used by the model. If {@link DS_SetModelBeamWidth} + /// was not called before, will return the default value loaded from the + /// model file. + /// + /// @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + /// + /// @return Beam width value used by the model. + int DS_GetModelBeamWidth( + ffi.Pointer aCtx, + ) { + return _DS_GetModelBeamWidth( + aCtx, + ); + } + + late final _DS_GetModelBeamWidthPtr = _lookup< + ffi.NativeFunction< + ffi.UnsignedInt Function( + ffi.Pointer)>>('DS_GetModelBeamWidth'); + late final _DS_GetModelBeamWidth = _DS_GetModelBeamWidthPtr.asFunction< + int Function(ffi.Pointer)>(); + + /// @brief Set beam width value used by the model. + /// + /// @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + /// @param aBeamWidth The beam width used by the model. A larger beam width value + /// generates better results at the cost of decoding time. + /// + /// @return Zero on success, non-zero on failure. + int DS_SetModelBeamWidth( + ffi.Pointer aCtx, + int aBeamWidth, + ) { + return _DS_SetModelBeamWidth( + aCtx, + aBeamWidth, + ); + } + + late final _DS_SetModelBeamWidthPtr = _lookup< + ffi.NativeFunction< + ffi.Int Function(ffi.Pointer, + ffi.UnsignedInt)>>('DS_SetModelBeamWidth'); + late final _DS_SetModelBeamWidth = _DS_SetModelBeamWidthPtr.asFunction< + int Function(ffi.Pointer, int)>(); + + /// @brief Return the sample rate expected by a model. + /// + /// @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + /// + /// @return Sample rate expected by the model for its input. + int DS_GetModelSampleRate( + ffi.Pointer aCtx, + ) { + return _DS_GetModelSampleRate( + aCtx, + ); + } + + late final _DS_GetModelSampleRatePtr = + _lookup)>>( + 'DS_GetModelSampleRate'); + late final _DS_GetModelSampleRate = _DS_GetModelSampleRatePtr.asFunction< + int Function(ffi.Pointer)>(); + + /// @brief Frees associated resources and destroys model object. + void DS_FreeModel( + ffi.Pointer ctx, + ) { + return _DS_FreeModel( + ctx, + ); + } + + late final _DS_FreeModelPtr = + _lookup)>>( + 'DS_FreeModel'); + late final _DS_FreeModel = + _DS_FreeModelPtr.asFunction)>(); + + /// @brief Enable decoding using an external scorer. + /// + /// @param aCtx The ModelState pointer for the model being changed. + /// @param aScorerPath The path to the external scorer file. + /// + /// @return Zero on success, non-zero on failure (invalid arguments). + int DS_EnableExternalScorer( + ffi.Pointer aCtx, + ffi.Pointer aScorerPath, + ) { + return _DS_EnableExternalScorer( + aCtx, + aScorerPath, + ); + } + + late final _DS_EnableExternalScorerPtr = _lookup< + ffi.NativeFunction< + ffi.Int Function(ffi.Pointer, + ffi.Pointer)>>('DS_EnableExternalScorer'); + late final _DS_EnableExternalScorer = _DS_EnableExternalScorerPtr.asFunction< + int Function(ffi.Pointer, ffi.Pointer)>(); + + /// @brief Add a hot-word and its boost. + /// + /// @param aCtx The ModelState pointer for the model being changed. + /// @param word The hot-word. + /// @param boost The boost. + /// + /// @return Zero on success, non-zero on failure (invalid arguments). + int DS_AddHotWord( + ffi.Pointer aCtx, + ffi.Pointer word, + double boost, + ) { + return _DS_AddHotWord( + aCtx, + word, + boost, + ); + } + + late final _DS_AddHotWordPtr = _lookup< + ffi.NativeFunction< + ffi.Int Function(ffi.Pointer, ffi.Pointer, + ffi.Float)>>('DS_AddHotWord'); + late final _DS_AddHotWord = _DS_AddHotWordPtr.asFunction< + int Function(ffi.Pointer, ffi.Pointer, double)>(); + + /// @brief Remove entry for a hot-word from the hot-words map. + /// + /// @param aCtx The ModelState pointer for the model being changed. + /// @param word The hot-word. + /// + /// @return Zero on success, non-zero on failure (invalid arguments). + int DS_EraseHotWord( + ffi.Pointer aCtx, + ffi.Pointer word, + ) { + return _DS_EraseHotWord( + aCtx, + word, + ); + } + + late final _DS_EraseHotWordPtr = _lookup< + ffi.NativeFunction< + ffi.Int Function(ffi.Pointer, + ffi.Pointer)>>('DS_EraseHotWord'); + late final _DS_EraseHotWord = _DS_EraseHotWordPtr.asFunction< + int Function(ffi.Pointer, ffi.Pointer)>(); + + /// @brief Removes all elements from the hot-words map. + /// + /// @param aCtx The ModelState pointer for the model being changed. + /// + /// @return Zero on success, non-zero on failure (invalid arguments). + int DS_ClearHotWords( + ffi.Pointer aCtx, + ) { + return _DS_ClearHotWords( + aCtx, + ); + } + + late final _DS_ClearHotWordsPtr = + _lookup)>>( + 'DS_ClearHotWords'); + late final _DS_ClearHotWords = + _DS_ClearHotWordsPtr.asFunction)>(); + + /// @brief Disable decoding using an external scorer. + /// + /// @param aCtx The ModelState pointer for the model being changed. + /// + /// @return Zero on success, non-zero on failure. + int DS_DisableExternalScorer( + ffi.Pointer aCtx, + ) { + return _DS_DisableExternalScorer( + aCtx, + ); + } + + late final _DS_DisableExternalScorerPtr = + _lookup)>>( + 'DS_DisableExternalScorer'); + late final _DS_DisableExternalScorer = _DS_DisableExternalScorerPtr + .asFunction)>(); + + /// @brief Set hyperparameters alpha and beta of the external scorer. + /// + /// @param aCtx The ModelState pointer for the model being changed. + /// @param aAlpha The alpha hyperparameter of the decoder. Language model weight. + /// @param aLMBeta The beta hyperparameter of the decoder. Word insertion weight. + /// + /// @return Zero on success, non-zero on failure. + int DS_SetScorerAlphaBeta( + ffi.Pointer aCtx, + double aAlpha, + double aBeta, + ) { + return _DS_SetScorerAlphaBeta( + aCtx, + aAlpha, + aBeta, + ); + } + + late final _DS_SetScorerAlphaBetaPtr = _lookup< + ffi.NativeFunction< + ffi.Int Function(ffi.Pointer, ffi.Float, + ffi.Float)>>('DS_SetScorerAlphaBeta'); + late final _DS_SetScorerAlphaBeta = _DS_SetScorerAlphaBetaPtr.asFunction< + int Function(ffi.Pointer, double, double)>(); + + /// @brief Use the DeepSpeech model to convert speech to text. + /// + /// @param aCtx The ModelState pointer for the model to use. + /// @param aBuffer A 16-bit, mono raw audio signal at the appropriate + /// sample rate (matching what the model was trained on). + /// @param aBufferSize The number of samples in the audio signal. + /// + /// @return The STT result. The user is responsible for freeing the string using + /// {@link DS_FreeString()}. Returns NULL on error. + ffi.Pointer DS_SpeechToText( + ffi.Pointer aCtx, + ffi.Pointer aBuffer, + int aBufferSize, + ) { + return _DS_SpeechToText( + aCtx, + aBuffer, + aBufferSize, + ); + } + + late final _DS_SpeechToTextPtr = _lookup< + ffi.NativeFunction< + ffi.Pointer Function(ffi.Pointer, + ffi.Pointer, ffi.UnsignedInt)>>('DS_SpeechToText'); + late final _DS_SpeechToText = _DS_SpeechToTextPtr.asFunction< + ffi.Pointer Function( + ffi.Pointer, ffi.Pointer, int)>(); + + /// @brief Use the DeepSpeech model to convert speech to text and output results + /// including metadata. + /// + /// @param aCtx The ModelState pointer for the model to use. + /// @param aBuffer A 16-bit, mono raw audio signal at the appropriate + /// sample rate (matching what the model was trained on). + /// @param aBufferSize The number of samples in the audio signal. + /// @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this. + /// + /// @return Metadata struct containing multiple CandidateTranscript structs. Each + /// transcript has per-token metadata including timing information. The + /// user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + /// Returns NULL on error. + ffi.Pointer DS_SpeechToTextWithMetadata( + ffi.Pointer aCtx, + ffi.Pointer aBuffer, + int aBufferSize, + int aNumResults, + ) { + return _DS_SpeechToTextWithMetadata( + aCtx, + aBuffer, + aBufferSize, + aNumResults, + ); + } + + late final _DS_SpeechToTextWithMetadataPtr = _lookup< + ffi.NativeFunction< + ffi.Pointer Function( + ffi.Pointer, + ffi.Pointer, + ffi.UnsignedInt, + ffi.UnsignedInt)>>('DS_SpeechToTextWithMetadata'); + late final _DS_SpeechToTextWithMetadata = + _DS_SpeechToTextWithMetadataPtr.asFunction< + ffi.Pointer Function( + ffi.Pointer, ffi.Pointer, int, int)>(); + + /// @brief Create a new streaming inference state. The streaming state returned + /// by this function can then be passed to {@link DS_FeedAudioContent()} + /// and {@link DS_FinishStream()}. + /// + /// @param aCtx The ModelState pointer for the model to use. + /// @param[out] retval an opaque pointer that represents the streaming state. Can + /// be NULL if an error occurs. + /// + /// @return Zero for success, non-zero on failure. + int DS_CreateStream( + ffi.Pointer aCtx, + ffi.Pointer> retval, + ) { + return _DS_CreateStream( + aCtx, + retval, + ); } - late final DynamicLibrary _deepspeech; + late final _DS_CreateStreamPtr = _lookup< + ffi.NativeFunction< + ffi.Int Function(ffi.Pointer, + ffi.Pointer>)>>('DS_CreateStream'); + late final _DS_CreateStream = _DS_CreateStreamPtr.asFunction< + int Function( + ffi.Pointer, ffi.Pointer>)>(); - // Reference to functions. - late final DSVersion _dsVersion; - late final DSFreeStr _dsFreeStr; - late final CreateModel _dsCreateModel; - late final FreeModel _dsFreeModel; - late final ModelSampleRate _dsModelSampleRate; - late final SpeechToText _dsSpeechToText; - late final EnableScorer _dsEnableScorer; - late final DisableScorer _dsDisableScorer; - late final SetScorerAlphaBeta _dsSetScorerAlphaBeta; + /// @brief Feed audio samples to an ongoing streaming inference. + /// + /// @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + /// @param aBuffer An array of 16-bit, mono raw audio samples at the + /// appropriate sample rate (matching what the model was trained on). + /// @param aBufferSize The number of samples in @p aBuffer. + void DS_FeedAudioContent( + ffi.Pointer aSctx, + ffi.Pointer aBuffer, + int aBufferSize, + ) { + return _DS_FeedAudioContent( + aSctx, + aBuffer, + aBufferSize, + ); + } + + late final _DS_FeedAudioContentPtr = _lookup< + ffi.NativeFunction< + ffi.Void Function(ffi.Pointer, ffi.Pointer, + ffi.UnsignedInt)>>('DS_FeedAudioContent'); + late final _DS_FeedAudioContent = _DS_FeedAudioContentPtr.asFunction< + void Function( + ffi.Pointer, ffi.Pointer, int)>(); + + /// @brief Compute the intermediate decoding of an ongoing streaming inference. + /// + /// @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + /// + /// @return The STT intermediate result. The user is responsible for freeing the + /// string using {@link DS_FreeString()}. + ffi.Pointer DS_IntermediateDecode( + ffi.Pointer aSctx, + ) { + return _DS_IntermediateDecode( + aSctx, + ); + } - // Pointer to loaded model state - Pointer? _modelCtxPointer; + late final _DS_IntermediateDecodePtr = _lookup< + ffi.NativeFunction< + ffi.Pointer Function( + ffi.Pointer)>>('DS_IntermediateDecode'); + late final _DS_IntermediateDecode = _DS_IntermediateDecodePtr.asFunction< + ffi.Pointer Function(ffi.Pointer)>(); - String getVersion() { - Pointer _version = _dsVersion(); - String value = _version.toDartString(); - _dsFreeStr(_version); - return value; + /// @brief Compute the intermediate decoding of an ongoing streaming inference, + /// return results including metadata. + /// + /// @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + /// @param aNumResults The number of candidate transcripts to return. + /// + /// @return Metadata struct containing multiple candidate transcripts. Each transcript + /// has per-token metadata including timing information. The user is + /// responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + /// Returns NULL on error. + ffi.Pointer DS_IntermediateDecodeWithMetadata( + ffi.Pointer aSctx, + int aNumResults, + ) { + return _DS_IntermediateDecodeWithMetadata( + aSctx, + aNumResults, + ); } - void createModel(String modelPath) { - Pointer _modelPath = modelPath.toNativeUtf8(); - _modelCtxPointer = _dsCreateModel(_modelPath); - print('_modelCtxPointer: $_modelCtxPointer'); + late final _DS_IntermediateDecodeWithMetadataPtr = _lookup< + ffi.NativeFunction< + ffi.Pointer Function(ffi.Pointer, + ffi.UnsignedInt)>>('DS_IntermediateDecodeWithMetadata'); + late final _DS_IntermediateDecodeWithMetadata = + _DS_IntermediateDecodeWithMetadataPtr.asFunction< + ffi.Pointer Function(ffi.Pointer, int)>(); + + /// @brief Compute the final decoding of an ongoing streaming inference and return + /// the result. Signals the end of an ongoing streaming inference. + /// + /// @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + /// + /// @return The STT result. The user is responsible for freeing the string using + /// {@link DS_FreeString()}. + /// + /// @note This method will free the state pointer (@p aSctx). + ffi.Pointer DS_FinishStream( + ffi.Pointer aSctx, + ) { + return _DS_FinishStream( + aSctx, + ); } - int getSampleRate() { - if (_modelCtxPointer == null || _modelCtxPointer == nullptr) { - return -1; - } + late final _DS_FinishStreamPtr = _lookup< + ffi.NativeFunction< + ffi.Pointer Function( + ffi.Pointer)>>('DS_FinishStream'); + late final _DS_FinishStream = _DS_FinishStreamPtr.asFunction< + ffi.Pointer Function(ffi.Pointer)>(); - int _sampleRate = _dsModelSampleRate(_modelCtxPointer!); - return _sampleRate; + /// @brief Compute the final decoding of an ongoing streaming inference and return + /// results including metadata. Signals the end of an ongoing streaming + /// inference. + /// + /// @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + /// @param aNumResults The number of candidate transcripts to return. + /// + /// @return Metadata struct containing multiple candidate transcripts. Each transcript + /// has per-token metadata including timing information. The user is + /// responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + /// Returns NULL on error. + /// + /// @note This method will free the state pointer (@p aSctx). + ffi.Pointer DS_FinishStreamWithMetadata( + ffi.Pointer aSctx, + int aNumResults, + ) { + return _DS_FinishStreamWithMetadata( + aSctx, + aNumResults, + ); } - String speechToText(Uint8List samples) { - Pointer samplePointer = calloc.call(samples.length); - for (int index = 0; index < samples.length; index++) { - samplePointer.elementAt(index).value = samples[index]; - } + late final _DS_FinishStreamWithMetadataPtr = _lookup< + ffi.NativeFunction< + ffi.Pointer Function(ffi.Pointer, + ffi.UnsignedInt)>>('DS_FinishStreamWithMetadata'); + late final _DS_FinishStreamWithMetadata = + _DS_FinishStreamWithMetadataPtr.asFunction< + ffi.Pointer Function(ffi.Pointer, int)>(); + + /// @brief Destroy a streaming state without decoding the computed logits. This + /// can be used if you no longer need the result of an ongoing streaming + /// inference and don't want to perform a costly decode operation. + /// + /// @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + /// + /// @note This method will free the state pointer (@p aSctx). + void DS_FreeStream( + ffi.Pointer aSctx, + ) { + return _DS_FreeStream( + aSctx, + ); + } - Pointer _result = _dsSpeechToText(_modelCtxPointer!, samplePointer, samples.length); - malloc.free(samplePointer); + late final _DS_FreeStreamPtr = _lookup< + ffi.NativeFunction)>>( + 'DS_FreeStream'); + late final _DS_FreeStream = _DS_FreeStreamPtr.asFunction< + void Function(ffi.Pointer)>(); - return _result.toDartString(); + /// @brief Free memory allocated for metadata information. + void DS_FreeMetadata( + ffi.Pointer m, + ) { + return _DS_FreeMetadata( + m, + ); } - int enableExternalScorer(String scorerFilePath) { - Pointer _path = scorerFilePath.toNativeUtf8(); - if (_modelCtxPointer == null || _modelCtxPointer == nullptr) { - return -1; - } + late final _DS_FreeMetadataPtr = + _lookup)>>( + 'DS_FreeMetadata'); + late final _DS_FreeMetadata = + _DS_FreeMetadataPtr.asFunction)>(); - int statusCode = _dsEnableScorer(_modelCtxPointer!, _path); - return statusCode; + /// @brief Free a char* string returned by the DeepSpeech API. + void DS_FreeString( + ffi.Pointer str, + ) { + return _DS_FreeString( + str, + ); } - int disableExternalScorer() { - if (_modelCtxPointer == null || _modelCtxPointer == nullptr) { - return -1; - } - - int statusCode = _dsDisableScorer(_modelCtxPointer!); - return statusCode; + late final _DS_FreeStringPtr = + _lookup)>>( + 'DS_FreeString'); + late final _DS_FreeString = + _DS_FreeStringPtr.asFunction)>(); + + /// @brief Returns the version of this library. The returned version is a semantic + /// version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}. + /// + /// @return The version string. + ffi.Pointer DS_Version() { + return _DS_Version(); } - int setScorerAlphaBeta(double alpha, double beta) { - if (_modelCtxPointer == null || _modelCtxPointer == nullptr) { - return -1; - } + late final _DS_VersionPtr = + _lookup Function()>>( + 'DS_Version'); + late final _DS_Version = + _DS_VersionPtr.asFunction Function()>(); - int statusCode = _dsSetScorerAlphaBeta(_modelCtxPointer!, alpha, beta); - return statusCode; + /// @brief Returns a textual description corresponding to an error code. + /// The string returned must be freed with @{link DS_FreeString()}. + /// + /// @return The error description. + ffi.Pointer DS_ErrorCodeToErrorMessage( + int aErrorCode, + ) { + return _DS_ErrorCodeToErrorMessage( + aErrorCode, + ); } + + late final _DS_ErrorCodeToErrorMessagePtr = + _lookup Function(ffi.Int)>>( + 'DS_ErrorCodeToErrorMessage'); + late final _DS_ErrorCodeToErrorMessage = _DS_ErrorCodeToErrorMessagePtr + .asFunction Function(int)>(); +} + +class ModelState extends ffi.Opaque {} + +class StreamingState extends ffi.Opaque {} + +/// @brief Stores text of an individual token, along with its timing information +class TokenMetadata extends ffi.Struct { + /// The text corresponding to this token + external ffi.Pointer text; + + /// Position of the token in units of 20ms + @ffi.UnsignedInt() + external int timestep; + + /// Position of the token in seconds + @ffi.Float() + external double start_time; +} + +/// @brief A single transcript computed by the model, including a confidence +/// value and the metadata for its constituent tokens. +class CandidateTranscript extends ffi.Struct { + /// Array of TokenMetadata objects + external ffi.Pointer tokens; + + /// Size of the tokens array + @ffi.UnsignedInt() + external int num_tokens; + + /// Approximated confidence value for this transcript. This is roughly the + /// sum of the acoustic model logit values for each timestep/character that + /// contributed to the creation of this transcript. + @ffi.Double() + external double confidence; +} + +/// @brief An array of CandidateTranscript objects computed by the model. +class Metadata extends ffi.Struct { + /// Array of CandidateTranscript objects + external ffi.Pointer transcripts; + + /// Size of the transcripts array + @ffi.UnsignedInt() + external int num_transcripts; +} + +abstract class DeepSpeech_Error_Codes { + static const int DS_ERR_OK = 0; + static const int DS_ERR_NO_MODEL = 4096; + static const int DS_ERR_INVALID_ALPHABET = 8192; + static const int DS_ERR_INVALID_SHAPE = 8193; + static const int DS_ERR_INVALID_SCORER = 8194; + static const int DS_ERR_MODEL_INCOMPATIBLE = 8195; + static const int DS_ERR_SCORER_NOT_ENABLED = 8196; + static const int DS_ERR_SCORER_UNREADABLE = 8197; + static const int DS_ERR_SCORER_INVALID_LM = 8198; + static const int DS_ERR_SCORER_NO_TRIE = 8199; + static const int DS_ERR_SCORER_INVALID_TRIE = 8200; + static const int DS_ERR_SCORER_VERSION_MISMATCH = 8201; + static const int DS_ERR_FAIL_INIT_MMAP = 12288; + static const int DS_ERR_FAIL_INIT_SESS = 12289; + static const int DS_ERR_FAIL_INTERPRETER = 12290; + static const int DS_ERR_FAIL_RUN_SESS = 12291; + static const int DS_ERR_FAIL_CREATE_STREAM = 12292; + static const int DS_ERR_FAIL_READ_PROTOBUF = 12293; + static const int DS_ERR_FAIL_CREATE_SESS = 12294; + static const int DS_ERR_FAIL_CREATE_MODEL = 12295; + static const int DS_ERR_FAIL_INSERT_HOTWORD = 12296; + static const int DS_ERR_FAIL_CLEAR_HOTWORD = 12297; + static const int DS_ERR_FAIL_ERASE_HOTWORD = 12304; } diff --git a/deepspeech_flutter/native/headers/deepspeech.h b/deepspeech_flutter/native/headers/deepspeech.h new file mode 100644 index 0000000..35e9289 --- /dev/null +++ b/deepspeech_flutter/native/headers/deepspeech.h @@ -0,0 +1,396 @@ +#ifndef DEEPSPEECH_H +#define DEEPSPEECH_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef SWIG + #if defined _MSC_VER + #define DEEPSPEECH_EXPORT __declspec(dllexport) + #else + #define DEEPSPEECH_EXPORT __attribute__ ((visibility("default"))) + #endif /*End of _MSC_VER*/ +#else + #define DEEPSPEECH_EXPORT +#endif + +typedef struct ModelState ModelState; + +typedef struct StreamingState StreamingState; + +/** + * @brief Stores text of an individual token, along with its timing information + */ +typedef struct TokenMetadata { + /** The text corresponding to this token */ + const char* const text; + + /** Position of the token in units of 20ms */ + const unsigned int timestep; + + /** Position of the token in seconds */ + const float start_time; +} TokenMetadata; + +/** + * @brief A single transcript computed by the model, including a confidence + * value and the metadata for its constituent tokens. + */ +typedef struct CandidateTranscript { + /** Array of TokenMetadata objects */ + const TokenMetadata* const tokens; + /** Size of the tokens array */ + const unsigned int num_tokens; + /** Approximated confidence value for this transcript. This is roughly the + * sum of the acoustic model logit values for each timestep/character that + * contributed to the creation of this transcript. + */ + const double confidence; +} CandidateTranscript; + +/** + * @brief An array of CandidateTranscript objects computed by the model. + */ +typedef struct Metadata { + /** Array of CandidateTranscript objects */ + const CandidateTranscript* const transcripts; + /** Size of the transcripts array */ + const unsigned int num_transcripts; +} Metadata; + +// sphinx-doc: error_code_listing_start + +#define DS_FOR_EACH_ERROR(APPLY) \ + APPLY(DS_ERR_OK, 0x0000, "No error.") \ + APPLY(DS_ERR_NO_MODEL, 0x1000, "Missing model information.") \ + APPLY(DS_ERR_INVALID_ALPHABET, 0x2000, "Invalid alphabet embedded in model. (Data corruption?)") \ + APPLY(DS_ERR_INVALID_SHAPE, 0x2001, "Invalid model shape.") \ + APPLY(DS_ERR_INVALID_SCORER, 0x2002, "Invalid scorer file.") \ + APPLY(DS_ERR_MODEL_INCOMPATIBLE, 0x2003, "Incompatible model.") \ + APPLY(DS_ERR_SCORER_NOT_ENABLED, 0x2004, "External scorer is not enabled.") \ + APPLY(DS_ERR_SCORER_UNREADABLE, 0x2005, "Could not read scorer file.") \ + APPLY(DS_ERR_SCORER_INVALID_LM, 0x2006, "Could not recognize language model header in scorer.") \ + APPLY(DS_ERR_SCORER_NO_TRIE, 0x2007, "Reached end of scorer file before loading vocabulary trie.") \ + APPLY(DS_ERR_SCORER_INVALID_TRIE, 0x2008, "Invalid magic in trie header.") \ + APPLY(DS_ERR_SCORER_VERSION_MISMATCH, 0x2009, "Scorer file version does not match expected version.") \ + APPLY(DS_ERR_FAIL_INIT_MMAP, 0x3000, "Failed to initialize memory mapped model.") \ + APPLY(DS_ERR_FAIL_INIT_SESS, 0x3001, "Failed to initialize the session.") \ + APPLY(DS_ERR_FAIL_INTERPRETER, 0x3002, "Interpreter failed.") \ + APPLY(DS_ERR_FAIL_RUN_SESS, 0x3003, "Failed to run the session.") \ + APPLY(DS_ERR_FAIL_CREATE_STREAM, 0x3004, "Error creating the stream.") \ + APPLY(DS_ERR_FAIL_READ_PROTOBUF, 0x3005, "Error reading the proto buffer model file.") \ + APPLY(DS_ERR_FAIL_CREATE_SESS, 0x3006, "Failed to create session.") \ + APPLY(DS_ERR_FAIL_CREATE_MODEL, 0x3007, "Could not allocate model state.") \ + APPLY(DS_ERR_FAIL_INSERT_HOTWORD, 0x3008, "Could not insert hot-word.") \ + APPLY(DS_ERR_FAIL_CLEAR_HOTWORD, 0x3009, "Could not clear hot-words.") \ + APPLY(DS_ERR_FAIL_ERASE_HOTWORD, 0x3010, "Could not erase hot-word.") + +// sphinx-doc: error_code_listing_end + +enum DeepSpeech_Error_Codes +{ +#define DEFINE(NAME, VALUE, DESC) NAME = VALUE, +DS_FOR_EACH_ERROR(DEFINE) +#undef DEFINE +}; + +/** + * @brief An object providing an interface to a trained DeepSpeech model. + * + * @param aModelPath The path to the frozen model graph. + * @param[out] retval a ModelState pointer + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_CreateModel(const char* aModelPath, + ModelState** retval); + +/** + * @brief Get beam width value used by the model. If {@link DS_SetModelBeamWidth} + * was not called before, will return the default value loaded from the + * model file. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * + * @return Beam width value used by the model. + */ +DEEPSPEECH_EXPORT +unsigned int DS_GetModelBeamWidth(const ModelState* aCtx); + +/** + * @brief Set beam width value used by the model. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * @param aBeamWidth The beam width used by the model. A larger beam width value + * generates better results at the cost of decoding time. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_SetModelBeamWidth(ModelState* aCtx, + unsigned int aBeamWidth); + +/** + * @brief Return the sample rate expected by a model. + * + * @param aCtx A ModelState pointer created with {@link DS_CreateModel}. + * + * @return Sample rate expected by the model for its input. + */ +DEEPSPEECH_EXPORT +int DS_GetModelSampleRate(const ModelState* aCtx); + +/** + * @brief Frees associated resources and destroys model object. + */ +DEEPSPEECH_EXPORT +void DS_FreeModel(ModelState* ctx); + +/** + * @brief Enable decoding using an external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param aScorerPath The path to the external scorer file. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_EnableExternalScorer(ModelState* aCtx, + const char* aScorerPath); + +/** + * @brief Add a hot-word and its boost. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param word The hot-word. + * @param boost The boost. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_AddHotWord(ModelState* aCtx, + const char* word, + float boost); + +/** + * @brief Remove entry for a hot-word from the hot-words map. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param word The hot-word. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_EraseHotWord(ModelState* aCtx, + const char* word); + +/** + * @brief Removes all elements from the hot-words map. + * + * @param aCtx The ModelState pointer for the model being changed. + * + * @return Zero on success, non-zero on failure (invalid arguments). + */ +DEEPSPEECH_EXPORT +int DS_ClearHotWords(ModelState* aCtx); + +/** + * @brief Disable decoding using an external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_DisableExternalScorer(ModelState* aCtx); + +/** + * @brief Set hyperparameters alpha and beta of the external scorer. + * + * @param aCtx The ModelState pointer for the model being changed. + * @param aAlpha The alpha hyperparameter of the decoder. Language model weight. + * @param aLMBeta The beta hyperparameter of the decoder. Word insertion weight. + * + * @return Zero on success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_SetScorerAlphaBeta(ModelState* aCtx, + float aAlpha, + float aBeta); + +/** + * @brief Use the DeepSpeech model to convert speech to text. + * + * @param aCtx The ModelState pointer for the model to use. + * @param aBuffer A 16-bit, mono raw audio signal at the appropriate + * sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in the audio signal. + * + * @return The STT result. The user is responsible for freeing the string using + * {@link DS_FreeString()}. Returns NULL on error. + */ +DEEPSPEECH_EXPORT +char* DS_SpeechToText(ModelState* aCtx, + const short* aBuffer, + unsigned int aBufferSize); + +/** + * @brief Use the DeepSpeech model to convert speech to text and output results + * including metadata. + * + * @param aCtx The ModelState pointer for the model to use. + * @param aBuffer A 16-bit, mono raw audio signal at the appropriate + * sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in the audio signal. + * @param aNumResults The maximum number of CandidateTranscript structs to return. Returned value might be smaller than this. + * + * @return Metadata struct containing multiple CandidateTranscript structs. Each + * transcript has per-token metadata including timing information. The + * user is responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. + */ +DEEPSPEECH_EXPORT +Metadata* DS_SpeechToTextWithMetadata(ModelState* aCtx, + const short* aBuffer, + unsigned int aBufferSize, + unsigned int aNumResults); + +/** + * @brief Create a new streaming inference state. The streaming state returned + * by this function can then be passed to {@link DS_FeedAudioContent()} + * and {@link DS_FinishStream()}. + * + * @param aCtx The ModelState pointer for the model to use. + * @param[out] retval an opaque pointer that represents the streaming state. Can + * be NULL if an error occurs. + * + * @return Zero for success, non-zero on failure. + */ +DEEPSPEECH_EXPORT +int DS_CreateStream(ModelState* aCtx, + StreamingState** retval); + +/** + * @brief Feed audio samples to an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aBuffer An array of 16-bit, mono raw audio samples at the + * appropriate sample rate (matching what the model was trained on). + * @param aBufferSize The number of samples in @p aBuffer. + */ +DEEPSPEECH_EXPORT +void DS_FeedAudioContent(StreamingState* aSctx, + const short* aBuffer, + unsigned int aBufferSize); + +/** + * @brief Compute the intermediate decoding of an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @return The STT intermediate result. The user is responsible for freeing the + * string using {@link DS_FreeString()}. + */ +DEEPSPEECH_EXPORT +char* DS_IntermediateDecode(const StreamingState* aSctx); + +/** + * @brief Compute the intermediate decoding of an ongoing streaming inference, + * return results including metadata. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aNumResults The number of candidate transcripts to return. + * + * @return Metadata struct containing multiple candidate transcripts. Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. + */ +DEEPSPEECH_EXPORT +Metadata* DS_IntermediateDecodeWithMetadata(const StreamingState* aSctx, + unsigned int aNumResults); + +/** + * @brief Compute the final decoding of an ongoing streaming inference and return + * the result. Signals the end of an ongoing streaming inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @return The STT result. The user is responsible for freeing the string using + * {@link DS_FreeString()}. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +char* DS_FinishStream(StreamingState* aSctx); + +/** + * @brief Compute the final decoding of an ongoing streaming inference and return + * results including metadata. Signals the end of an ongoing streaming + * inference. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * @param aNumResults The number of candidate transcripts to return. + * + * @return Metadata struct containing multiple candidate transcripts. Each transcript + * has per-token metadata including timing information. The user is + * responsible for freeing Metadata by calling {@link DS_FreeMetadata()}. + * Returns NULL on error. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +Metadata* DS_FinishStreamWithMetadata(StreamingState* aSctx, + unsigned int aNumResults); + +/** + * @brief Destroy a streaming state without decoding the computed logits. This + * can be used if you no longer need the result of an ongoing streaming + * inference and don't want to perform a costly decode operation. + * + * @param aSctx A streaming state pointer returned by {@link DS_CreateStream()}. + * + * @note This method will free the state pointer (@p aSctx). + */ +DEEPSPEECH_EXPORT +void DS_FreeStream(StreamingState* aSctx); + +/** + * @brief Free memory allocated for metadata information. + */ +DEEPSPEECH_EXPORT +void DS_FreeMetadata(Metadata* m); + +/** + * @brief Free a char* string returned by the DeepSpeech API. + */ +DEEPSPEECH_EXPORT +void DS_FreeString(char* str); + +/** + * @brief Returns the version of this library. The returned version is a semantic + * version (SemVer 2.0.0). The string returned must be freed with {@link DS_FreeString()}. + * + * @return The version string. + */ +DEEPSPEECH_EXPORT +char* DS_Version(); + +/** + * @brief Returns a textual description corresponding to an error code. + * The string returned must be freed with @{link DS_FreeString()}. + * + * @return The error description. + */ +DEEPSPEECH_EXPORT +char* DS_ErrorCodeToErrorMessage(int aErrorCode); + +#undef DEEPSPEECH_EXPORT + +#ifdef __cplusplus +} +#endif + +#endif /* DEEPSPEECH_H */ diff --git a/deepspeech_flutter/pubspec.lock b/deepspeech_flutter/pubspec.lock index 52acf8c..6d99094 100644 --- a/deepspeech_flutter/pubspec.lock +++ b/deepspeech_flutter/pubspec.lock @@ -1,6 +1,13 @@ # Generated by pub # See https://dart.dev/tools/pub/glossary#lockfile packages: + args: + dependency: transitive + description: + name: args + url: "https://pub.dartlang.org" + source: hosted + version: "2.3.1" async: dependency: transitive description: @@ -29,6 +36,13 @@ packages: url: "https://pub.dartlang.org" source: hosted version: "1.3.1" + cli_util: + dependency: transitive + description: + name: cli_util + url: "https://pub.dartlang.org" + source: hosted + version: "0.3.5" clock: dependency: transitive description: @@ -42,21 +56,35 @@ packages: name: collection url: "https://pub.dartlang.org" source: hosted - version: "1.15.0" + version: "1.16.0" fake_async: dependency: transitive description: name: fake_async url: "https://pub.dartlang.org" source: hosted - version: "1.2.0" + version: "1.3.0" ffi: dependency: "direct main" description: name: ffi url: "https://pub.dartlang.org" source: hosted - version: "1.1.2" + version: "2.0.1" + ffigen: + dependency: "direct dev" + description: + name: ffigen + url: "https://pub.dartlang.org" + source: hosted + version: "6.0.1" + file: + dependency: transitive + description: + name: file + url: "https://pub.dartlang.org" + source: hosted + version: "6.1.2" flutter: dependency: "direct main" description: flutter @@ -67,6 +95,20 @@ packages: description: flutter source: sdk version: "0.0.0" + glob: + dependency: transitive + description: + name: glob + url: "https://pub.dartlang.org" + source: hosted + version: "2.1.0" + logging: + dependency: transitive + description: + name: logging + url: "https://pub.dartlang.org" + source: hosted + version: "1.0.2" matcher: dependency: transitive description: @@ -80,7 +122,7 @@ packages: name: material_color_utilities url: "https://pub.dartlang.org" source: hosted - version: "0.1.3" + version: "0.1.4" meta: dependency: transitive description: @@ -94,7 +136,14 @@ packages: name: path url: "https://pub.dartlang.org" source: hosted - version: "1.8.0" + version: "1.8.1" + quiver: + dependency: transitive + description: + name: quiver + url: "https://pub.dartlang.org" + source: hosted + version: "3.1.0" sky_engine: dependency: transitive description: flutter @@ -106,7 +155,7 @@ packages: name: source_span url: "https://pub.dartlang.org" source: hosted - version: "1.8.1" + version: "1.8.2" stack_trace: dependency: transitive description: @@ -141,21 +190,21 @@ packages: name: test_api url: "https://pub.dartlang.org" source: hosted - version: "0.4.8" - typed_data: + version: "0.4.9" + vector_math: dependency: transitive description: - name: typed_data + name: vector_math url: "https://pub.dartlang.org" source: hosted - version: "1.3.0" - vector_math: + version: "2.1.2" + yaml: dependency: transitive description: - name: vector_math + name: yaml url: "https://pub.dartlang.org" source: hosted - version: "2.1.1" + version: "3.1.1" sdks: - dart: ">=2.14.0 <3.0.0" + dart: ">=2.17.0 <3.0.0" flutter: ">=1.20.0" diff --git a/deepspeech_flutter/pubspec.yaml b/deepspeech_flutter/pubspec.yaml index 174e395..cfb6ffa 100644 --- a/deepspeech_flutter/pubspec.yaml +++ b/deepspeech_flutter/pubspec.yaml @@ -11,12 +11,21 @@ environment: dependencies: flutter: sdk: flutter - ffi: ^1.1.2 + ffi: ^2.0.1 dev_dependencies: + ffigen: ^6.0.1 flutter_test: sdk: flutter +ffigen: + name: 'deepspech_FFI' + description: 'Dart bindings of the Mozilla Deepspeech library' + output: 'lib/deepspeech_flutter.dart' + headers: + entry-points: + - 'native/headers/deepspeech.h' + # For information on the generic Dart part of this file, see the # following page: https://dart.dev/tools/pub/pubspec @@ -58,9 +67,4 @@ flutter: # style: italic # - family: Trajan Pro # fonts: - # - asset: fonts/TrajanPro.ttf - # - asset: fonts/TrajanPro_Bold.ttf - # weight: 700 - # - # For details regarding fonts in packages, see - # https://flutter.dev/custom-fonts/#from-packages + # - asset \ No newline at end of file