enhance: use iOS's new Speech API for transcription

This commit is contained in:
Tienson Qin
2025-09-17 23:17:01 +08:00
parent 3d725195a4
commit 6b55a90ea1
9 changed files with 135 additions and 69 deletions

View File

@@ -14,6 +14,7 @@ dependencies {
implementation project(':capacitor-app')
implementation project(':capacitor-camera')
implementation project(':capacitor-clipboard')
implementation project(':capacitor-device')
implementation project(':capacitor-filesystem')
implementation project(':capacitor-haptics')
implementation project(':capacitor-keyboard')

View File

@@ -19,6 +19,10 @@
"pkg": "@capacitor/clipboard",
"classpath": "com.capacitorjs.plugins.clipboard.ClipboardPlugin"
},
{
"pkg": "@capacitor/device",
"classpath": "com.capacitorjs.plugins.device.DevicePlugin"
},
{
"pkg": "@capacitor/filesystem",
"classpath": "com.capacitorjs.plugins.filesystem.FilesystemPlugin"

View File

@@ -17,6 +17,9 @@ project(':capacitor-camera').projectDir = new File('../node_modules/@capacitor/c
include ':capacitor-clipboard'
project(':capacitor-clipboard').projectDir = new File('../node_modules/@capacitor/clipboard/android')
include ':capacitor-device'
project(':capacitor-device').projectDir = new File('../node_modules/@capacitor/device/android')
include ':capacitor-filesystem'
project(':capacitor-filesystem').projectDir = new File('../node_modules/@capacitor/filesystem/android')

View File

@@ -209,51 +209,71 @@ public class UILocalPlugin: CAPPlugin, CAPBridgedPlugin {
CAPPluginMethod(name: "transcribeAudio2Text", returnType: CAPPluginReturnPromise)
]
// TODO: switch to use https://developer.apple.com/documentation/speech/speechanalyzer for iOS 26+
//
private func recognizeSpeech(from url: URL, completion: @escaping (String?, Error?) -> Void) {
SFSpeechRecognizer.requestAuthorization { authStatus in
guard authStatus == .authorized else {
completion(nil, NSError(domain: "", code: -1, userInfo: [NSLocalizedDescriptionKey: "语音识别权限未授权"]))
return
func recognizeSpeech(from file: URL, locale: String, completion: @escaping (String?, Error?) -> Void) {
if #available(iOS 26.0, *) {
// Modern API: SpeechTranscriber + SpeechAnalyzer
Task {
do {
print("debug locale \(locale)")
// Step 1: pick supported locale
guard let supportedLocale = await SpeechTranscriber.supportedLocale(equivalentTo: Locale(identifier: locale)) else {
throw NSError(domain: "Speech", code: -1,
userInfo: [NSLocalizedDescriptionKey: "Unsupported locale"])
}
let recognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))
let request = SFSpeechURLRecognitionRequest(url: url)
// Step 2: transcriber with transcription preset
let transcriber = SpeechTranscriber(locale: supportedLocale, preset: .transcription)
// Setting up offline speech recognition
recognizer?.supportsOnDeviceRecognition = true
request.shouldReportPartialResults = false
request.requiresOnDeviceRecognition = true
request.taskHint = .dictation
if #available(iOS 16, *) {
request.addsPunctuation = true
// Ensure assets (downloads model if needed)
if let installRequest = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
try await installRequest.downloadAndInstall()
}
recognizer?.recognitionTask(with: request) { result, error in
if let result = result {
let transcription = result.bestTranscription.formattedString
completion(transcription, nil)
} else if let error = error {
// Step 3: collect transcription results async
async let transcriptionFuture: String = try transcriber.results.reduce(into: "") { partial, result in
partial += String(result.text.characters) + " "
}
// Step 4: analyzer
let analyzer = SpeechAnalyzer(modules: [transcriber])
// Step 5/6: run analysis from file
let audioFile = try AVAudioFile(forReading: file)
if let lastSample = try await analyzer.analyzeSequence(from: audioFile) {
try await analyzer.finalizeAndFinish(through: lastSample)
} else {
try await analyzer.cancelAndFinishNow()
}
// Step 7/8: wait for transcription
let finalText = try await transcriptionFuture.trimmingCharacters(in: .whitespacesAndNewlines)
completion(finalText, nil)
} catch {
completion(nil, error)
}
}
}
}
@objc func transcribeAudio2Text(_ call: CAPPluginCall) {
self.call = call
// arrayBuffer
// audio arrayBuffer
guard let audioArray = call.getArray("audioData", NSNumber.self) as? [UInt8] else {
call.reject("无效的音频数据")
call.reject("invalid audioData")
return
}
guard let locale = call.getString("locale") else {
call.reject("invalid locale")
return
}
// Data
let audioData = Data(audioArray)
//
let fileURL = FileManager.default.temporaryDirectory.appendingPathComponent("recordedAudio.m4a")
do {
@@ -261,23 +281,21 @@ public class UILocalPlugin: CAPPlugin, CAPBridgedPlugin {
let fileExists = FileManager.default.fileExists(atPath: fileURL.path)
print("文件是否存在: \(fileExists), 路径: \(fileURL.path)")
print("file exists: \(fileExists), path: \(fileURL.path)")
if !fileExists {
call.reject("文件保存失败,文件不存在")
call.reject("file save failed: file doesn't exist")
return
}
//
self.recognizeSpeech(from: fileURL) { result, error in
self.recognizeSpeech(from: fileURL, locale: locale) { result, error in
if let result = result {
call.resolve(["transcription": result])
} else if let error = error {
call.reject("语音识别失败: \(error.localizedDescription)")
call.reject("failed to transcribe: \(error.localizedDescription)")
}
}
} catch {
call.reject("保存文件失败: \(error.localizedDescription)")
call.reject("failed to transcribe: \(error.localizedDescription)")
}
}

View File

@@ -16,6 +16,7 @@ def capacitor_pods
pod 'CapacitorApp', :path => '../../node_modules/@capacitor/app'
pod 'CapacitorCamera', :path => '../../node_modules/@capacitor/camera'
pod 'CapacitorClipboard', :path => '../../node_modules/@capacitor/clipboard'
pod 'CapacitorDevice', :path => '../../node_modules/@capacitor/device'
pod 'CapacitorFilesystem', :path => '../../node_modules/@capacitor/filesystem'
pod 'CapacitorHaptics', :path => '../../node_modules/@capacitor/haptics'
pod 'CapacitorKeyboard', :path => '../../node_modules/@capacitor/keyboard'

View File

@@ -12,6 +12,8 @@ PODS:
- CapacitorCommunitySafeArea (7.0.0-alpha.1):
- Capacitor
- CapacitorCordova (7.2.0)
- CapacitorDevice (7.0.2):
- Capacitor
- CapacitorFilesystem (7.0.1):
- Capacitor
- CapacitorHaptics (7.0.1):
@@ -39,6 +41,7 @@ DEPENDENCIES:
- "CapacitorClipboard (from `../../node_modules/@capacitor/clipboard`)"
- "CapacitorCommunitySafeArea (from `../../node_modules/@capacitor-community/safe-area`)"
- "CapacitorCordova (from `../../node_modules/@capacitor/ios`)"
- "CapacitorDevice (from `../../node_modules/@capacitor/device`)"
- "CapacitorFilesystem (from `../../node_modules/@capacitor/filesystem`)"
- "CapacitorHaptics (from `../../node_modules/@capacitor/haptics`)"
- "CapacitorKeyboard (from `../../node_modules/@capacitor/keyboard`)"
@@ -64,6 +67,8 @@ EXTERNAL SOURCES:
:path: "../../node_modules/@capacitor-community/safe-area"
CapacitorCordova:
:path: "../../node_modules/@capacitor/ios"
CapacitorDevice:
:path: "../../node_modules/@capacitor/device"
CapacitorFilesystem:
:path: "../../node_modules/@capacitor/filesystem"
CapacitorHaptics:
@@ -91,6 +96,7 @@ SPEC CHECKSUMS:
CapacitorClipboard: b98aead5dc7ec595547fc2c5d75bacd2ae3338bc
CapacitorCommunitySafeArea: cc370b4f8d4aa340e4616acef9b73eda41ba0914
CapacitorCordova: 5967b9ba03915ef1d585469d6e31f31dc49be96f
CapacitorDevice: a50a45f0d075e55e2392c7a4be5404d4f69515de
CapacitorFilesystem: 307f97c27a265edf8396a1c9c235592fd8572fe3
CapacitorHaptics: 70e47470fa1a6bd6338cd102552e3846b7f9a1b3
CapacitorKeyboard: 969647d0ca2e5c737d7300088e2517aa832434e2
@@ -101,6 +107,6 @@ SPEC CHECKSUMS:
JcesarmobileSslSkip: b0f921e9d397a57f7983731209ca1ee244119c1f
SendIntent: 1f4f65c7103eb423067c566682dfcda973b5fb29
PODFILE CHECKSUM: d1ad773ee5fbd3415c2d78d69f4396a1dc68bed9
PODFILE CHECKSUM: cb9c70caa3eda97256a3dae9041478673def76cd
COCOAPODS: 1.16.2

View File

@@ -115,6 +115,7 @@
"@capacitor/camera": "7.0.1",
"@capacitor/clipboard": "7.0.1",
"@capacitor/core": "7.2.0",
"@capacitor/device": "^7.0.2",
"@capacitor/filesystem": "7.0.1",
"@capacitor/haptics": "7.0.1",
"@capacitor/ios": "7.2.0",

View File

@@ -1,6 +1,7 @@
(ns mobile.components.recorder
"Audio record"
(:require [cljs-time.core :as t]
(:require ["@capacitor/device" :refer [Device]]
[cljs-time.core :as t]
[clojure.string :as string]
[frontend.date :as date]
[frontend.db.model :as db-model]
@@ -28,8 +29,20 @@
(str (.padStart (str minutes) 2 "0") ":"
(.padStart (str seconds) 2 "0"))))
(defn- get-locale
[]
(->
(p/let [^js lang (.getLanguageTag ^js Device)
value (.-value lang)]
(if (= value "en_CN")
"zh"
(string/replace value "-" "_")))
(p/catch (fn [e]
(js/console.error e)
"en_US"))))
(defn save-asset-audio!
[blob]
[blob locale]
(let [ext (some-> blob
(.-type)
(string/split ";")
@@ -53,7 +66,8 @@
(when asset-entity
(p/let [buffer-data (.arrayBuffer blob)
unit8-data (js/Uint8Array. buffer-data)]
(-> (.transcribeAudio2Text mobile-util/ui-local #js {:audioData (js/Array.from unit8-data)})
(-> (.transcribeAudio2Text mobile-util/ui-local #js {:audioData (js/Array.from unit8-data)
:locale locale})
(p/then (fn [^js r]
(let [content (.-transcription r)]
(when-not (string/blank? content)
@@ -93,7 +107,7 @@
;; events
(doto r
(.on "record-end" (fn [^js blob]
(save-asset-audio! blob)
(save-asset-audio! blob "en_US")
(mobile-state/close-popup!)))
(.on "record-progress" (gfun/throttle
(fn [time]
@@ -120,31 +134,44 @@
(rum/defc record-button-2
[]
(let [[locale set-locale!] (hooks/use-state nil)
[*locale] (hooks/use-state (atom nil))]
(hooks/use-effect!
(fn []
(p/let [locale (get-locale)]
(set-locale! locale)
(reset! *locale locale)
(record/start
{:on-record-end (fn [^js blob]
(save-asset-audio! blob)
(save-asset-audio! blob @*locale)
(mobile-state/close-popup!))})
(record/attach-visualizer!
(js/document.getElementById "wave-canvas")
{:mode :rolling
:fps 30
:fft-size 2048
:smoothing 0.8})
:smoothing 0.8}))
#(record/destroy!))
[])
[:div.p-6.flex.justify-between
[:div.flex.justify-between.items-center.w-full
;; [:span.flex.flex-col.timer-wrap
;; [:strong.timer {:ref *timer-ref} "00:00"]
;; [:strong.timer "00:00"]
;; [:small "05:00"]]
(shui/button {:variant :outline
:class "record-ctrl-btn rounded-full recording"
:on-click (fn []
(record/stop))}
(shui/tabler-icon "player-stop" {:size 22}))]])
(shui/tabler-icon "player-stop" {:size 22}))
(when locale
(when-not (string/starts-with? locale "en_")
(shui/button {:variant :outline
:on-click (fn []
(reset! *locale "en_US")
(set-locale! "en_US"))}
"English transcribe")))]]))
(rum/defc audio-recorder-aux < rum/static
[]
@@ -158,7 +185,7 @@
[:div.wave.border.rounded
[:canvas#wave-canvas
{:height 200
:width 400}]]]
:width 320}]]]
;; (record-button)
(record-button-2)])

View File

@@ -291,6 +291,11 @@
dependencies:
tslib "^2.1.0"
"@capacitor/device@^7.0.2":
version "7.0.2"
resolved "https://registry.yarnpkg.com/@capacitor/device/-/device-7.0.2.tgz#406bde129d3fcf55f0de0b691509535e2a00e315"
integrity sha512-OMGMBjLbh7ApaqW1oOJIV73iyrFK/T5v2MzuQYq3GLT+jnGvCuj/y82Ofq2Fz9/hlJ2fukztPwG1K80jyk0i6w==
"@capacitor/filesystem@7.0.1":
version "7.0.1"
resolved "https://registry.yarnpkg.com/@capacitor/filesystem/-/filesystem-7.0.1.tgz#b0518d781f7640e936f529b80a59724e221d0471"