mirror of
https://github.com/logseq/logseq.git
synced 2026-05-01 09:26:28 +00:00
enhance: use ios new speech api for transcribe
This commit is contained in:
@@ -14,6 +14,7 @@ dependencies {
|
||||
implementation project(':capacitor-app')
|
||||
implementation project(':capacitor-camera')
|
||||
implementation project(':capacitor-clipboard')
|
||||
implementation project(':capacitor-device')
|
||||
implementation project(':capacitor-filesystem')
|
||||
implementation project(':capacitor-haptics')
|
||||
implementation project(':capacitor-keyboard')
|
||||
|
||||
@@ -19,6 +19,10 @@
|
||||
"pkg": "@capacitor/clipboard",
|
||||
"classpath": "com.capacitorjs.plugins.clipboard.ClipboardPlugin"
|
||||
},
|
||||
{
|
||||
"pkg": "@capacitor/device",
|
||||
"classpath": "com.capacitorjs.plugins.device.DevicePlugin"
|
||||
},
|
||||
{
|
||||
"pkg": "@capacitor/filesystem",
|
||||
"classpath": "com.capacitorjs.plugins.filesystem.FilesystemPlugin"
|
||||
|
||||
@@ -17,6 +17,9 @@ project(':capacitor-camera').projectDir = new File('../node_modules/@capacitor/c
|
||||
include ':capacitor-clipboard'
|
||||
project(':capacitor-clipboard').projectDir = new File('../node_modules/@capacitor/clipboard/android')
|
||||
|
||||
include ':capacitor-device'
|
||||
project(':capacitor-device').projectDir = new File('../node_modules/@capacitor/device/android')
|
||||
|
||||
include ':capacitor-filesystem'
|
||||
project(':capacitor-filesystem').projectDir = new File('../node_modules/@capacitor/filesystem/android')
|
||||
|
||||
|
||||
@@ -209,51 +209,71 @@ public class UILocalPlugin: CAPPlugin, CAPBridgedPlugin {
|
||||
CAPPluginMethod(name: "transcribeAudio2Text", returnType: CAPPluginReturnPromise)
|
||||
]
|
||||
|
||||
// TODO: switch to use https://developer.apple.com/documentation/speech/speechanalyzer for iOS 26+
|
||||
// 语音识别方法
|
||||
private func recognizeSpeech(from url: URL, completion: @escaping (String?, Error?) -> Void) {
|
||||
SFSpeechRecognizer.requestAuthorization { authStatus in
|
||||
guard authStatus == .authorized else {
|
||||
completion(nil, NSError(domain: "", code: -1, userInfo: [NSLocalizedDescriptionKey: "语音识别权限未授权"]))
|
||||
return
|
||||
func recognizeSpeech(from file: URL, locale: String, completion: @escaping (String?, Error?) -> Void) {
|
||||
if #available(iOS 26.0, *) {
|
||||
// Modern API: SpeechTranscriber + SpeechAnalyzer
|
||||
Task {
|
||||
do {
|
||||
print("debug locale \(locale)")
|
||||
|
||||
// Step 1: pick supported locale
|
||||
guard let supportedLocale = await SpeechTranscriber.supportedLocale(equivalentTo: Locale(identifier: locale)) else {
|
||||
throw NSError(domain: "Speech", code: -1,
|
||||
userInfo: [NSLocalizedDescriptionKey: "Unsupported locale"])
|
||||
}
|
||||
|
||||
let recognizer = SFSpeechRecognizer(locale: Locale(identifier: "en-US"))
|
||||
let request = SFSpeechURLRecognitionRequest(url: url)
|
||||
// Step 2: transcriber with transcription preset
|
||||
let transcriber = SpeechTranscriber(locale: supportedLocale, preset: .transcription)
|
||||
|
||||
// Setting up offline speech recognition
|
||||
recognizer?.supportsOnDeviceRecognition = true
|
||||
request.shouldReportPartialResults = false
|
||||
request.requiresOnDeviceRecognition = true
|
||||
request.taskHint = .dictation
|
||||
if #available(iOS 16, *) {
|
||||
request.addsPunctuation = true
|
||||
// Ensure assets (downloads model if needed)
|
||||
if let installRequest = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
|
||||
try await installRequest.downloadAndInstall()
|
||||
}
|
||||
|
||||
recognizer?.recognitionTask(with: request) { result, error in
|
||||
if let result = result {
|
||||
let transcription = result.bestTranscription.formattedString
|
||||
completion(transcription, nil)
|
||||
} else if let error = error {
|
||||
// Step 3: collect transcription results async
|
||||
async let transcriptionFuture: String = try transcriber.results.reduce(into: "") { partial, result in
|
||||
partial += String(result.text.characters) + " "
|
||||
}
|
||||
|
||||
// Step 4: analyzer
|
||||
let analyzer = SpeechAnalyzer(modules: [transcriber])
|
||||
|
||||
// Step 5/6: run analysis from file
|
||||
let audioFile = try AVAudioFile(forReading: file)
|
||||
if let lastSample = try await analyzer.analyzeSequence(from: audioFile) {
|
||||
try await analyzer.finalizeAndFinish(through: lastSample)
|
||||
} else {
|
||||
try await analyzer.cancelAndFinishNow()
|
||||
}
|
||||
|
||||
// Step 7/8: wait for transcription
|
||||
let finalText = try await transcriptionFuture.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
completion(finalText, nil)
|
||||
|
||||
} catch {
|
||||
completion(nil, error)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@objc func transcribeAudio2Text(_ call: CAPPluginCall) {
|
||||
self.call = call
|
||||
|
||||
// 接收音频数据 arrayBuffer
|
||||
// audio arrayBuffer
|
||||
guard let audioArray = call.getArray("audioData", NSNumber.self) as? [UInt8] else {
|
||||
call.reject("无效的音频数据")
|
||||
call.reject("invalid audioData")
|
||||
return
|
||||
}
|
||||
|
||||
guard let locale = call.getString("locale") else {
|
||||
call.reject("invalid locale")
|
||||
return
|
||||
}
|
||||
|
||||
// 将数组转换为 Data
|
||||
let audioData = Data(audioArray)
|
||||
|
||||
// 保存为本地文件
|
||||
let fileURL = FileManager.default.temporaryDirectory.appendingPathComponent("recordedAudio.m4a")
|
||||
|
||||
do {
|
||||
@@ -261,23 +281,21 @@ public class UILocalPlugin: CAPPlugin, CAPBridgedPlugin {
|
||||
|
||||
let fileExists = FileManager.default.fileExists(atPath: fileURL.path)
|
||||
|
||||
print("文件是否存在: \(fileExists), 路径: \(fileURL.path)")
|
||||
print("file exists: \(fileExists), path: \(fileURL.path)")
|
||||
if !fileExists {
|
||||
call.reject("文件保存失败,文件不存在")
|
||||
call.reject("file save failed: file doesn't exist")
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
// 调用语音识别
|
||||
self.recognizeSpeech(from: fileURL) { result, error in
|
||||
self.recognizeSpeech(from: fileURL, locale: locale) { result, error in
|
||||
if let result = result {
|
||||
call.resolve(["transcription": result])
|
||||
} else if let error = error {
|
||||
call.reject("语音识别失败: \(error.localizedDescription)")
|
||||
call.reject("failed to transcribe: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
call.reject("保存文件失败: \(error.localizedDescription)")
|
||||
call.reject("failed to transcribe: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ def capacitor_pods
|
||||
pod 'CapacitorApp', :path => '../../node_modules/@capacitor/app'
|
||||
pod 'CapacitorCamera', :path => '../../node_modules/@capacitor/camera'
|
||||
pod 'CapacitorClipboard', :path => '../../node_modules/@capacitor/clipboard'
|
||||
pod 'CapacitorDevice', :path => '../../node_modules/@capacitor/device'
|
||||
pod 'CapacitorFilesystem', :path => '../../node_modules/@capacitor/filesystem'
|
||||
pod 'CapacitorHaptics', :path => '../../node_modules/@capacitor/haptics'
|
||||
pod 'CapacitorKeyboard', :path => '../../node_modules/@capacitor/keyboard'
|
||||
|
||||
@@ -12,6 +12,8 @@ PODS:
|
||||
- CapacitorCommunitySafeArea (7.0.0-alpha.1):
|
||||
- Capacitor
|
||||
- CapacitorCordova (7.2.0)
|
||||
- CapacitorDevice (7.0.2):
|
||||
- Capacitor
|
||||
- CapacitorFilesystem (7.0.1):
|
||||
- Capacitor
|
||||
- CapacitorHaptics (7.0.1):
|
||||
@@ -39,6 +41,7 @@ DEPENDENCIES:
|
||||
- "CapacitorClipboard (from `../../node_modules/@capacitor/clipboard`)"
|
||||
- "CapacitorCommunitySafeArea (from `../../node_modules/@capacitor-community/safe-area`)"
|
||||
- "CapacitorCordova (from `../../node_modules/@capacitor/ios`)"
|
||||
- "CapacitorDevice (from `../../node_modules/@capacitor/device`)"
|
||||
- "CapacitorFilesystem (from `../../node_modules/@capacitor/filesystem`)"
|
||||
- "CapacitorHaptics (from `../../node_modules/@capacitor/haptics`)"
|
||||
- "CapacitorKeyboard (from `../../node_modules/@capacitor/keyboard`)"
|
||||
@@ -64,6 +67,8 @@ EXTERNAL SOURCES:
|
||||
:path: "../../node_modules/@capacitor-community/safe-area"
|
||||
CapacitorCordova:
|
||||
:path: "../../node_modules/@capacitor/ios"
|
||||
CapacitorDevice:
|
||||
:path: "../../node_modules/@capacitor/device"
|
||||
CapacitorFilesystem:
|
||||
:path: "../../node_modules/@capacitor/filesystem"
|
||||
CapacitorHaptics:
|
||||
@@ -91,6 +96,7 @@ SPEC CHECKSUMS:
|
||||
CapacitorClipboard: b98aead5dc7ec595547fc2c5d75bacd2ae3338bc
|
||||
CapacitorCommunitySafeArea: cc370b4f8d4aa340e4616acef9b73eda41ba0914
|
||||
CapacitorCordova: 5967b9ba03915ef1d585469d6e31f31dc49be96f
|
||||
CapacitorDevice: a50a45f0d075e55e2392c7a4be5404d4f69515de
|
||||
CapacitorFilesystem: 307f97c27a265edf8396a1c9c235592fd8572fe3
|
||||
CapacitorHaptics: 70e47470fa1a6bd6338cd102552e3846b7f9a1b3
|
||||
CapacitorKeyboard: 969647d0ca2e5c737d7300088e2517aa832434e2
|
||||
@@ -101,6 +107,6 @@ SPEC CHECKSUMS:
|
||||
JcesarmobileSslSkip: b0f921e9d397a57f7983731209ca1ee244119c1f
|
||||
SendIntent: 1f4f65c7103eb423067c566682dfcda973b5fb29
|
||||
|
||||
PODFILE CHECKSUM: d1ad773ee5fbd3415c2d78d69f4396a1dc68bed9
|
||||
PODFILE CHECKSUM: cb9c70caa3eda97256a3dae9041478673def76cd
|
||||
|
||||
COCOAPODS: 1.16.2
|
||||
|
||||
@@ -115,6 +115,7 @@
|
||||
"@capacitor/camera": "7.0.1",
|
||||
"@capacitor/clipboard": "7.0.1",
|
||||
"@capacitor/core": "7.2.0",
|
||||
"@capacitor/device": "^7.0.2",
|
||||
"@capacitor/filesystem": "7.0.1",
|
||||
"@capacitor/haptics": "7.0.1",
|
||||
"@capacitor/ios": "7.2.0",
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
(ns mobile.components.recorder
|
||||
"Audio record"
|
||||
(:require [cljs-time.core :as t]
|
||||
(:require ["@capacitor/device" :refer [Device]]
|
||||
[cljs-time.core :as t]
|
||||
[clojure.string :as string]
|
||||
[frontend.date :as date]
|
||||
[frontend.db.model :as db-model]
|
||||
@@ -28,8 +29,20 @@
|
||||
(str (.padStart (str minutes) 2 "0") ":"
|
||||
(.padStart (str seconds) 2 "0"))))
|
||||
|
||||
(defn- get-locale
|
||||
[]
|
||||
(->
|
||||
(p/let [^js lang (.getLanguageTag ^js Device)
|
||||
value (.-value lang)]
|
||||
(if (= value "en_CN")
|
||||
"zh"
|
||||
(string/replace value "-" "_")))
|
||||
(p/catch (fn [e]
|
||||
(js/console.error e)
|
||||
"en_US"))))
|
||||
|
||||
(defn save-asset-audio!
|
||||
[blob]
|
||||
[blob locale]
|
||||
(let [ext (some-> blob
|
||||
(.-type)
|
||||
(string/split ";")
|
||||
@@ -53,7 +66,8 @@
|
||||
(when asset-entity
|
||||
(p/let [buffer-data (.arrayBuffer blob)
|
||||
unit8-data (js/Uint8Array. buffer-data)]
|
||||
(-> (.transcribeAudio2Text mobile-util/ui-local #js {:audioData (js/Array.from unit8-data)})
|
||||
(-> (.transcribeAudio2Text mobile-util/ui-local #js {:audioData (js/Array.from unit8-data)
|
||||
:locale locale})
|
||||
(p/then (fn [^js r]
|
||||
(let [content (.-transcription r)]
|
||||
(when-not (string/blank? content)
|
||||
@@ -93,7 +107,7 @@
|
||||
;; events
|
||||
(doto r
|
||||
(.on "record-end" (fn [^js blob]
|
||||
(save-asset-audio! blob)
|
||||
(save-asset-audio! blob "en_US")
|
||||
(mobile-state/close-popup!)))
|
||||
(.on "record-progress" (gfun/throttle
|
||||
(fn [time]
|
||||
@@ -120,31 +134,44 @@
|
||||
|
||||
(rum/defc record-button-2
|
||||
[]
|
||||
(let [[locale set-locale!] (hooks/use-state nil)
|
||||
[*locale] (hooks/use-state (atom nil))]
|
||||
(hooks/use-effect!
|
||||
(fn []
|
||||
(p/let [locale (get-locale)]
|
||||
(set-locale! locale)
|
||||
(reset! *locale locale)
|
||||
(record/start
|
||||
{:on-record-end (fn [^js blob]
|
||||
(save-asset-audio! blob)
|
||||
(save-asset-audio! blob @*locale)
|
||||
(mobile-state/close-popup!))})
|
||||
(record/attach-visualizer!
|
||||
(js/document.getElementById "wave-canvas")
|
||||
{:mode :rolling
|
||||
:fps 30
|
||||
:fft-size 2048
|
||||
:smoothing 0.8})
|
||||
:smoothing 0.8}))
|
||||
|
||||
#(record/destroy!))
|
||||
[])
|
||||
[:div.p-6.flex.justify-between
|
||||
[:div.flex.justify-between.items-center.w-full
|
||||
;; [:span.flex.flex-col.timer-wrap
|
||||
;; [:strong.timer {:ref *timer-ref} "00:00"]
|
||||
;; [:strong.timer "00:00"]
|
||||
;; [:small "05:00"]]
|
||||
(shui/button {:variant :outline
|
||||
:class "record-ctrl-btn rounded-full recording"
|
||||
:on-click (fn []
|
||||
(record/stop))}
|
||||
(shui/tabler-icon "player-stop" {:size 22}))]])
|
||||
(shui/tabler-icon "player-stop" {:size 22}))
|
||||
|
||||
(when locale
|
||||
(when-not (string/starts-with? locale "en_")
|
||||
(shui/button {:variant :outline
|
||||
:on-click (fn []
|
||||
(reset! *locale "en_US")
|
||||
(set-locale! "en_US"))}
|
||||
"English transcribe")))]]))
|
||||
|
||||
(rum/defc audio-recorder-aux < rum/static
|
||||
[]
|
||||
@@ -158,7 +185,7 @@
|
||||
[:div.wave.border.rounded
|
||||
[:canvas#wave-canvas
|
||||
{:height 200
|
||||
:width 400}]]]
|
||||
:width 320}]]]
|
||||
|
||||
;; (record-button)
|
||||
(record-button-2)])
|
||||
|
||||
@@ -291,6 +291,11 @@
|
||||
dependencies:
|
||||
tslib "^2.1.0"
|
||||
|
||||
"@capacitor/device@^7.0.2":
|
||||
version "7.0.2"
|
||||
resolved "https://registry.yarnpkg.com/@capacitor/device/-/device-7.0.2.tgz#406bde129d3fcf55f0de0b691509535e2a00e315"
|
||||
integrity sha512-OMGMBjLbh7ApaqW1oOJIV73iyrFK/T5v2MzuQYq3GLT+jnGvCuj/y82Ofq2Fz9/hlJ2fukztPwG1K80jyk0i6w==
|
||||
|
||||
"@capacitor/filesystem@7.0.1":
|
||||
version "7.0.1"
|
||||
resolved "https://registry.yarnpkg.com/@capacitor/filesystem/-/filesystem-7.0.1.tgz#b0518d781f7640e936f529b80a59724e221d0471"
|
||||
|
||||
Reference in New Issue
Block a user