mirror of
https://github.com/logseq/logseq.git
synced 2026-04-25 06:35:02 +00:00
enhance(mobile): auto-detect audio language
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
import Capacitor
|
||||
import Foundation
|
||||
import Speech
|
||||
import NaturalLanguage
|
||||
|
||||
func isDarkMode() -> Bool {
|
||||
if #available(iOS 12.0, *) {
|
||||
@@ -209,55 +210,185 @@ public class UILocalPlugin: CAPPlugin, CAPBridgedPlugin {
|
||||
CAPPluginMethod(name: "transcribeAudio2Text", returnType: CAPPluginReturnPromise)
|
||||
]
|
||||
|
||||
func recognizeSpeech(from file: URL, locale: String, completion: @escaping (String?, Error?) -> Void) {
|
||||
if #available(iOS 26.0, *) {
|
||||
// Modern API: SpeechTranscriber + SpeechAnalyzer
|
||||
@available(iOS 26.0, *)
|
||||
func recognizeWithAutoLocale(from file: URL,
|
||||
completion: @escaping (String?, Error?) -> Void) {
|
||||
Task {
|
||||
do {
|
||||
print("debug locale \(locale)")
|
||||
|
||||
// Step 1: pick supported locale
|
||||
guard let supportedLocale = await SpeechTranscriber.supportedLocale(equivalentTo: Locale(identifier: locale)) else {
|
||||
throw NSError(domain: "Speech", code: -1,
|
||||
userInfo: [NSLocalizedDescriptionKey: "Unsupported locale"])
|
||||
// ---------- STEP 1: Gather candidate locales ----------
|
||||
let preferred = Array(Locale.preferredLanguages.prefix(3))
|
||||
var candidateIDs = preferred
|
||||
if !candidateIDs.contains(where: { $0.hasPrefix("en") }) {
|
||||
candidateIDs.append("en-US")
|
||||
}
|
||||
if !candidateIDs.contains(where: { $0.hasPrefix("zh") }) {
|
||||
candidateIDs.append("zh-CN")
|
||||
}
|
||||
|
||||
// Step 2: transcriber with transcription preset
|
||||
let transcriber = SpeechTranscriber(locale: supportedLocale, preset: .transcription)
|
||||
// ---------- STEP 2: Probe candidates in parallel ----------
|
||||
var results: [(Locale, String)] = []
|
||||
|
||||
// Ensure assets (downloads model if needed)
|
||||
if let installRequest = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
|
||||
try await installRequest.downloadAndInstall()
|
||||
await withTaskGroup(of: (Locale, String).self) { group in
|
||||
for id in candidateIDs {
|
||||
let candidate = Locale(identifier: id)
|
||||
if let supported = await SpeechTranscriber.supportedLocale(equivalentTo: candidate) {
|
||||
group.addTask {
|
||||
let text = (try? await self.quickSampleTranscription(file: file, locale: supported)) ?? ""
|
||||
return (supported, text)
|
||||
}
|
||||
}
|
||||
}
|
||||
for await (locale, text) in group {
|
||||
results.append((locale, text))
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: collect transcription results async
|
||||
async let transcriptionFuture: String = try transcriber.results.reduce(into: "") { partial, result in
|
||||
partial += String(result.text.characters) + " "
|
||||
// ---------- STEP 3: Score results ----------
|
||||
var bestLocale: Locale = Locale(identifier: "en-US")
|
||||
var bestScore = Int.min
|
||||
for (locale, text) in results {
|
||||
let score = scoreTranscript(text, locale: locale)
|
||||
print("📊 Candidate: \(locale.identifier), score: \(score), text: \(text)")
|
||||
if score > bestScore {
|
||||
bestScore = score
|
||||
bestLocale = locale
|
||||
}
|
||||
}
|
||||
|
||||
print("🎙 Running full transcription with locale: \(bestLocale.identifier)")
|
||||
|
||||
// ---------- STEP 4: Full transcription ----------
|
||||
let transcriber = SpeechTranscriber(locale: bestLocale, preset: .transcription)
|
||||
|
||||
if let req = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
|
||||
try await req.downloadAndInstall()
|
||||
print("✅ Model installed for \(bestLocale.identifier)")
|
||||
}
|
||||
|
||||
let collectFullTask = Task { () -> String in
|
||||
var full = ""
|
||||
do {
|
||||
for try await r in transcriber.results {
|
||||
full += String(r.text.characters) + " "
|
||||
}
|
||||
} catch {}
|
||||
return full
|
||||
}
|
||||
|
||||
// Step 4: analyzer
|
||||
let analyzer = SpeechAnalyzer(modules: [transcriber])
|
||||
|
||||
// Step 5/6: run analysis from file
|
||||
let audioFile = try AVAudioFile(forReading: file)
|
||||
if let lastSample = try await analyzer.analyzeSequence(from: audioFile) {
|
||||
try await analyzer.finalizeAndFinish(through: lastSample)
|
||||
let audio = try AVAudioFile(forReading: file)
|
||||
if let last = try await analyzer.analyzeSequence(from: audio) {
|
||||
try await analyzer.finalizeAndFinish(through: last)
|
||||
} else {
|
||||
try await analyzer.cancelAndFinishNow()
|
||||
}
|
||||
|
||||
// Step 7/8: wait for transcription
|
||||
let finalText = try await transcriptionFuture.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
completion(finalText, nil)
|
||||
let finalText = (await collectFullTask.value)
|
||||
.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
|
||||
completion(finalText.isEmpty ? nil : finalText, nil)
|
||||
|
||||
} catch {
|
||||
completion(nil, error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Produces a short probe transcription of `file` using `locale`.
///
/// A `SpeechTranscriber` for `locale` is created (installing its assets first when
/// `AssetInventory` reports an installation request), the audio file is fed through a
/// `SpeechAnalyzer`, and the text of the transcriber's results is concatenated. Collection
/// stops after three results, at which point the analyzer is asked to cancel.
///
/// - Parameters:
///   - file: URL of the audio file to read with `AVAudioFile`.
///   - locale: Locale handed to `SpeechTranscriber`.
/// - Returns: The collected text with surrounding whitespace and newlines trimmed
///   (possibly empty).
/// - Throws: Errors from asset installation, opening the audio file, or running/finishing
///   the analysis. Errors raised while iterating the result stream are not propagated.
@available(iOS 26.0, *)
private func quickSampleTranscription(file: URL, locale: Locale) async throws -> String {
    let transcriber = SpeechTranscriber(locale: locale, preset: .transcription)

    // Install models if needed (you could cache this across runs)
    if let req = try await AssetInventory.assetInstallationRequest(supporting: [transcriber]) {
        try await req.downloadAndInstall()
    }

    let analyzer = SpeechAnalyzer(modules: [transcriber])

    let collectTask = Task { () -> String in
        // The accumulator and counter live inside the task so no mutable state is shared
        // between this unstructured task and the enclosing function.
        var sample = ""
        var count = 0
        do {
            for try await r in transcriber.results {
                sample += String(r.text.characters) + " "
                count += 1
                if count >= 3 {
                    // Early exit: stop once we have enough
                    try? await analyzer.cancelAndFinishNow()
                    break
                }
            }
        } catch {
            // Best effort: a failing result stream ends collection and whatever was
            // gathered so far is returned.
        }
        return sample
    }

    let audioFile = try AVAudioFile(forReading: file)
    if let last = try await analyzer.analyzeSequence(from: audioFile) {
        try await analyzer.finalizeAndFinish(through: last)
    } else {
        try await analyzer.cancelAndFinishNow()
    }

    return await collectTask.value.trimmingCharacters(in: .whitespacesAndNewlines)
}
|
||||
|
||||
/// Heuristically scores how plausible `text` is as a transcript produced for `locale`.
///
/// The base score is the character count of `text` after dropping every scalar that is not
/// a letter, a decimal digit, or inside one of the script ranges listed in the body.
/// Adjustments are then applied:
/// - more than 70% ASCII letters: +500 for an `en` locale, -500 for `zh`/`ja`/`ko`;
/// - for each detected script (Han, Kana, Hangul, Cyrillic, Arabic, Hebrew, Devanagari):
///   +1000 when the locale identifier starts with a matching language code, otherwise -500;
/// - +200 when the locale identifier starts with the first two characters of any entry in
///   `Locale.preferredLanguages`.
///
/// - Parameters:
///   - text: Candidate transcript.
///   - locale: Locale the transcript was produced with.
/// - Returns: The score; higher is better and the value may be negative.
private func scoreTranscript(_ text: String, locale: Locale) -> Int {
    // Normalize: keep only letters/digits/scripts (ignore punctuation).
    // The allow-set is built once per call instead of once per scalar.
    let scriptRanges: [ClosedRange<Unicode.Scalar>] = [
        "\u{4E00}"..."\u{9FFF}", // Han
        "\u{3040}"..."\u{30FF}", // Kana
        "\u{AC00}"..."\u{D7AF}", // Hangul
        "\u{0400}"..."\u{04FF}", // Cyrillic
        "\u{0600}"..."\u{06FF}", // Arabic
        "\u{0590}"..."\u{05FF}", // Hebrew
        "\u{0900}"..."\u{097F}"  // Devanagari
    ]
    var allowed = CharacterSet.letters
    allowed.formUnion(.decimalDigits)
    for range in scriptRanges {
        allowed.insert(charactersIn: range)
    }

    var keptScalars = String.UnicodeScalarView()
    for scalar in text.unicodeScalars where allowed.contains(scalar) {
        keptScalars.append(scalar)
    }
    let coreText = String(keptScalars)
    var score = coreText.count

    // Detect script presence. The ranges must be written as bracketed character classes;
    // without the brackets the pattern is the literal sequence "first scalar, '-', last
    // scalar" and never matches real text.
    func containsMatch(_ pattern: String) -> Bool {
        coreText.range(of: pattern, options: .regularExpression) != nil
    }
    let hasHan = containsMatch(#"\p{Han}"#)
    let hasKana = containsMatch(#"[\u3040-\u30FF]"#)
    let hasHangul = containsMatch(#"[\uAC00-\uD7AF]"#)
    let hasCyrillic = containsMatch(#"[\u0400-\u04FF]"#)
    let hasArabic = containsMatch(#"[\u0600-\u06FF]"#)
    let hasHebrew = containsMatch(#"[\u0590-\u05FF]"#)
    let hasDevanag = containsMatch(#"[\u0900-\u097F]"#)

    let identifier = locale.identifier
    func isLanguage(_ codes: String...) -> Bool {
        codes.contains { identifier.hasPrefix($0) }
    }

    // Latin ratio detection
    let latinLetters = coreText.filter { $0.isASCII && $0.isLetter }.count
    let latinRatio = coreText.isEmpty ? 0.0 : Double(latinLetters) / Double(coreText.count)

    if latinRatio > 0.7 {
        if isLanguage("en") {
            score += 500
        } else if isLanguage("zh", "ja", "ko") {
            score -= 500
        }
    }

    // Reward a locale whose language matches a detected script, penalize one that does not.
    let scriptRules: [(present: Bool, localeMatches: Bool)] = [
        (hasHan, isLanguage("zh", "ja")),
        (hasKana, isLanguage("ja")),
        (hasHangul, isLanguage("ko")),
        (hasCyrillic, isLanguage("ru")),
        (hasArabic, isLanguage("ar")),
        (hasHebrew, isLanguage("he")),
        (hasDevanag, isLanguage("hi"))
    ]
    for rule in scriptRules where rule.present {
        score += rule.localeMatches ? 1000 : -500
    }

    // Bias toward user-preferred languages
    if Locale.preferredLanguages.contains(where: { identifier.hasPrefix($0.prefix(2)) }) {
        score += 200
    }

    return score
}
|
||||
|
||||
|
||||
@available(iOS 26.0, *)
|
||||
@objc func transcribeAudio2Text(_ call: CAPPluginCall) {
|
||||
self.call = call
|
||||
|
||||
@@ -267,11 +398,6 @@ public class UILocalPlugin: CAPPlugin, CAPBridgedPlugin {
|
||||
return
|
||||
}
|
||||
|
||||
guard let locale = call.getString("locale") else {
|
||||
call.reject("invalid locale")
|
||||
return
|
||||
}
|
||||
|
||||
let audioData = Data(audioArray)
|
||||
|
||||
let fileURL = FileManager.default.temporaryDirectory.appendingPathComponent("recordedAudio.m4a")
|
||||
@@ -287,7 +413,7 @@ public class UILocalPlugin: CAPPlugin, CAPBridgedPlugin {
|
||||
return
|
||||
}
|
||||
|
||||
self.recognizeSpeech(from: fileURL, locale: locale) { result, error in
|
||||
self.recognizeWithAutoLocale(from: fileURL) { result, error in
|
||||
if let result = result {
|
||||
call.resolve(["transcription": result])
|
||||
} else if let error = error {
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
[logseq.common.util :as common-util]
|
||||
[promesa.core :as p])))
|
||||
|
||||
(comment
|
||||
#?(:cljs
|
||||
(defn throw-err
|
||||
[v]
|
||||
@@ -14,7 +13,7 @@
|
||||
|
||||
(defmacro <?
|
||||
[port]
|
||||
`(throw-err (cljs.core.async/<! ~port))))
|
||||
`(throw-err (cljs.core.async/<! ~port)))
|
||||
|
||||
#?(:cljs
|
||||
(defn c->p
|
||||
|
||||
@@ -1,16 +1,25 @@
|
||||
(ns frontend.db.transact
|
||||
"Provides async transact for use with ldb/transact!"
|
||||
(:require [frontend.state :as state]
|
||||
(:require [clojure.core.async :as async]
|
||||
[clojure.core.async.interop :refer [p->c]]
|
||||
[frontend.common.async-util :include-macros true :refer [<?]]
|
||||
[frontend.state :as state]
|
||||
[frontend.util :as util]
|
||||
[lambdaisland.glogi :as log]
|
||||
[logseq.outliner.op :as outliner-op]
|
||||
[promesa.core :as p]))
|
||||
|
||||
(defn worker-call
|
||||
[request-f]
|
||||
(p/let [result (request-f)]
|
||||
;; yields to ensure ui db to be updated before resolved
|
||||
(p/delay 0)
|
||||
result))
|
||||
(let [response (p/deferred)]
|
||||
(async/go
|
||||
(let [result (<? (p->c (request-f)))]
|
||||
(if (:ex-data result)
|
||||
(do
|
||||
(log/error :worker-request-failed result)
|
||||
(p/reject! response result))
|
||||
(p/resolve! response result))))
|
||||
response))
|
||||
|
||||
(defn transact [worker-transact repo tx-data tx-meta]
|
||||
(let [tx-meta' (assoc tx-meta
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
[frontend.state :as state]
|
||||
[frontend.util :as util]
|
||||
[goog.functions :as gfun]
|
||||
[logseq.client.logging :as log]
|
||||
[lambdaisland.glogi :as log]
|
||||
[logseq.shui.hooks :as hooks]
|
||||
[logseq.shui.ui :as shui]
|
||||
[mobile.init :as init]
|
||||
@@ -20,7 +20,9 @@
|
||||
[rum.core :as rum]))
|
||||
|
||||
(defonce audio-file-format "yyyy-MM-dd HH:mm:ss")
|
||||
|
||||
(def audio-length-limit 10) ; 10 minutes
|
||||
(defonce *transcribe? (atom false))
|
||||
|
||||
(def *last-edit-block (atom nil))
|
||||
(defn set-last-edit-block! [block] (reset! *last-edit-block block))
|
||||
@@ -32,18 +34,6 @@
|
||||
(str (.padStart (str minutes) 2 "0") ":"
|
||||
(.padStart (str seconds) 2 "0"))))
|
||||
|
||||
;; Returns a promise of the locale string to use for transcription.
;; Reads the language tag from Device via .getLanguageTag and takes its .-value;
;; the value "en_CN" is mapped to "zh", any other value has "-" replaced with "_".
;; If the lookup fails, the error is logged and "en_US" is returned instead.
(defn- get-locale
|
||||
[]
|
||||
(->
|
||||
(p/let [^js lang (.getLanguageTag ^js Device)
|
||||
value (.-value lang)]
|
||||
;; NOTE(review): the comparison runs before "-" is replaced with "_"; if the tag
;; arrives hyphenated (e.g. "en-CN") this branch would not match — confirm.
(if (= value "en_CN")
|
||||
"zh"
|
||||
(string/replace value "-" "_")))
|
||||
(p/catch (fn [e]
|
||||
(log/error :get-locale-error e)
|
||||
"en_US"))))
|
||||
|
||||
(defn- >ios-26
|
||||
[]
|
||||
(p/let [^js info (.getInfo ^js Device)
|
||||
@@ -54,7 +44,7 @@
|
||||
(and (= os "ios") (>= major 26))))
|
||||
|
||||
(defn save-asset-audio!
|
||||
[blob locale]
|
||||
[blob transcribe?]
|
||||
(let [ext (some-> blob
|
||||
(.-type)
|
||||
(string/split ";")
|
||||
@@ -76,11 +66,12 @@
|
||||
[file]
|
||||
{:last-edit-block @*last-edit-block})
|
||||
asset-entity (first result)]
|
||||
(when (and asset-entity (util/ios?))
|
||||
(when (nil? asset-entity)
|
||||
(log/error ::empty-asset-entity {}))
|
||||
(when (and asset-entity transcribe?)
|
||||
(p/let [buffer-data (.arrayBuffer blob)
|
||||
unit8-data (js/Uint8Array. buffer-data)]
|
||||
(-> (.transcribeAudio2Text mobile-util/ui-local #js {:audioData (js/Array.from unit8-data)
|
||||
:locale locale})
|
||||
(-> (.transcribeAudio2Text mobile-util/ui-local #js {:audioData (js/Array.from unit8-data)})
|
||||
(p/then (fn [^js r]
|
||||
(let [content (.-transcription r)]
|
||||
(when-not (string/blank? content)
|
||||
@@ -92,14 +83,16 @@
|
||||
(p/catch #(log/error :transcribe-audio-error %)))))))))
|
||||
|
||||
(rum/defc record-button
|
||||
[*locale]
|
||||
[]
|
||||
(let [*timer-ref (hooks/use-ref nil)
|
||||
*save? (hooks/use-ref nil)
|
||||
[*recorder _] (hooks/use-state (atom nil))
|
||||
[locale set-locale!] (hooks/use-state nil)]
|
||||
[*save? _] (hooks/use-state (atom nil))]
|
||||
|
||||
(hooks/use-effect!
|
||||
(fn []
|
||||
(when-not @*transcribe?
|
||||
(p/let [transcribe? (>ios-26)]
|
||||
(reset! *transcribe? transcribe?)))
|
||||
(let [^js node (js/document.getElementById "wave-container")
|
||||
^js wave-l (.querySelector node ".wave-left")
|
||||
^js wave-r (.querySelector node ".wave-right")
|
||||
@@ -121,8 +114,8 @@
|
||||
(.start w1)
|
||||
(.start w2)))
|
||||
(.on "record-end" (fn [^js blob]
|
||||
(when (true? (rum/deref *save?))
|
||||
(save-asset-audio! blob @*locale))
|
||||
(when @*save?
|
||||
(save-asset-audio! blob @*transcribe?))
|
||||
(mobile-state/close-popup!)))
|
||||
(.on "record-progress" (gfun/throttle
|
||||
(fn [time]
|
||||
@@ -160,60 +153,22 @@
|
||||
(shui/button {:variant :outline
|
||||
:class "record-ctrl-btn rounded-full recording"
|
||||
:on-click (fn []
|
||||
(rum/set-ref! *save? true)
|
||||
(reset! *save? true)
|
||||
(.stopRecording ^js @*recorder))}
|
||||
(shui/tabler-icon "player-stop" {:size 22}))]]
|
||||
|
||||
(when locale
|
||||
(when-not (string/starts-with? locale "en_")
|
||||
(shui/button {:variant :outline
|
||||
:on-click (fn []
|
||||
(reset! *locale "en_US")
|
||||
(set-locale! "en_US"))}
|
||||
"English transcribe")))]))
|
||||
(shui/tabler-icon "player-stop" {:size 22}))]]]))
|
||||
|
||||
(rum/defc audio-recorder-aux < rum/static
|
||||
[]
|
||||
(let [[locale set-locale!] (hooks/use-state nil)
|
||||
[system-locale set-system-locale!] (hooks/use-state nil)
|
||||
[*locale] (hooks/use-state (atom nil))
|
||||
[transcribe-supported? set-transcribe-supported!] (hooks/use-state false)]
|
||||
|
||||
(hooks/use-effect!
|
||||
(fn []
|
||||
(p/let [locale (get-locale)
|
||||
transcribe-supported? >ios-26]
|
||||
(set-transcribe-supported! transcribe-supported?)
|
||||
(set-locale! locale)
|
||||
(set-system-locale! locale)
|
||||
(reset! *locale locale)))
|
||||
[])
|
||||
|
||||
[:div.app-audio-recorder
|
||||
[:div.flex.flex-row.justify-between.items-center.font-medium
|
||||
[:div.opacity-70 (date/get-date-time-string (tl/local-now) {:formatter-str "yyyy-MM-dd"})]
|
||||
(when transcribe-supported?
|
||||
(if (and locale (not (string/starts-with? system-locale "en_")))
|
||||
(let [en? (string/starts-with? locale "en_")]
|
||||
(shui/button
|
||||
{:variant (if en? :default :outline)
|
||||
:class (str "rounded-full " (if en? "opacity-100" "opacity-70"))
|
||||
:on-click (fn []
|
||||
(reset! *locale "en_US")
|
||||
(set-locale! "en_US"))}
|
||||
"EN transcribe"))
|
||||
;; hack: same height with en transcribe button
|
||||
(shui/button
|
||||
{:variant :outline
|
||||
:class "rounded-full opacity-0"}
|
||||
"EN transcribe")))]
|
||||
[:div.opacity-70 (date/get-date-time-string (tl/local-now) {:formatter-str "yyyy-MM-dd"})]]
|
||||
|
||||
[:div#wave-container.app-wave-container
|
||||
[:div.app-wave-needle]
|
||||
[:div.wave-left]
|
||||
[:div.wave-right.mirror]]
|
||||
|
||||
(record-button *locale)]))
|
||||
(record-button)])
|
||||
|
||||
(defn- show-recorder
|
||||
[]
|
||||
|
||||
Reference in New Issue
Block a user