Parcourir la Source

Switch AI Companion transcription to Apple Speech recognition and remove OpenAI dependency from the local transcript flow.

This keeps transcript text generation entirely within on-device APIs and adds the required speech-recognition permission messaging for macOS.

Co-authored-by: Cursor <cursoragent@cursor.com>
huzaifahayat12 il y a 1 mois
Parent
commit
3f29e14107
2 fichiers modifiés avec 82 ajouts et 39 suppressions
  1. 2 0
      Info.plist
  2. 80 39
      meetings_app/ViewController.swift

+ 2 - 0
Info.plist

@@ -28,6 +28,8 @@
28 28
 	<string>NSApplication</string>
29 29
 	<key>NSMicrophoneUsageDescription</key>
30 30
 	<string>This app records meeting audio locally with your consent so it can be shown in AI Companion after the meeting.</string>
31
+	<key>NSSpeechRecognitionUsageDescription</key>
32
+	<string>This app converts your saved meeting audio into text transcripts for AI Companion.</string>
31 33
 	<key>AppLaunchPlaceholderURL</key>
32 34
 	<string>https://example.com/app-link-coming-soon</string>
33 35
 	<key>AppShareURL</key>

+ 80 - 39
meetings_app/ViewController.swift

@@ -9,6 +9,7 @@ import Cocoa
9 9
 import QuartzCore
10 10
 import AVFoundation
11 11
 import AVKit
12
+import Speech
12 13
 import WebKit
13 14
 import AuthenticationServices
14 15
 import StoreKit
@@ -240,7 +241,7 @@ final class ViewController: NSViewController {
240 241
 
241 242
     private enum MeetingTranscriptSource: String, Codable {
242 243
         case meetApi
243
-        case localAudioOpenAI
244
+        case localAudioAppleSpeech
244 245
     }
245 246
 
246 247
     private enum PaywallFooterAction {
@@ -472,10 +473,7 @@ final class ViewController: NSViewController {
472 473
     private let ratingStateMigrationV2DoneDefaultsKey = "rating.stateMigrationV2Done"
473 474
     private let nonPremiumJoinTrialConsumedDefaultsKey = "join.nonPremiumTrialConsumed"
474 475
     private let aiCompanionLocalRecordingsDefaultsKey = "aiCompanion.localRecordings"
475
-    private let openAIAPIKeyDefaultsKey = "openai.apiKey"
476
-    private let openAIAPIKeyPlistKey = "OpenAIAPIKey"
477 476
     private let ratingEligibleUsageSeconds: TimeInterval = 30 * 60
478
-    private let openAITranscriptionClient = OpenAITranscriptionClient()
479 477
     private var darkModeEnabled: Bool {
480 478
         get {
481 479
             let hasValue = UserDefaults.standard.object(forKey: darkModeDefaultsKey) != nil
@@ -1304,18 +1302,82 @@ private extension ViewController {
1304 1302
         UserDefaults.standard.set(encoded, forKey: aiCompanionLocalRecordingsDefaultsKey)
1305 1303
     }
1306 1304
 
1307
-    private func configuredOpenAIAPIKey() -> String? {
1308
-        let value = UserDefaults.standard.string(forKey: openAIAPIKeyDefaultsKey)?
1309
-            .trimmingCharacters(in: .whitespacesAndNewlines)
1310
-        if let value, value.isEmpty == false {
1311
-            return value
1305
+    private func requestSpeechRecognitionAuthorizationIfNeeded() async throws {
1306
+        switch SFSpeechRecognizer.authorizationStatus() {
1307
+        case .authorized:
1308
+            return
1309
+        case .notDetermined:
1310
+            let status = await withCheckedContinuation { continuation in
1311
+                SFSpeechRecognizer.requestAuthorization { status in
1312
+                    continuation.resume(returning: status)
1313
+                }
1314
+            }
1315
+            guard status == .authorized else {
1316
+                throw NSError(
1317
+                    domain: "AiCompanionTranscript",
1318
+                    code: 15,
1319
+                    userInfo: [NSLocalizedDescriptionKey: "Speech recognition permission denied. Enable it in System Settings and try again."]
1320
+                )
1321
+            }
1322
+        case .denied:
1323
+            throw NSError(
1324
+                domain: "AiCompanionTranscript",
1325
+                code: 15,
1326
+                userInfo: [NSLocalizedDescriptionKey: "Speech recognition permission denied. Enable it in System Settings and try again."]
1327
+            )
1328
+        case .restricted:
1329
+            throw NSError(
1330
+                domain: "AiCompanionTranscript",
1331
+                code: 16,
1332
+                userInfo: [NSLocalizedDescriptionKey: "Speech recognition is restricted on this Mac."]
1333
+            )
1334
+        @unknown default:
1335
+            throw NSError(
1336
+                domain: "AiCompanionTranscript",
1337
+                code: 17,
1338
+                userInfo: [NSLocalizedDescriptionKey: "Speech recognition authorization is unavailable."]
1339
+            )
1340
+        }
1341
+    }
1342
+
1343
+    private func transcribeLocalAudioWithAppleSpeech(audioURL: URL) async throws -> String {
1344
+        try await requestSpeechRecognitionAuthorizationIfNeeded()
1345
+
1346
+        guard let recognizer = SFSpeechRecognizer(locale: Locale.current) ?? SFSpeechRecognizer(locale: Locale(identifier: "en-US")) else {
1347
+            throw NSError(
1348
+                domain: "AiCompanionTranscript",
1349
+                code: 18,
1350
+                userInfo: [NSLocalizedDescriptionKey: "Speech recognizer is unavailable for the current locale."]
1351
+            )
1312 1352
         }
1313
-        let plistValue = Bundle.main.object(forInfoDictionaryKey: openAIAPIKeyPlistKey) as? String
1314
-        let trimmedPlist = plistValue?.trimmingCharacters(in: .whitespacesAndNewlines)
1315
-        if let trimmedPlist, trimmedPlist.isEmpty == false {
1316
-            return trimmedPlist
1353
+
1354
+        let request = SFSpeechURLRecognitionRequest(url: audioURL)
1355
+        request.shouldReportPartialResults = false
1356
+        if #available(macOS 13.0, *) {
1357
+            request.addsPunctuation = true
1358
+        }
1359
+
1360
+        return try await withCheckedThrowingContinuation { continuation in
1361
+            var hasResumed = false
1362
+            var task: SFSpeechRecognitionTask?
1363
+            task = recognizer.recognitionTask(with: request) { result, error in
1364
+                if hasResumed { return }
1365
+
1366
+                if let error {
1367
+                    hasResumed = true
1368
+                    task?.cancel()
1369
+                    continuation.resume(throwing: error)
1370
+                    return
1371
+                }
1372
+
1373
+                guard let result else { return }
1374
+                if result.isFinal {
1375
+                    hasResumed = true
1376
+                    task?.finish()
1377
+                    continuation.resume(returning: result.bestTranscription.formattedString)
1378
+                }
1379
+            }
1317 1380
         }
1318
-        return nil
1319 1381
     }
1320 1382
 
1321 1383
     private func aiCompanionTranscriptStatus(for recording: MeetingRecordingSummary) -> MeetingTranscriptStatus {
@@ -2969,40 +3031,19 @@ private extension ViewController {
2969 3031
         guard let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }) else {
2970 3032
             throw NSError(domain: "AiCompanionTranscript", code: 10, userInfo: [NSLocalizedDescriptionKey: "Recording not found."])
2971 3033
         }
2972
-
2973
-        if let meeting = aiCompanionMeetingFromRecording(recording) {
2974
-            do {
2975
-                let accessToken: String
2976
-                if interactiveAuth {
2977
-                    accessToken = try await googleOAuth.validAccessToken(presentingWindow: presentingWindow)
2978
-                } else if let token = googleOAuth.loadTokens()?.accessToken {
2979
-                    accessToken = token
2980
-                } else {
2981
-                    throw NSError(domain: "AiCompanionTranscript", code: 11, userInfo: [NSLocalizedDescriptionKey: "Google account not connected."])
2982
-                }
2983
-                let text = try await aiCompanionFetchTranscriptText(for: meeting, accessToken: accessToken)
2984
-                let cleaned = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
2985
-                if cleaned.isEmpty == false {
2986
-                    return (cleaned, .meetApi)
2987
-                }
2988
-            } catch {
2989
-                // Continue to OpenAI fallback below.
2990
-            }
2991
-        }
3034
+        _ = interactiveAuth
3035
+        _ = presentingWindow
2992 3036
 
2993 3037
         let audioURL = URL(fileURLWithPath: recording.audioFilePath)
2994 3038
         guard FileManager.default.fileExists(atPath: audioURL.path) else {
2995 3039
             throw NSError(domain: "AiCompanionTranscript", code: 12, userInfo: [NSLocalizedDescriptionKey: "Local meeting audio is missing."])
2996 3040
         }
2997
-        guard let apiKey = configuredOpenAIAPIKey() else {
2998
-            throw NSError(domain: "AiCompanionTranscript", code: 13, userInfo: [NSLocalizedDescriptionKey: "OpenAI API key missing. Add OpenAIAPIKey to Info.plist or openai.apiKey to UserDefaults."])
2999
-        }
3000
-        let text = try await openAITranscriptionClient.transcribeAudioFile(fileURL: audioURL, apiKey: apiKey)
3041
+        let text = try await transcribeLocalAudioWithAppleSpeech(audioURL: audioURL)
3001 3042
         let cleaned = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
3002 3043
         guard cleaned.isEmpty == false else {
3003 3044
             throw NSError(domain: "AiCompanionTranscript", code: 14, userInfo: [NSLocalizedDescriptionKey: "Generated transcript was empty."])
3004 3045
         }
3005
-        return (cleaned, .localAudioOpenAI)
3046
+        return (cleaned, .localAudioAppleSpeech)
3006 3047
     }
3007 3048
 
3008 3049
     @objc private func aiCompanionStopRecordingTapped(_ sender: NSButton) {