Quellcode durchsuchen

Improve meeting transcript reliability with dual-source capture and fallback transcription.

Persist transcript status per recording, auto-process transcripts after meetings end, and fix silent recordings by waiting for system audio to finalize before mixing the system and microphone tracks.

Made-with: Cursor
huzaifahayat12 vor 1 Monat
Ursprung
Commit
b590a3c5b8
2 geänderte Dateien mit 421 neuen und 45 gelöschten Zeilen
  1. 83 0
      meetings_app/OpenAI/OpenAITranscriptionClient.swift
  2. 338 45
      meetings_app/ViewController.swift

+ 83 - 0
meetings_app/OpenAI/OpenAITranscriptionClient.swift

@@ -0,0 +1,83 @@
1
+import Foundation
2
+
3
+enum OpenAITranscriptionClientError: Error {
4
+    case invalidResponse
5
+    case httpStatus(Int, String)
6
+    case missingTranscript
7
+}
8
+
9
+final class OpenAITranscriptionClient {
10
+    private let session: URLSession
11
+
12
+    init(session: URLSession = .shared) {
13
+        self.session = session
14
+    }
15
+
16
+    func transcribeAudioFile(fileURL: URL, apiKey: String) async throws -> String {
17
+        let boundary = "Boundary-\(UUID().uuidString)"
18
+        var request = URLRequest(url: URL(string: "https://api.openai.com/v1/audio/transcriptions")!)
19
+        request.httpMethod = "POST"
20
+        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
21
+        request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
22
+        request.httpBody = try makeMultipartBody(fileURL: fileURL, boundary: boundary)
23
+
24
+        let (data, response) = try await session.data(for: request)
25
+        guard let http = response as? HTTPURLResponse else {
26
+            throw OpenAITranscriptionClientError.invalidResponse
27
+        }
28
+        guard (200..<300).contains(http.statusCode) else {
29
+            let body = String(data: data, encoding: .utf8) ?? "<no body>"
30
+            throw OpenAITranscriptionClientError.httpStatus(http.statusCode, body)
31
+        }
32
+
33
+        struct TranscriptionResponse: Decodable {
34
+            let text: String?
35
+        }
36
+
37
+        let decoded = try JSONDecoder().decode(TranscriptionResponse.self, from: data)
38
+        guard let text = decoded.text?.trimmingCharacters(in: .whitespacesAndNewlines), text.isEmpty == false else {
39
+            throw OpenAITranscriptionClientError.missingTranscript
40
+        }
41
+        return text
42
+    }
43
+
44
+    private func makeMultipartBody(fileURL: URL, boundary: String) throws -> Data {
45
+        let fileData = try Data(contentsOf: fileURL)
46
+        var body = Data()
47
+        let filename = fileURL.lastPathComponent
48
+        let mimeType = "audio/mp4"
49
+
50
+        appendField(name: "model", value: "gpt-4o-mini-transcribe", boundary: boundary, into: &body)
51
+        appendField(name: "response_format", value: "json", boundary: boundary, into: &body)
52
+        appendFile(name: "file", filename: filename, mimeType: mimeType, data: fileData, boundary: boundary, into: &body)
53
+        body.append("--\(boundary)--\r\n".data(using: .utf8) ?? Data())
54
+        return body
55
+    }
56
+
57
+    private func appendField(name: String, value: String, boundary: String, into body: inout Data) {
58
+        body.append("--\(boundary)\r\n".data(using: .utf8) ?? Data())
59
+        body.append("Content-Disposition: form-data; name=\"\(name)\"\r\n\r\n".data(using: .utf8) ?? Data())
60
+        body.append("\(value)\r\n".data(using: .utf8) ?? Data())
61
+    }
62
+
63
+    private func appendFile(name: String, filename: String, mimeType: String, data: Data, boundary: String, into body: inout Data) {
64
+        body.append("--\(boundary)\r\n".data(using: .utf8) ?? Data())
65
+        body.append("Content-Disposition: form-data; name=\"\(name)\"; filename=\"\(filename)\"\r\n".data(using: .utf8) ?? Data())
66
+        body.append("Content-Type: \(mimeType)\r\n\r\n".data(using: .utf8) ?? Data())
67
+        body.append(data)
68
+        body.append("\r\n".data(using: .utf8) ?? Data())
69
+    }
70
+}
71
+
72
+extension OpenAITranscriptionClientError: LocalizedError {
73
+    var errorDescription: String? {
74
+        switch self {
75
+        case .invalidResponse:
76
+            return "OpenAI transcription returned an invalid response."
77
+        case let .httpStatus(status, body):
78
+            return "OpenAI transcription failed (\(status)): \(body)"
79
+        case .missingTranscript:
80
+            return "OpenAI transcription returned no text."
81
+        }
82
+    }
83
+}

+ 338 - 45
meetings_app/ViewController.swift

@@ -231,6 +231,18 @@ private final class StoreKitCoordinator {
231 231
 }
232 232
 
233 233
 final class ViewController: NSViewController {
234
+    private enum MeetingTranscriptStatus: String, Codable {
235
+        case notRequested
236
+        case processing
237
+        case ready
238
+        case failed
239
+    }
240
+
241
+    private enum MeetingTranscriptSource: String, Codable {
242
+        case meetApi
243
+        case localAudioOpenAI
244
+    }
245
+
234 246
     private enum PaywallFooterAction {
235 247
         case manageSubscription
236 248
         case restorePurchase
@@ -253,6 +265,10 @@ final class ViewController: NSViewController {
253 265
         let startedAt: Date
254 266
         let endedAt: Date
255 267
         let audioFilePath: String
268
+        var transcriptStatusRaw: String?
269
+        var transcriptSourceRaw: String?
270
+        var transcriptText: String?
271
+        var transcriptErrorMessage: String?
256 272
     }
257 273
 
258 274
     private struct ActiveMeetingRecordingSession {
@@ -260,7 +276,8 @@ final class ViewController: NSViewController {
260 276
         let title: String
261 277
         let meetURL: URL
262 278
         let startedAt: Date
263
-        let audioFileURL: URL
279
+        let systemAudioFileURL: URL
280
+        let microphoneAudioFileURL: URL
264 281
     }
265 282
 
266 283
     private var palette = Palette(isDarkMode: true)
@@ -290,10 +307,10 @@ final class ViewController: NSViewController {
290 307
     private var aiCompanionAudioStatusLabelByView = [ObjectIdentifier: NSTextField]()
291 308
     private var aiCompanionTranscriptMeetingIdByView = [ObjectIdentifier: String]()
292 309
     private var aiCompanionTranscriptStatusLabelByView = [ObjectIdentifier: NSTextField]()
293
-    private var aiCompanionTranscriptTextByMeetingId = [String: String]()
294 310
     private var aiCompanionTranscriptCurrentRequestId: UUID?
295 311
     private var aiCompanionTranscriptWindow: NSWindow?
296 312
     private weak var aiCompanionTranscriptTextView: NSTextView?
313
+    private var aiCompanionTranscriptTaskByMeetingId = [String: Task<Void, Never>]()
297 314
     private var aiCompanionAudioPlayer: AVPlayer?
298 315
     private var aiCompanionLocalAudioPlayer: AVAudioPlayer?
299 316
     private var aiCompanionCurrentlyPlayingURL: URL?
@@ -455,7 +472,10 @@ final class ViewController: NSViewController {
455 472
     private let ratingStateMigrationV2DoneDefaultsKey = "rating.stateMigrationV2Done"
456 473
     private let nonPremiumJoinTrialConsumedDefaultsKey = "join.nonPremiumTrialConsumed"
457 474
     private let aiCompanionLocalRecordingsDefaultsKey = "aiCompanion.localRecordings"
475
+    private let openAIAPIKeyDefaultsKey = "openai.apiKey"
476
+    private let openAIAPIKeyPlistKey = "OpenAIAPIKey"
458 477
     private let ratingEligibleUsageSeconds: TimeInterval = 30 * 60
478
+    private let openAITranscriptionClient = OpenAITranscriptionClient()
459 479
     private var darkModeEnabled: Bool {
460 480
         get {
461 481
             let hasValue = UserDefaults.standard.object(forKey: darkModeDefaultsKey) != nil
@@ -1284,6 +1304,73 @@ private extension ViewController {
1284 1304
         UserDefaults.standard.set(encoded, forKey: aiCompanionLocalRecordingsDefaultsKey)
1285 1305
     }
1286 1306
 
1307
+    private func configuredOpenAIAPIKey() -> String? {
1308
+        let value = UserDefaults.standard.string(forKey: openAIAPIKeyDefaultsKey)?
1309
+            .trimmingCharacters(in: .whitespacesAndNewlines)
1310
+        if let value, value.isEmpty == false {
1311
+            return value
1312
+        }
1313
+        let plistValue = Bundle.main.object(forInfoDictionaryKey: openAIAPIKeyPlistKey) as? String
1314
+        let trimmedPlist = plistValue?.trimmingCharacters(in: .whitespacesAndNewlines)
1315
+        if let trimmedPlist, trimmedPlist.isEmpty == false {
1316
+            return trimmedPlist
1317
+        }
1318
+        return nil
1319
+    }
1320
+
1321
+    private func aiCompanionTranscriptStatus(for recording: MeetingRecordingSummary) -> MeetingTranscriptStatus {
1322
+        guard let raw = recording.transcriptStatusRaw, let status = MeetingTranscriptStatus(rawValue: raw) else {
1323
+            return .notRequested
1324
+        }
1325
+        return status
1326
+    }
1327
+
1328
+    private func aiCompanionTranscriptStatusText(for recording: MeetingRecordingSummary) -> String {
1329
+        switch aiCompanionTranscriptStatus(for: recording) {
1330
+        case .notRequested:
1331
+            return "Transcript not requested"
1332
+        case .processing:
1333
+            return "Transcript processing..."
1334
+        case .ready:
1335
+            return "Transcript ready"
1336
+        case .failed:
1337
+            let error = recording.transcriptErrorMessage?.trimmingCharacters(in: .whitespacesAndNewlines)
1338
+            if let error, error.isEmpty == false {
1339
+                return "Transcript unavailable (tap to retry)"
1340
+            }
1341
+            return "Transcript unavailable (tap to retry)"
1342
+        }
1343
+    }
1344
+
1345
+    private func aiCompanionMeetingFromRecording(_ recording: MeetingRecordingSummary) -> ScheduledMeeting? {
1346
+        guard let meetURL = URL(string: recording.meetURLString) else { return nil }
1347
+        return ScheduledMeeting(
1348
+            id: recording.id,
1349
+            title: recording.title,
1350
+            subtitle: nil,
1351
+            startDate: recording.startedAt,
1352
+            endDate: recording.endedAt,
1353
+            meetURL: meetURL,
1354
+            isAllDay: false
1355
+        )
1356
+    }
1357
+
1358
+    @discardableResult
1359
+    private func aiCompanionUpdateRecording(meetingId: String, mutate: (inout MeetingRecordingSummary) -> Void) -> MeetingRecordingSummary? {
1360
+        guard let idx = aiCompanionLocalRecordings.firstIndex(where: { $0.id == meetingId }) else { return nil }
1361
+        mutate(&aiCompanionLocalRecordings[idx])
1362
+        persistAiCompanionLocalRecordings()
1363
+        return aiCompanionLocalRecordings[idx]
1364
+    }
1365
+
1366
+    private func aiCompanionRefreshTranscriptStatusLabels(forMeetingID meetingId: String) {
1367
+        guard let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }) else { return }
1368
+        let statusText = aiCompanionTranscriptStatusText(for: recording)
1369
+        for (buttonId, linkedMeetingId) in aiCompanionTranscriptMeetingIdByView where linkedMeetingId == meetingId {
1370
+            aiCompanionTranscriptStatusLabelByView[buttonId]?.stringValue = statusText
1371
+        }
1372
+    }
1373
+
1287 1374
     private func localRecordingDirectoryURL() -> URL {
1288 1375
         let base = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first
1289 1376
             ?? URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
@@ -1378,18 +1465,22 @@ private extension ViewController {
1378 1465
 
1379 1466
     private func startMeetingRecording(meetingTitle: String, meetingURL: URL) {
1380 1467
         let recordingID = UUID().uuidString
1381
-        let outputURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID).m4a")
1468
+        let outputURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID)-system.m4a")
1469
+        let microphoneURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID)-mic.m4a")
1382 1470
 
1383 1471
         activeMeetingRecordingSession = ActiveMeetingRecordingSession(
1384 1472
             id: recordingID,
1385 1473
             title: meetingTitle,
1386 1474
             meetURL: meetingURL,
1387 1475
             startedAt: Date(),
1388
-            audioFileURL: outputURL
1476
+            systemAudioFileURL: outputURL,
1477
+            microphoneAudioFileURL: microphoneURL
1389 1478
         )
1390 1479
         pageCache[.aiCompanion] = nil
1391 1480
         if selectedSidebarPage == .aiCompanion { showSidebarPage(.aiCompanion) }
1392 1481
 
1482
+        startMicrophoneRecording(at: microphoneURL, showToast: false)
1483
+
1393 1484
         if #available(macOS 13.0, *) {
1394 1485
             let systemRecorder = MeetingSystemAudioRecorder(outputURL: outputURL)
1395 1486
             activeMeetingSystemAudioStopper = { [systemRecorder] in
@@ -1400,22 +1491,22 @@ private extension ViewController {
1400 1491
                 do {
1401 1492
                     try await systemRecorder.start()
1402 1493
                     await MainActor.run {
1403
-                        self.showTopToast(message: "Meeting recording started (meeting audio)", isError: false)
1494
+                        self.showTopToast(message: "Meeting recording started (meeting + microphone)", isError: false)
1404 1495
                     }
1405 1496
                 } catch {
1406 1497
                     await MainActor.run {
1407 1498
                         self.activeMeetingSystemAudioStopper = nil
1408
-                        self.startMicrophoneFallbackRecording(at: outputURL)
1499
+                        self.showTopToast(message: "System audio unavailable. Recording microphone only.", isError: true)
1409 1500
                     }
1410 1501
                 }
1411 1502
             }
1412 1503
             return
1413 1504
         }
1414 1505
 
1415
-        startMicrophoneFallbackRecording(at: outputURL)
1506
+        showTopToast(message: "Meeting recording started (microphone only)", isError: false)
1416 1507
     }
1417 1508
 
1418
-    private func startMicrophoneFallbackRecording(at outputURL: URL) {
1509
+    private func startMicrophoneRecording(at outputURL: URL, showToast: Bool) {
1419 1510
         let settings: [String: Any] = [
1420 1511
             AVFormatIDKey: kAudioFormatMPEG4AAC,
1421 1512
             AVSampleRateKey: 44_100,
@@ -1427,7 +1518,9 @@ private extension ViewController {
1427 1518
             recorder.prepareToRecord()
1428 1519
             recorder.record()
1429 1520
             activeMeetingAudioRecorder = recorder
1430
-            showTopToast(message: "Meeting recording started (microphone only)", isError: false)
1521
+            if showToast {
1522
+                showTopToast(message: "Meeting recording started (microphone only)", isError: false)
1523
+            }
1431 1524
         } catch {
1432 1525
             activeMeetingRecordingSession = nil
1433 1526
             showSimpleAlert(title: "Could not start recording", message: error.localizedDescription)
@@ -1442,26 +1535,126 @@ private extension ViewController {
1442 1535
         guard let session = activeMeetingRecordingSession else { return }
1443 1536
         let stopSystemAudio = activeMeetingSystemAudioStopper
1444 1537
         activeMeetingSystemAudioStopper = nil
1445
-        if let stopSystemAudio { Task { await stopSystemAudio() } }
1446 1538
         activeMeetingAudioRecorder?.stop()
1447 1539
         activeMeetingAudioRecorder = nil
1448 1540
         activeMeetingRecordingSession = nil
1541
+        Task { [weak self] in
1542
+            guard let self else { return }
1543
+            if let stopSystemAudio { await stopSystemAudio() }
1544
+            let finalURL = await self.finalizeMeetingAudioFile(
1545
+                systemURL: session.systemAudioFileURL,
1546
+                microphoneURL: session.microphoneAudioFileURL,
1547
+                recordingID: session.id
1548
+            )
1549
+            await MainActor.run {
1550
+                let summary = MeetingRecordingSummary(
1551
+                    id: session.id,
1552
+                    title: session.title,
1553
+                    meetURLString: session.meetURL.absoluteString,
1554
+                    startedAt: session.startedAt,
1555
+                    endedAt: Date(),
1556
+                    audioFilePath: finalURL.path,
1557
+                    transcriptStatusRaw: MeetingTranscriptStatus.notRequested.rawValue,
1558
+                    transcriptSourceRaw: nil,
1559
+                    transcriptText: nil,
1560
+                    transcriptErrorMessage: nil
1561
+                )
1562
+                self.aiCompanionLocalRecordings.insert(summary, at: 0)
1563
+                self.aiCompanionLocalRecordings.sort(by: { $0.endedAt > $1.endedAt })
1564
+                self.persistAiCompanionLocalRecordings()
1565
+                self.aiCompanionStartTranscriptProcessing(forMeetingID: summary.id, requestId: nil, interactiveAuth: false, forceRegenerate: false)
1566
+                self.pageCache[.aiCompanion] = nil
1567
+                self.showTopToast(message: "Meeting recording saved", isError: false)
1568
+                if self.selectedSidebarPage == .aiCompanion {
1569
+                    self.showSidebarPage(.aiCompanion)
1570
+                }
1571
+            }
1572
+        }
1573
+    }
1449 1574
 
1450
-        let summary = MeetingRecordingSummary(
1451
-            id: session.id,
1452
-            title: session.title,
1453
-            meetURLString: session.meetURL.absoluteString,
1454
-            startedAt: session.startedAt,
1455
-            endedAt: Date(),
1456
-            audioFilePath: session.audioFileURL.path
1457
-        )
1458
-        aiCompanionLocalRecordings.insert(summary, at: 0)
1459
-        aiCompanionLocalRecordings.sort(by: { $0.endedAt > $1.endedAt })
1460
-        persistAiCompanionLocalRecordings()
1461
-        pageCache[.aiCompanion] = nil
1462
-        showTopToast(message: "Meeting recording saved", isError: false)
1463
-        if selectedSidebarPage == .aiCompanion {
1464
-            showSidebarPage(.aiCompanion)
1575
+    private func fileSize(at url: URL) -> Int64 {
1576
+        let attrs = try? FileManager.default.attributesOfItem(atPath: url.path)
1577
+        return attrs?[.size] as? Int64 ?? 0
1578
+    }
1579
+
1580
+    private func hasAudioPayload(at url: URL, minBytes: Int64 = 10_000) -> Bool {
1581
+        guard FileManager.default.fileExists(atPath: url.path) else { return false }
1582
+        return fileSize(at: url) >= minBytes
1583
+    }
1584
+
1585
+    private func finalizeMeetingAudioFile(systemURL: URL, microphoneURL: URL, recordingID: String) async -> URL {
1586
+        let destinationURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID).m4a")
1587
+        let hasSystem = hasAudioPayload(at: systemURL)
1588
+        let hasMic = hasAudioPayload(at: microphoneURL)
1589
+
1590
+        if hasSystem && hasMic {
1591
+            do {
1592
+                try await mixAudioFiles(systemURL: systemURL, microphoneURL: microphoneURL, destinationURL: destinationURL)
1593
+                try? FileManager.default.removeItem(at: systemURL)
1594
+                try? FileManager.default.removeItem(at: microphoneURL)
1595
+                return destinationURL
1596
+            } catch {
1597
+                // Fall back to best available single track.
1598
+            }
1599
+        }
1600
+
1601
+        let chosenURL: URL
1602
+        if hasSystem {
1603
+            chosenURL = systemURL
1604
+        } else if hasMic {
1605
+            chosenURL = microphoneURL
1606
+        } else {
1607
+            let systemSize = fileSize(at: systemURL)
1608
+            let micSize = fileSize(at: microphoneURL)
1609
+            chosenURL = systemSize >= micSize ? systemURL : microphoneURL
1610
+        }
1611
+
1612
+        if chosenURL.path != destinationURL.path {
1613
+            try? FileManager.default.removeItem(at: destinationURL)
1614
+            do {
1615
+                try FileManager.default.copyItem(at: chosenURL, to: destinationURL)
1616
+            } catch {
1617
+                return chosenURL
1618
+            }
1619
+        }
1620
+        try? FileManager.default.removeItem(at: systemURL)
1621
+        try? FileManager.default.removeItem(at: microphoneURL)
1622
+        return destinationURL
1623
+    }
1624
+
1625
+    private func mixAudioFiles(systemURL: URL, microphoneURL: URL, destinationURL: URL) async throws {
1626
+        try? FileManager.default.removeItem(at: destinationURL)
1627
+
1628
+        let composition = AVMutableComposition()
1629
+        guard let systemTrack = composition.addMutableTrack(withMediaType: .audio, preferredTrackID: kCMPersistentTrackID_Invalid),
1630
+              let micTrack = composition.addMutableTrack(withMediaType: .audio, preferredTrackID: kCMPersistentTrackID_Invalid) else {
1631
+            throw NSError(domain: "MeetingAudioMix", code: 1, userInfo: [NSLocalizedDescriptionKey: "Unable to create audio composition tracks."])
1632
+        }
1633
+
1634
+        let systemAsset = AVURLAsset(url: systemURL)
1635
+        let micAsset = AVURLAsset(url: microphoneURL)
1636
+
1637
+        if let src = try await systemAsset.loadTracks(withMediaType: .audio).first {
1638
+            let duration = try await systemAsset.load(.duration)
1639
+            try systemTrack.insertTimeRange(CMTimeRange(start: .zero, duration: duration), of: src, at: .zero)
1640
+        }
1641
+        if let src = try await micAsset.loadTracks(withMediaType: .audio).first {
1642
+            let duration = try await micAsset.load(.duration)
1643
+            try micTrack.insertTimeRange(CMTimeRange(start: .zero, duration: duration), of: src, at: .zero)
1644
+        }
1645
+
1646
+        guard let export = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A) else {
1647
+            throw NSError(domain: "MeetingAudioMix", code: 2, userInfo: [NSLocalizedDescriptionKey: "Unable to create audio export session."])
1648
+        }
1649
+        export.outputURL = destinationURL
1650
+        export.outputFileType = .m4a
1651
+        await withCheckedContinuation { continuation in
1652
+            export.exportAsynchronously {
1653
+                continuation.resume()
1654
+            }
1655
+        }
1656
+        if export.status != .completed {
1657
+            throw export.error ?? NSError(domain: "MeetingAudioMix", code: 3, userInfo: [NSLocalizedDescriptionKey: "Audio mix export failed."])
1465 1658
         }
1466 1659
     }
1467 1660
 
@@ -2228,7 +2421,8 @@ private extension ViewController {
2228 2421
         aiCompanionSpeechTextByView.removeAll()
2229 2422
         aiCompanionTranscriptMeetingIdByView.removeAll()
2230 2423
         aiCompanionTranscriptStatusLabelByView.removeAll()
2231
-        aiCompanionTranscriptTextByMeetingId.removeAll()
2424
+        aiCompanionTranscriptTaskByMeetingId.values.forEach { $0.cancel() }
2425
+        aiCompanionTranscriptTaskByMeetingId.removeAll()
2232 2426
         aiCompanionTranscriptCurrentRequestId = nil
2233 2427
         aiCompanionTranscriptWindow?.close()
2234 2428
         aiCompanionTranscriptWindow = nil
@@ -2424,7 +2618,7 @@ private extension ViewController {
2424 2618
 
2425 2619
         aiCompanionTranscriptMeetingIdByView[ObjectIdentifier(transcriptButton)] = recording.id
2426 2620
 
2427
-        let transcriptStatusLabel = textLabel("Transcript not loaded", font: typography.fieldLabel, color: palette.textMuted)
2621
+        let transcriptStatusLabel = textLabel(aiCompanionTranscriptStatusText(for: recording), font: typography.fieldLabel, color: palette.textMuted)
2428 2622
         transcriptStatusLabel.alignment = .left
2429 2623
         transcriptStatusLabel.maximumNumberOfLines = 2
2430 2624
         transcriptStatusLabel.lineBreakMode = .byTruncatingTail
@@ -2663,47 +2857,144 @@ private extension ViewController {
2663 2857
     @objc private func aiCompanionTranscriptTapped(_ sender: NSButton) {
2664 2858
         let senderId = ObjectIdentifier(sender)
2665 2859
         guard let meetingId = aiCompanionTranscriptMeetingIdByView[senderId] else { return }
2666
-        guard let meeting = scheduleCachedMeetings.first(where: { $0.id == meetingId }) else {
2667
-            aiCompanionTranscriptStatusLabelByView[senderId]?.stringValue = "Transcript unavailable"
2668
-            showSimpleAlert(title: "Transcript unavailable", message: "Transcript fetch is available for Google Calendar meetings only.")
2860
+        guard let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }) else {
2861
+            aiCompanionTranscriptStatusLabelByView[senderId]?.stringValue = "Transcript unavailable (tap to retry)"
2862
+            showSimpleAlert(title: "Transcript unavailable", message: "Could not find recording details for this meeting.")
2669 2863
             return
2670 2864
         }
2671 2865
 
2672
-        if let cached = aiCompanionTranscriptTextByMeetingId[meetingId] {
2673
-            aiCompanionPresentTranscriptWindow(meetingTitle: meeting.title, initialText: cached)
2674
-            aiCompanionTranscriptStatusLabelByView[senderId]?.stringValue = "Transcript ready"
2866
+        if aiCompanionTranscriptStatus(for: recording) == .ready,
2867
+           let cached = recording.transcriptText?.trimmingCharacters(in: .whitespacesAndNewlines),
2868
+           cached.isEmpty == false {
2869
+            aiCompanionPresentTranscriptWindow(meetingTitle: recording.title, initialText: cached)
2870
+            aiCompanionTranscriptStatusLabelByView[senderId]?.stringValue = aiCompanionTranscriptStatusText(for: recording)
2675 2871
             return
2676 2872
         }
2677 2873
 
2678
-        aiCompanionTranscriptStatusLabelByView[senderId]?.stringValue = "Loading transcript..."
2874
+        aiCompanionTranscriptStatusLabelByView[senderId]?.stringValue = "Transcript processing..."
2679 2875
 
2680 2876
         let requestId = UUID()
2681 2877
         aiCompanionTranscriptCurrentRequestId = requestId
2682 2878
 
2683
-        aiCompanionPresentTranscriptWindow(meetingTitle: meeting.title, initialText: "Loading transcript...")
2879
+        aiCompanionPresentTranscriptWindow(meetingTitle: recording.title, initialText: "Transcript processing...")
2880
+        aiCompanionStartTranscriptProcessing(forMeetingID: meetingId, requestId: requestId, interactiveAuth: true, forceRegenerate: true)
2881
+    }
2882
+
2883
+    private func aiCompanionStartTranscriptProcessing(
2884
+        forMeetingID meetingId: String,
2885
+        requestId: UUID?,
2886
+        interactiveAuth: Bool,
2887
+        forceRegenerate: Bool
2888
+    ) {
2889
+        if !forceRegenerate,
2890
+           let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }),
2891
+           aiCompanionTranscriptStatus(for: recording) == .ready {
2892
+            return
2893
+        }
2684 2894
 
2685
-        Task { [weak self] in
2895
+        aiCompanionTranscriptTaskByMeetingId[meetingId]?.cancel()
2896
+        _ = aiCompanionUpdateRecording(meetingId: meetingId) { recording in
2897
+            recording.transcriptStatusRaw = MeetingTranscriptStatus.processing.rawValue
2898
+            recording.transcriptErrorMessage = nil
2899
+            if forceRegenerate {
2900
+                recording.transcriptText = nil
2901
+                recording.transcriptSourceRaw = nil
2902
+            }
2903
+        }
2904
+        aiCompanionRefreshTranscriptStatusLabels(forMeetingID: meetingId)
2905
+        if selectedSidebarPage == .aiCompanion {
2906
+            pageCache[.aiCompanion] = nil
2907
+            showSidebarPage(.aiCompanion)
2908
+        }
2909
+
2910
+        let presentingWindow = view.window
2911
+        let task = Task { [weak self] in
2686 2912
             guard let self else { return }
2913
+            defer { Task { @MainActor [weak self] in self?.aiCompanionTranscriptTaskByMeetingId[meetingId] = nil } }
2687 2914
 
2688 2915
             do {
2689
-                let token = try await self.googleOAuth.validAccessToken(presentingWindow: self.view.window)
2690
-                let text = try await self.aiCompanionFetchTranscriptText(for: meeting, accessToken: token)
2691
-
2916
+                let result = try await self.aiCompanionFetchOrGenerateTranscript(
2917
+                    meetingId: meetingId,
2918
+                    interactiveAuth: interactiveAuth,
2919
+                    presentingWindow: presentingWindow
2920
+                )
2692 2921
                 await MainActor.run {
2693
-                    guard self.aiCompanionTranscriptCurrentRequestId == requestId else { return } // stale request
2694
-                    self.aiCompanionTranscriptTextByMeetingId[meetingId] = text
2695
-                    self.aiCompanionTranscriptTextView?.string = text
2696
-                    self.aiCompanionTranscriptStatusLabelByView[senderId]?.stringValue = "Transcript ready"
2922
+                    guard requestId == nil || self.aiCompanionTranscriptCurrentRequestId == requestId else { return }
2923
+                    _ = self.aiCompanionUpdateRecording(meetingId: meetingId) { recording in
2924
+                        recording.transcriptStatusRaw = MeetingTranscriptStatus.ready.rawValue
2925
+                        recording.transcriptSourceRaw = result.source.rawValue
2926
+                        recording.transcriptText = result.text
2927
+                        recording.transcriptErrorMessage = nil
2928
+                    }
2929
+                    self.aiCompanionTranscriptTextView?.string = result.text
2930
+                    self.aiCompanionRefreshTranscriptStatusLabels(forMeetingID: meetingId)
2931
+                    if self.selectedSidebarPage == .aiCompanion {
2932
+                        self.pageCache[.aiCompanion] = nil
2933
+                        self.showSidebarPage(.aiCompanion)
2934
+                    }
2697 2935
                 }
2698 2936
             } catch {
2699 2937
                 await MainActor.run {
2700
-                    guard self.aiCompanionTranscriptCurrentRequestId == requestId else { return }
2938
+                    guard requestId == nil || self.aiCompanionTranscriptCurrentRequestId == requestId else { return }
2701 2939
                     let msg = error.localizedDescription.isEmpty ? "Failed to load transcript." : error.localizedDescription
2940
+                    _ = self.aiCompanionUpdateRecording(meetingId: meetingId) { recording in
2941
+                        recording.transcriptStatusRaw = MeetingTranscriptStatus.failed.rawValue
2942
+                        recording.transcriptErrorMessage = msg
2943
+                    }
2702 2944
                     self.aiCompanionTranscriptTextView?.string = "Transcript unavailable.\n\n\(msg)"
2703
-                    self.aiCompanionTranscriptStatusLabelByView[senderId]?.stringValue = "Transcript unavailable"
2945
+                    self.aiCompanionRefreshTranscriptStatusLabels(forMeetingID: meetingId)
2946
+                    if self.selectedSidebarPage == .aiCompanion {
2947
+                        self.pageCache[.aiCompanion] = nil
2948
+                        self.showSidebarPage(.aiCompanion)
2949
+                    }
2950
+                }
2951
+            }
2952
+        }
2953
+        aiCompanionTranscriptTaskByMeetingId[meetingId] = task
2954
+    }
2955
+
2956
+    private func aiCompanionFetchOrGenerateTranscript(
2957
+        meetingId: String,
2958
+        interactiveAuth: Bool,
2959
+        presentingWindow: NSWindow?
2960
+    ) async throws -> (text: String, source: MeetingTranscriptSource) {
2961
+        guard let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }) else {
2962
+            throw NSError(domain: "AiCompanionTranscript", code: 10, userInfo: [NSLocalizedDescriptionKey: "Recording not found."])
2963
+        }
2964
+
2965
+        if let meeting = aiCompanionMeetingFromRecording(recording) {
2966
+            do {
2967
+                let accessToken: String
2968
+                if interactiveAuth {
2969
+                    accessToken = try await googleOAuth.validAccessToken(presentingWindow: presentingWindow)
2970
+                } else if let token = googleOAuth.loadTokens()?.accessToken {
2971
+                    accessToken = token
2972
+                } else {
2973
+                    throw NSError(domain: "AiCompanionTranscript", code: 11, userInfo: [NSLocalizedDescriptionKey: "Google account not connected."])
2704 2974
                 }
2975
+                let text = try await aiCompanionFetchTranscriptText(for: meeting, accessToken: accessToken)
2976
+                let cleaned = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
2977
+                if cleaned.isEmpty == false {
2978
+                    return (cleaned, .meetApi)
2979
+                }
2980
+            } catch {
2981
+                // Continue to OpenAI fallback below.
2705 2982
             }
2706 2983
         }
2984
+
2985
+        let audioURL = URL(fileURLWithPath: recording.audioFilePath)
2986
+        guard FileManager.default.fileExists(atPath: audioURL.path) else {
2987
+            throw NSError(domain: "AiCompanionTranscript", code: 12, userInfo: [NSLocalizedDescriptionKey: "Local meeting audio is missing."])
2988
+        }
2989
+        guard let apiKey = configuredOpenAIAPIKey() else {
2990
+            throw NSError(domain: "AiCompanionTranscript", code: 13, userInfo: [NSLocalizedDescriptionKey: "OpenAI API key missing. Add OpenAIAPIKey to Info.plist or openai.apiKey to UserDefaults."])
2991
+        }
2992
+        let text = try await openAITranscriptionClient.transcribeAudioFile(fileURL: audioURL, apiKey: apiKey)
2993
+        let cleaned = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
2994
+        guard cleaned.isEmpty == false else {
2995
+            throw NSError(domain: "AiCompanionTranscript", code: 14, userInfo: [NSLocalizedDescriptionKey: "Generated transcript was empty."])
2996
+        }
2997
+        return (cleaned, .localAudioOpenAI)
2707 2998
     }
2708 2999
 
2709 3000
     @objc private func aiCompanionStopRecordingTapped(_ sender: NSButton) {
@@ -5766,6 +6057,8 @@ private final class MeetingSystemAudioRecorder: NSObject, SCStreamOutput, SCStre
5766 6057
         config.minimumFrameInterval = CMTime(value: 1, timescale: 2)
5767 6058
         config.queueDepth = 1
5768 6059
         config.capturesAudio = true
6060
+        config.sampleRate = 48_000
6061
+        config.channelCount = 2
5769 6062
         if #available(macOS 13.0, *) {
5770 6063
             config.excludesCurrentProcessAudio = true
5771 6064
         }