Browse Source

Improve meeting transcript reliability with dual-source capture and fallback transcription.

Persist transcript status per recording, auto-process transcripts after meetings end, and fix silent recordings by waiting for system audio to finish finalizing before mixing the system and mic tracks.

Made-with: Cursor
huzaifahayat12 1 month ago
parent
commit
b590a3c5b8
2 changed files with 421 additions and 45 deletions
  1. 83 0
      meetings_app/OpenAI/OpenAITranscriptionClient.swift
  2. 338 45
      meetings_app/ViewController.swift

+ 83 - 0
meetings_app/OpenAI/OpenAITranscriptionClient.swift

@@ -0,0 +1,83 @@
1
+import Foundation
2
+
3
/// Failures that `OpenAITranscriptionClient` reports for a transcription request.
enum OpenAITranscriptionClientError: Error {
    /// The response object was not an `HTTPURLResponse`.
    case invalidResponse

    /// The server answered with a status outside 200..<300.
    /// Carries the status code and the response body decoded as UTF-8 text.
    case httpStatus(Int, String)

    /// The response decoded, but its `text` was absent or blank after trimming.
    case missingTranscript
}
8
+
9
/// Thin client for OpenAI's `/v1/audio/transcriptions` endpoint.
///
/// Reads a local audio file, uploads it as `multipart/form-data`, and returns the
/// transcript text found in the JSON response.
final class OpenAITranscriptionClient {
    /// Model requested when the caller does not choose one.
    static let defaultModel = "gpt-4o-mini-transcribe"

    private let session: URLSession

    /// - Parameter session: Session used for the upload; defaults to `URLSession.shared`.
    init(session: URLSession = .shared) {
        self.session = session
    }

    /// Uploads the audio file at `fileURL` and returns its transcript.
    ///
    /// - Parameters:
    ///   - fileURL: Local audio file; its whole content is read into memory for the request body.
    ///   - apiKey: Sent as a `Bearer` token in the `Authorization` header.
    ///   - model: Value of the `model` form field; defaults to `defaultModel`.
    /// - Returns: The transcript text, trimmed of surrounding whitespace and newlines.
    /// - Throws: `OpenAITranscriptionClientError.invalidResponse` when the response is not HTTP,
    ///   `.httpStatus` for a non-2xx status, `.missingTranscript` when the decoded text is absent
    ///   or blank. Errors from reading the file, from `URLSession`, and from JSON decoding are
    ///   propagated unchanged.
    func transcribeAudioFile(
        fileURL: URL,
        apiKey: String,
        model: String = OpenAITranscriptionClient.defaultModel
    ) async throws -> String {
        let boundary = "Boundary-\(UUID().uuidString)"
        // Literal URL: a parse failure here would be a programmer error, so force-unwrapping is intended.
        var request = URLRequest(url: URL(string: "https://api.openai.com/v1/audio/transcriptions")!)
        request.httpMethod = "POST"
        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
        request.httpBody = try makeMultipartBody(fileURL: fileURL, model: model, boundary: boundary)

        let (data, response) = try await session.data(for: request)
        guard let http = response as? HTTPURLResponse else {
            throw OpenAITranscriptionClientError.invalidResponse
        }
        guard (200..<300).contains(http.statusCode) else {
            let body = String(data: data, encoding: .utf8) ?? "<no body>"
            throw OpenAITranscriptionClientError.httpStatus(http.statusCode, body)
        }

        struct TranscriptionResponse: Decodable {
            let text: String?
        }

        let decoded = try JSONDecoder().decode(TranscriptionResponse.self, from: data)
        guard let text = decoded.text?.trimmingCharacters(in: .whitespacesAndNewlines), !text.isEmpty else {
            throw OpenAITranscriptionClientError.missingTranscript
        }
        return text
    }

    /// Builds the full multipart body: the `model` and `response_format` fields, the file part,
    /// and the closing boundary.
    private func makeMultipartBody(fileURL: URL, model: String, boundary: String) throws -> Data {
        let fileData = try Data(contentsOf: fileURL)
        var body = Data()

        appendField(name: "model", value: model, boundary: boundary, into: &body)
        appendField(name: "response_format", value: "json", boundary: boundary, into: &body)
        appendFile(
            name: "file",
            filename: fileURL.lastPathComponent,
            mimeType: "audio/mp4",
            data: fileData,
            boundary: boundary,
            into: &body
        )
        body.append(Data("--\(boundary)--\r\n".utf8))
        return body
    }

    /// Appends one plain-text form field part.
    private func appendField(name: String, value: String, boundary: String, into body: inout Data) {
        body.append(Data("--\(boundary)\r\n".utf8))
        body.append(Data("Content-Disposition: form-data; name=\"\(name)\"\r\n\r\n".utf8))
        body.append(Data("\(value)\r\n".utf8))
    }

    /// Appends one file part with its `Content-Disposition` and `Content-Type` headers.
    private func appendFile(name: String, filename: String, mimeType: String, data: Data, boundary: String, into body: inout Data) {
        // The filename sits inside a quoted header parameter; escape the characters that would
        // terminate the quote or the header line.
        let safeFilename = Self.escapedForContentDisposition(filename)
        body.append(Data("--\(boundary)\r\n".utf8))
        body.append(Data("Content-Disposition: form-data; name=\"\(name)\"; filename=\"\(safeFilename)\"\r\n".utf8))
        body.append(Data("Content-Type: \(mimeType)\r\n\r\n".utf8))
        body.append(data)
        body.append(Data("\r\n".utf8))
    }

    /// Percent-escapes CR, LF and `"` the way the multipart/form-data encoding algorithm
    /// prescribes for names and filenames.
    private static func escapedForContentDisposition(_ value: String) -> String {
        value
            .replacingOccurrences(of: "\r", with: "%0D")
            .replacingOccurrences(of: "\n", with: "%0A")
            .replacingOccurrences(of: "\"", with: "%22")
    }
}
71
+
72
extension OpenAITranscriptionClientError: LocalizedError {
    /// Human-readable message for each failure case.
    var errorDescription: String? {
        let message: String
        switch self {
        case .invalidResponse:
            message = "OpenAI transcription returned an invalid response."
        case .httpStatus(let status, let body):
            message = "OpenAI transcription failed (\(status)): \(body)"
        case .missingTranscript:
            message = "OpenAI transcription returned no text."
        }
        return message
    }
}

+ 338 - 45
meetings_app/ViewController.swift

@@ -231,6 +231,18 @@ private final class StoreKitCoordinator {
231
 }
231
 }
232
 
232
 
233
 final class ViewController: NSViewController {
233
 final class ViewController: NSViewController {
234
+    private enum MeetingTranscriptStatus: String, Codable {
235
+        case notRequested
236
+        case processing
237
+        case ready
238
+        case failed
239
+    }
240
+
241
+    private enum MeetingTranscriptSource: String, Codable {
242
+        case meetApi
243
+        case localAudioOpenAI
244
+    }
245
+
234
     private enum PaywallFooterAction {
246
     private enum PaywallFooterAction {
235
         case manageSubscription
247
         case manageSubscription
236
         case restorePurchase
248
         case restorePurchase
@@ -253,6 +265,10 @@ final class ViewController: NSViewController {
253
         let startedAt: Date
265
         let startedAt: Date
254
         let endedAt: Date
266
         let endedAt: Date
255
         let audioFilePath: String
267
         let audioFilePath: String
268
+        var transcriptStatusRaw: String?
269
+        var transcriptSourceRaw: String?
270
+        var transcriptText: String?
271
+        var transcriptErrorMessage: String?
256
     }
272
     }
257
 
273
 
258
     private struct ActiveMeetingRecordingSession {
274
     private struct ActiveMeetingRecordingSession {
@@ -260,7 +276,8 @@ final class ViewController: NSViewController {
260
         let title: String
276
         let title: String
261
         let meetURL: URL
277
         let meetURL: URL
262
         let startedAt: Date
278
         let startedAt: Date
263
-        let audioFileURL: URL
279
+        let systemAudioFileURL: URL
280
+        let microphoneAudioFileURL: URL
264
     }
281
     }
265
 
282
 
266
     private var palette = Palette(isDarkMode: true)
283
     private var palette = Palette(isDarkMode: true)
@@ -290,10 +307,10 @@ final class ViewController: NSViewController {
290
     private var aiCompanionAudioStatusLabelByView = [ObjectIdentifier: NSTextField]()
307
     private var aiCompanionAudioStatusLabelByView = [ObjectIdentifier: NSTextField]()
291
     private var aiCompanionTranscriptMeetingIdByView = [ObjectIdentifier: String]()
308
     private var aiCompanionTranscriptMeetingIdByView = [ObjectIdentifier: String]()
292
     private var aiCompanionTranscriptStatusLabelByView = [ObjectIdentifier: NSTextField]()
309
     private var aiCompanionTranscriptStatusLabelByView = [ObjectIdentifier: NSTextField]()
293
-    private var aiCompanionTranscriptTextByMeetingId = [String: String]()
294
     private var aiCompanionTranscriptCurrentRequestId: UUID?
310
     private var aiCompanionTranscriptCurrentRequestId: UUID?
295
     private var aiCompanionTranscriptWindow: NSWindow?
311
     private var aiCompanionTranscriptWindow: NSWindow?
296
     private weak var aiCompanionTranscriptTextView: NSTextView?
312
     private weak var aiCompanionTranscriptTextView: NSTextView?
313
+    private var aiCompanionTranscriptTaskByMeetingId = [String: Task<Void, Never>]()
297
     private var aiCompanionAudioPlayer: AVPlayer?
314
     private var aiCompanionAudioPlayer: AVPlayer?
298
     private var aiCompanionLocalAudioPlayer: AVAudioPlayer?
315
     private var aiCompanionLocalAudioPlayer: AVAudioPlayer?
299
     private var aiCompanionCurrentlyPlayingURL: URL?
316
     private var aiCompanionCurrentlyPlayingURL: URL?
@@ -455,7 +472,10 @@ final class ViewController: NSViewController {
455
     private let ratingStateMigrationV2DoneDefaultsKey = "rating.stateMigrationV2Done"
472
     private let ratingStateMigrationV2DoneDefaultsKey = "rating.stateMigrationV2Done"
456
     private let nonPremiumJoinTrialConsumedDefaultsKey = "join.nonPremiumTrialConsumed"
473
     private let nonPremiumJoinTrialConsumedDefaultsKey = "join.nonPremiumTrialConsumed"
457
     private let aiCompanionLocalRecordingsDefaultsKey = "aiCompanion.localRecordings"
474
     private let aiCompanionLocalRecordingsDefaultsKey = "aiCompanion.localRecordings"
475
+    private let openAIAPIKeyDefaultsKey = "openai.apiKey"
476
+    private let openAIAPIKeyPlistKey = "OpenAIAPIKey"
458
     private let ratingEligibleUsageSeconds: TimeInterval = 30 * 60
477
     private let ratingEligibleUsageSeconds: TimeInterval = 30 * 60
478
+    private let openAITranscriptionClient = OpenAITranscriptionClient()
459
     private var darkModeEnabled: Bool {
479
     private var darkModeEnabled: Bool {
460
         get {
480
         get {
461
             let hasValue = UserDefaults.standard.object(forKey: darkModeDefaultsKey) != nil
481
             let hasValue = UserDefaults.standard.object(forKey: darkModeDefaultsKey) != nil
@@ -1284,6 +1304,73 @@ private extension ViewController {
1284
         UserDefaults.standard.set(encoded, forKey: aiCompanionLocalRecordingsDefaultsKey)
1304
         UserDefaults.standard.set(encoded, forKey: aiCompanionLocalRecordingsDefaultsKey)
1285
     }
1305
     }
1286
 
1306
 
1307
+    private func configuredOpenAIAPIKey() -> String? {
1308
+        let value = UserDefaults.standard.string(forKey: openAIAPIKeyDefaultsKey)?
1309
+            .trimmingCharacters(in: .whitespacesAndNewlines)
1310
+        if let value, value.isEmpty == false {
1311
+            return value
1312
+        }
1313
+        let plistValue = Bundle.main.object(forInfoDictionaryKey: openAIAPIKeyPlistKey) as? String
1314
+        let trimmedPlist = plistValue?.trimmingCharacters(in: .whitespacesAndNewlines)
1315
+        if let trimmedPlist, trimmedPlist.isEmpty == false {
1316
+            return trimmedPlist
1317
+        }
1318
+        return nil
1319
+    }
1320
+
1321
+    private func aiCompanionTranscriptStatus(for recording: MeetingRecordingSummary) -> MeetingTranscriptStatus {
1322
+        guard let raw = recording.transcriptStatusRaw, let status = MeetingTranscriptStatus(rawValue: raw) else {
1323
+            return .notRequested
1324
+        }
1325
+        return status
1326
+    }
1327
+
1328
+    private func aiCompanionTranscriptStatusText(for recording: MeetingRecordingSummary) -> String {
1329
+        switch aiCompanionTranscriptStatus(for: recording) {
1330
+        case .notRequested:
1331
+            return "Transcript not requested"
1332
+        case .processing:
1333
+            return "Transcript processing..."
1334
+        case .ready:
1335
+            return "Transcript ready"
1336
+        case .failed:
1337
+            let error = recording.transcriptErrorMessage?.trimmingCharacters(in: .whitespacesAndNewlines)
1338
+            if let error, error.isEmpty == false {
1339
+                return "Transcript unavailable (tap to retry)"
1340
+            }
1341
+            return "Transcript unavailable (tap to retry)"
1342
+        }
1343
+    }
1344
+
1345
+    private func aiCompanionMeetingFromRecording(_ recording: MeetingRecordingSummary) -> ScheduledMeeting? {
1346
+        guard let meetURL = URL(string: recording.meetURLString) else { return nil }
1347
+        return ScheduledMeeting(
1348
+            id: recording.id,
1349
+            title: recording.title,
1350
+            subtitle: nil,
1351
+            startDate: recording.startedAt,
1352
+            endDate: recording.endedAt,
1353
+            meetURL: meetURL,
1354
+            isAllDay: false
1355
+        )
1356
+    }
1357
+
1358
+    @discardableResult
1359
+    private func aiCompanionUpdateRecording(meetingId: String, mutate: (inout MeetingRecordingSummary) -> Void) -> MeetingRecordingSummary? {
1360
+        guard let idx = aiCompanionLocalRecordings.firstIndex(where: { $0.id == meetingId }) else { return nil }
1361
+        mutate(&aiCompanionLocalRecordings[idx])
1362
+        persistAiCompanionLocalRecordings()
1363
+        return aiCompanionLocalRecordings[idx]
1364
+    }
1365
+
1366
+    private func aiCompanionRefreshTranscriptStatusLabels(forMeetingID meetingId: String) {
1367
+        guard let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }) else { return }
1368
+        let statusText = aiCompanionTranscriptStatusText(for: recording)
1369
+        for (buttonId, linkedMeetingId) in aiCompanionTranscriptMeetingIdByView where linkedMeetingId == meetingId {
1370
+            aiCompanionTranscriptStatusLabelByView[buttonId]?.stringValue = statusText
1371
+        }
1372
+    }
1373
+
1287
     private func localRecordingDirectoryURL() -> URL {
1374
     private func localRecordingDirectoryURL() -> URL {
1288
         let base = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first
1375
         let base = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first
1289
             ?? URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
1376
             ?? URL(fileURLWithPath: NSTemporaryDirectory(), isDirectory: true)
@@ -1378,18 +1465,22 @@ private extension ViewController {
1378
 
1465
 
1379
     private func startMeetingRecording(meetingTitle: String, meetingURL: URL) {
1466
     private func startMeetingRecording(meetingTitle: String, meetingURL: URL) {
1380
         let recordingID = UUID().uuidString
1467
         let recordingID = UUID().uuidString
1381
-        let outputURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID).m4a")
1468
+        let outputURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID)-system.m4a")
1469
+        let microphoneURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID)-mic.m4a")
1382
 
1470
 
1383
         activeMeetingRecordingSession = ActiveMeetingRecordingSession(
1471
         activeMeetingRecordingSession = ActiveMeetingRecordingSession(
1384
             id: recordingID,
1472
             id: recordingID,
1385
             title: meetingTitle,
1473
             title: meetingTitle,
1386
             meetURL: meetingURL,
1474
             meetURL: meetingURL,
1387
             startedAt: Date(),
1475
             startedAt: Date(),
1388
-            audioFileURL: outputURL
1476
+            systemAudioFileURL: outputURL,
1477
+            microphoneAudioFileURL: microphoneURL
1389
         )
1478
         )
1390
         pageCache[.aiCompanion] = nil
1479
         pageCache[.aiCompanion] = nil
1391
         if selectedSidebarPage == .aiCompanion { showSidebarPage(.aiCompanion) }
1480
         if selectedSidebarPage == .aiCompanion { showSidebarPage(.aiCompanion) }
1392
 
1481
 
1482
+        startMicrophoneRecording(at: microphoneURL, showToast: false)
1483
+
1393
         if #available(macOS 13.0, *) {
1484
         if #available(macOS 13.0, *) {
1394
             let systemRecorder = MeetingSystemAudioRecorder(outputURL: outputURL)
1485
             let systemRecorder = MeetingSystemAudioRecorder(outputURL: outputURL)
1395
             activeMeetingSystemAudioStopper = { [systemRecorder] in
1486
             activeMeetingSystemAudioStopper = { [systemRecorder] in
@@ -1400,22 +1491,22 @@ private extension ViewController {
1400
                 do {
1491
                 do {
1401
                     try await systemRecorder.start()
1492
                     try await systemRecorder.start()
1402
                     await MainActor.run {
1493
                     await MainActor.run {
1403
-                        self.showTopToast(message: "Meeting recording started (meeting audio)", isError: false)
1494
+                        self.showTopToast(message: "Meeting recording started (meeting + microphone)", isError: false)
1404
                     }
1495
                     }
1405
                 } catch {
1496
                 } catch {
1406
                     await MainActor.run {
1497
                     await MainActor.run {
1407
                         self.activeMeetingSystemAudioStopper = nil
1498
                         self.activeMeetingSystemAudioStopper = nil
1408
-                        self.startMicrophoneFallbackRecording(at: outputURL)
1499
+                        self.showTopToast(message: "System audio unavailable. Recording microphone only.", isError: true)
1409
                     }
1500
                     }
1410
                 }
1501
                 }
1411
             }
1502
             }
1412
             return
1503
             return
1413
         }
1504
         }
1414
 
1505
 
1415
-        startMicrophoneFallbackRecording(at: outputURL)
1506
+        showTopToast(message: "Meeting recording started (microphone only)", isError: false)
1416
     }
1507
     }
1417
 
1508
 
1418
-    private func startMicrophoneFallbackRecording(at outputURL: URL) {
1509
+    private func startMicrophoneRecording(at outputURL: URL, showToast: Bool) {
1419
         let settings: [String: Any] = [
1510
         let settings: [String: Any] = [
1420
             AVFormatIDKey: kAudioFormatMPEG4AAC,
1511
             AVFormatIDKey: kAudioFormatMPEG4AAC,
1421
             AVSampleRateKey: 44_100,
1512
             AVSampleRateKey: 44_100,
@@ -1427,7 +1518,9 @@ private extension ViewController {
1427
             recorder.prepareToRecord()
1518
             recorder.prepareToRecord()
1428
             recorder.record()
1519
             recorder.record()
1429
             activeMeetingAudioRecorder = recorder
1520
             activeMeetingAudioRecorder = recorder
1430
-            showTopToast(message: "Meeting recording started (microphone only)", isError: false)
1521
+            if showToast {
1522
+                showTopToast(message: "Meeting recording started (microphone only)", isError: false)
1523
+            }
1431
         } catch {
1524
         } catch {
1432
             activeMeetingRecordingSession = nil
1525
             activeMeetingRecordingSession = nil
1433
             showSimpleAlert(title: "Could not start recording", message: error.localizedDescription)
1526
             showSimpleAlert(title: "Could not start recording", message: error.localizedDescription)
@@ -1442,26 +1535,126 @@ private extension ViewController {
1442
         guard let session = activeMeetingRecordingSession else { return }
1535
         guard let session = activeMeetingRecordingSession else { return }
1443
         let stopSystemAudio = activeMeetingSystemAudioStopper
1536
         let stopSystemAudio = activeMeetingSystemAudioStopper
1444
         activeMeetingSystemAudioStopper = nil
1537
         activeMeetingSystemAudioStopper = nil
1445
-        if let stopSystemAudio { Task { await stopSystemAudio() } }
1446
         activeMeetingAudioRecorder?.stop()
1538
         activeMeetingAudioRecorder?.stop()
1447
         activeMeetingAudioRecorder = nil
1539
         activeMeetingAudioRecorder = nil
1448
         activeMeetingRecordingSession = nil
1540
         activeMeetingRecordingSession = nil
1541
+        Task { [weak self] in
1542
+            guard let self else { return }
1543
+            if let stopSystemAudio { await stopSystemAudio() }
1544
+            let finalURL = await self.finalizeMeetingAudioFile(
1545
+                systemURL: session.systemAudioFileURL,
1546
+                microphoneURL: session.microphoneAudioFileURL,
1547
+                recordingID: session.id
1548
+            )
1549
+            await MainActor.run {
1550
+                let summary = MeetingRecordingSummary(
1551
+                    id: session.id,
1552
+                    title: session.title,
1553
+                    meetURLString: session.meetURL.absoluteString,
1554
+                    startedAt: session.startedAt,
1555
+                    endedAt: Date(),
1556
+                    audioFilePath: finalURL.path,
1557
+                    transcriptStatusRaw: MeetingTranscriptStatus.notRequested.rawValue,
1558
+                    transcriptSourceRaw: nil,
1559
+                    transcriptText: nil,
1560
+                    transcriptErrorMessage: nil
1561
+                )
1562
+                self.aiCompanionLocalRecordings.insert(summary, at: 0)
1563
+                self.aiCompanionLocalRecordings.sort(by: { $0.endedAt > $1.endedAt })
1564
+                self.persistAiCompanionLocalRecordings()
1565
+                self.aiCompanionStartTranscriptProcessing(forMeetingID: summary.id, requestId: nil, interactiveAuth: false, forceRegenerate: false)
1566
+                self.pageCache[.aiCompanion] = nil
1567
+                self.showTopToast(message: "Meeting recording saved", isError: false)
1568
+                if self.selectedSidebarPage == .aiCompanion {
1569
+                    self.showSidebarPage(.aiCompanion)
1570
+                }
1571
+            }
1572
+        }
1573
+    }
1449
 
1574
 
1450
-        let summary = MeetingRecordingSummary(
1451
-            id: session.id,
1452
-            title: session.title,
1453
-            meetURLString: session.meetURL.absoluteString,
1454
-            startedAt: session.startedAt,
1455
-            endedAt: Date(),
1456
-            audioFilePath: session.audioFileURL.path
1457
-        )
1458
-        aiCompanionLocalRecordings.insert(summary, at: 0)
1459
-        aiCompanionLocalRecordings.sort(by: { $0.endedAt > $1.endedAt })
1460
-        persistAiCompanionLocalRecordings()
1461
-        pageCache[.aiCompanion] = nil
1462
-        showTopToast(message: "Meeting recording saved", isError: false)
1463
-        if selectedSidebarPage == .aiCompanion {
1464
-            showSidebarPage(.aiCompanion)
1575
+    private func fileSize(at url: URL) -> Int64 {
1576
+        let attrs = try? FileManager.default.attributesOfItem(atPath: url.path)
1577
+        return attrs?[.size] as? Int64 ?? 0
1578
+    }
1579
+
1580
+    private func hasAudioPayload(at url: URL, minBytes: Int64 = 10_000) -> Bool {
1581
+        guard FileManager.default.fileExists(atPath: url.path) else { return false }
1582
+        return fileSize(at: url) >= minBytes
1583
+    }
1584
+
1585
+    private func finalizeMeetingAudioFile(systemURL: URL, microphoneURL: URL, recordingID: String) async -> URL {
1586
+        let destinationURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID).m4a")
1587
+        let hasSystem = hasAudioPayload(at: systemURL)
1588
+        let hasMic = hasAudioPayload(at: microphoneURL)
1589
+
1590
+        if hasSystem && hasMic {
1591
+            do {
1592
+                try await mixAudioFiles(systemURL: systemURL, microphoneURL: microphoneURL, destinationURL: destinationURL)
1593
+                try? FileManager.default.removeItem(at: systemURL)
1594
+                try? FileManager.default.removeItem(at: microphoneURL)
1595
+                return destinationURL
1596
+            } catch {
1597
+                // Fall back to best available single track.
1598
+            }
1599
+        }
1600
+
1601
+        let chosenURL: URL
1602
+        if hasSystem {
1603
+            chosenURL = systemURL
1604
+        } else if hasMic {
1605
+            chosenURL = microphoneURL
1606
+        } else {
1607
+            let systemSize = fileSize(at: systemURL)
1608
+            let micSize = fileSize(at: microphoneURL)
1609
+            chosenURL = systemSize >= micSize ? systemURL : microphoneURL
1610
+        }
1611
+
1612
+        if chosenURL.path != destinationURL.path {
1613
+            try? FileManager.default.removeItem(at: destinationURL)
1614
+            do {
1615
+                try FileManager.default.copyItem(at: chosenURL, to: destinationURL)
1616
+            } catch {
1617
+                return chosenURL
1618
+            }
1619
+        }
1620
+        try? FileManager.default.removeItem(at: systemURL)
1621
+        try? FileManager.default.removeItem(at: microphoneURL)
1622
+        return destinationURL
1623
+    }
1624
+
1625
+    private func mixAudioFiles(systemURL: URL, microphoneURL: URL, destinationURL: URL) async throws {
1626
+        try? FileManager.default.removeItem(at: destinationURL)
1627
+
1628
+        let composition = AVMutableComposition()
1629
+        guard let systemTrack = composition.addMutableTrack(withMediaType: .audio, preferredTrackID: kCMPersistentTrackID_Invalid),
1630
+              let micTrack = composition.addMutableTrack(withMediaType: .audio, preferredTrackID: kCMPersistentTrackID_Invalid) else {
1631
+            throw NSError(domain: "MeetingAudioMix", code: 1, userInfo: [NSLocalizedDescriptionKey: "Unable to create audio composition tracks."])
1632
+        }
1633
+
1634
+        let systemAsset = AVURLAsset(url: systemURL)
1635
+        let micAsset = AVURLAsset(url: microphoneURL)
1636
+
1637
+        if let src = try await systemAsset.loadTracks(withMediaType: .audio).first {
1638
+            let duration = try await systemAsset.load(.duration)
1639
+            try systemTrack.insertTimeRange(CMTimeRange(start: .zero, duration: duration), of: src, at: .zero)
1640
+        }
1641
+        if let src = try await micAsset.loadTracks(withMediaType: .audio).first {
1642
+            let duration = try await micAsset.load(.duration)
1643
+            try micTrack.insertTimeRange(CMTimeRange(start: .zero, duration: duration), of: src, at: .zero)
1644
+        }
1645
+
1646
+        guard let export = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A) else {
1647
+            throw NSError(domain: "MeetingAudioMix", code: 2, userInfo: [NSLocalizedDescriptionKey: "Unable to create audio export session."])
1648
+        }
1649
+        export.outputURL = destinationURL
1650
+        export.outputFileType = .m4a
1651
+        await withCheckedContinuation { continuation in
1652
+            export.exportAsynchronously {
1653
+                continuation.resume()
1654
+            }
1655
+        }
1656
+        if export.status != .completed {
1657
+            throw export.error ?? NSError(domain: "MeetingAudioMix", code: 3, userInfo: [NSLocalizedDescriptionKey: "Audio mix export failed."])
1465
         }
1658
         }
1466
     }
1659
     }
1467
 
1660
 
@@ -2228,7 +2421,8 @@ private extension ViewController {
2228
         aiCompanionSpeechTextByView.removeAll()
2421
         aiCompanionSpeechTextByView.removeAll()
2229
         aiCompanionTranscriptMeetingIdByView.removeAll()
2422
         aiCompanionTranscriptMeetingIdByView.removeAll()
2230
         aiCompanionTranscriptStatusLabelByView.removeAll()
2423
         aiCompanionTranscriptStatusLabelByView.removeAll()
2231
-        aiCompanionTranscriptTextByMeetingId.removeAll()
2424
+        aiCompanionTranscriptTaskByMeetingId.values.forEach { $0.cancel() }
2425
+        aiCompanionTranscriptTaskByMeetingId.removeAll()
2232
         aiCompanionTranscriptCurrentRequestId = nil
2426
         aiCompanionTranscriptCurrentRequestId = nil
2233
         aiCompanionTranscriptWindow?.close()
2427
         aiCompanionTranscriptWindow?.close()
2234
         aiCompanionTranscriptWindow = nil
2428
         aiCompanionTranscriptWindow = nil
@@ -2424,7 +2618,7 @@ private extension ViewController {
2424
 
2618
 
2425
         aiCompanionTranscriptMeetingIdByView[ObjectIdentifier(transcriptButton)] = recording.id
2619
         aiCompanionTranscriptMeetingIdByView[ObjectIdentifier(transcriptButton)] = recording.id
2426
 
2620
 
2427
-        let transcriptStatusLabel = textLabel("Transcript not loaded", font: typography.fieldLabel, color: palette.textMuted)
2621
+        let transcriptStatusLabel = textLabel(aiCompanionTranscriptStatusText(for: recording), font: typography.fieldLabel, color: palette.textMuted)
2428
         transcriptStatusLabel.alignment = .left
2622
         transcriptStatusLabel.alignment = .left
2429
         transcriptStatusLabel.maximumNumberOfLines = 2
2623
         transcriptStatusLabel.maximumNumberOfLines = 2
2430
         transcriptStatusLabel.lineBreakMode = .byTruncatingTail
2624
         transcriptStatusLabel.lineBreakMode = .byTruncatingTail
@@ -2663,47 +2857,144 @@ private extension ViewController {
/// Handles a tap on a recording's transcript button.
///
/// When the recording is marked ready and has non-blank stored text, the transcript
/// window is shown with that text right away. Otherwise the window opens with a
/// placeholder and a forced, interactive transcript run is started, tagged with a
/// fresh request id that is also stored in `aiCompanionTranscriptCurrentRequestId`.
///
/// - Parameter sender: The transcript button that was tapped; its identity is the key
///   into the per-view meeting-id and status-label maps.
@objc private func aiCompanionTranscriptTapped(_ sender: NSButton) {
    let buttonKey = ObjectIdentifier(sender)
    guard let meetingId = aiCompanionTranscriptMeetingIdByView[buttonKey] else { return }

    let matchingRecording = aiCompanionLocalRecordings.first { $0.id == meetingId }
    guard let recording = matchingRecording else {
        aiCompanionTranscriptStatusLabelByView[buttonKey]?.stringValue = "Transcript unavailable (tap to retry)"
        showSimpleAlert(title: "Transcript unavailable", message: "Could not find recording details for this meeting.")
        return
    }

    // Fast path: a stored transcript only counts when the status is ready AND the text is non-blank.
    let isReady = aiCompanionTranscriptStatus(for: recording) == .ready
    let storedText = recording.transcriptText?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    if isReady && !storedText.isEmpty {
        aiCompanionPresentTranscriptWindow(meetingTitle: recording.title, initialText: storedText)
        aiCompanionTranscriptStatusLabelByView[buttonKey]?.stringValue = aiCompanionTranscriptStatusText(for: recording)
        return
    }

    let processingMessage = "Transcript processing..."
    aiCompanionTranscriptStatusLabelByView[buttonKey]?.stringValue = processingMessage

    // The new request id is what the processing run compares against before updating the window.
    let requestId = UUID()
    aiCompanionTranscriptCurrentRequestId = requestId

    aiCompanionPresentTranscriptWindow(meetingTitle: recording.title, initialText: processingMessage)
    aiCompanionStartTranscriptProcessing(
        forMeetingID: meetingId,
        requestId: requestId,
        interactiveAuth: true,
        forceRegenerate: true
    )
}
2882
+
/// Starts (or restarts) transcript processing for the recording with id `meetingId`.
///
/// Returns immediately when `forceRegenerate` is false and the recording is already
/// marked ready. Otherwise it cancels any tracked run for the same meeting, marks the
/// recording as processing, refreshes the UI and launches a new task that is tracked
/// in `aiCompanionTranscriptTaskByMeetingId`.
///
/// - Parameters:
///   - meetingId: Identifier of the local recording to process.
///   - requestId: Token of the request that opened the transcript window, or `nil`.
///     It only gates the transcript-window text update; the outcome itself is always
///     stored on the recording.
///   - interactiveAuth: Forwarded to `aiCompanionFetchOrGenerateTranscript`.
///   - forceRegenerate: When true, clears the stored text and source and reruns even
///     if the recording is already marked ready.
private func aiCompanionStartTranscriptProcessing(
    forMeetingID meetingId: String,
    requestId: UUID?,
    interactiveAuth: Bool,
    forceRegenerate: Bool
) {
    if !forceRegenerate,
       let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }),
       aiCompanionTranscriptStatus(for: recording) == .ready {
        return
    }

    // Supersede any run already tracked for this meeting; its handle is overwritten below.
    aiCompanionTranscriptTaskByMeetingId[meetingId]?.cancel()
    _ = aiCompanionUpdateRecording(meetingId: meetingId) { recording in
        recording.transcriptStatusRaw = MeetingTranscriptStatus.processing.rawValue
        recording.transcriptErrorMessage = nil
        if forceRegenerate {
            recording.transcriptText = nil
            recording.transcriptSourceRaw = nil
        }
    }
    aiCompanionRefreshTranscriptStatusLabels(forMeetingID: meetingId)
    aiCompanionReloadCompanionPageIfVisible()

    let presentingWindow = view.window
    let task = Task { [weak self] in
        guard let self else { return }

        let outcome: Result<(text: String, source: MeetingTranscriptSource), Error>
        do {
            let transcript = try await self.aiCompanionFetchOrGenerateTranscript(
                meetingId: meetingId,
                interactiveAuth: interactiveAuth,
                presentingWindow: presentingWindow
            )
            outcome = .success(transcript)
        } catch {
            outcome = .failure(error)
        }

        await MainActor.run {
            // Read the cancellation flag here, in the same closure that mutates the shared state.
            self.aiCompanionFinishTranscriptProcessing(
                meetingId: meetingId,
                requestId: requestId,
                wasCancelled: Task.isCancelled,
                outcome: outcome
            )
        }
    }
    aiCompanionTranscriptTaskByMeetingId[meetingId] = task
}

/// Stores the outcome of a finished transcript run and refreshes the UI.
///
/// Invoked from `MainActor.run` by `aiCompanionStartTranscriptProcessing`.
private func aiCompanionFinishTranscriptProcessing(
    meetingId: String,
    requestId: UUID?,
    wasCancelled: Bool,
    outcome: Result<(text: String, source: MeetingTranscriptSource), Error>
) {
    if wasCancelled {
        // A cancelled run that has been replaced must not remove the newer run's handle
        // or overwrite its `.processing` state. With no replacement tracked, fall
        // through so the recording is not left in `.processing`.
        guard aiCompanionTranscriptTaskByMeetingId[meetingId] == nil else { return }
    } else {
        // Not cancelled means nothing replaced this run, so the tracked handle is its own.
        aiCompanionTranscriptTaskByMeetingId[meetingId] = nil
    }

    // Always persist the outcome: a request id that went stale only means the window
    // has moved on, not that the recording should stay in `.processing`.
    let windowText: String
    switch outcome {
    case .success(let transcript):
        _ = aiCompanionUpdateRecording(meetingId: meetingId) { recording in
            recording.transcriptStatusRaw = MeetingTranscriptStatus.ready.rawValue
            recording.transcriptSourceRaw = transcript.source.rawValue
            recording.transcriptText = transcript.text
            recording.transcriptErrorMessage = nil
        }
        windowText = transcript.text
    case .failure(let error):
        let msg = error.localizedDescription.isEmpty ? "Failed to load transcript." : error.localizedDescription
        _ = aiCompanionUpdateRecording(meetingId: meetingId) { recording in
            recording.transcriptStatusRaw = MeetingTranscriptStatus.failed.rawValue
            recording.transcriptErrorMessage = msg
        }
        windowText = "Transcript unavailable.\n\n\(msg)"
    }

    // NOTE(review): runs started with a nil requestId still write into whatever transcript
    // window is open, as before — confirm that is intended for background processing.
    if requestId == nil || aiCompanionTranscriptCurrentRequestId == requestId {
        aiCompanionTranscriptTextView?.string = windowText
    }
    aiCompanionRefreshTranscriptStatusLabels(forMeetingID: meetingId)
    aiCompanionReloadCompanionPageIfVisible()
}

/// Drops the cached AI Companion page and shows it again when it is the selected page.
private func aiCompanionReloadCompanionPageIfVisible() {
    guard selectedSidebarPage == .aiCompanion else { return }
    pageCache[.aiCompanion] = nil
    showSidebarPage(.aiCompanion)
}
2955
+
/// Produces transcript text for the recording with id `meetingId`.
///
/// When `aiCompanionMeetingFromRecording` yields a meeting, the transcript is first
/// fetched via `aiCompanionFetchTranscriptText`. Any failure or blank result on that
/// path falls through to transcribing the recording's local audio file with the
/// OpenAI transcription client.
///
/// - Parameters:
///   - meetingId: Identifier of the local recording.
///   - interactiveAuth: When true, the access token comes from
///     `googleOAuth.validAccessToken(presentingWindow:)`; otherwise only an already
///     stored token is used.
///   - presentingWindow: Window handed to the OAuth flow when `interactiveAuth` is true.
/// - Returns: The trimmed, non-empty transcript text and the source that produced it.
/// - Throws: `CancellationError` if the task was cancelled before the fallback starts;
///   an `NSError` in domain `"AiCompanionTranscript"` when the recording is not found
///   (10), the local audio file is missing (12), no OpenAI API key is configured (13),
///   or the generated transcript is empty (14); or any error thrown by the
///   transcription client.
private func aiCompanionFetchOrGenerateTranscript(
    meetingId: String,
    interactiveAuth: Bool,
    presentingWindow: NSWindow?
) async throws -> (text: String, source: MeetingTranscriptSource) {
    guard let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }) else {
        throw NSError(domain: "AiCompanionTranscript", code: 10, userInfo: [NSLocalizedDescriptionKey: "Recording not found."])
    }

    if let meeting = aiCompanionMeetingFromRecording(recording) {
        do {
            let accessToken: String
            if interactiveAuth {
                accessToken = try await googleOAuth.validAccessToken(presentingWindow: presentingWindow)
            } else if let token = googleOAuth.loadTokens()?.accessToken {
                accessToken = token
            } else {
                throw NSError(domain: "AiCompanionTranscript", code: 11, userInfo: [NSLocalizedDescriptionKey: "Google account not connected."])
            }
            let text = try await aiCompanionFetchTranscriptText(for: meeting, accessToken: accessToken)
            let cleaned = text.trimmingCharacters(in: .whitespacesAndNewlines)
            if !cleaned.isEmpty {
                return (cleaned, .meetApi)
            }
        } catch {
            // Best effort: any failure on this path falls through to the OpenAI fallback below.
        }
    }

    // The catch-all above also swallows cancellation; stop here rather than reading the
    // audio file and starting a transcription request for a run that was cancelled.
    try Task.checkCancellation()

    let audioURL = URL(fileURLWithPath: recording.audioFilePath)
    guard FileManager.default.fileExists(atPath: audioURL.path) else {
        throw NSError(domain: "AiCompanionTranscript", code: 12, userInfo: [NSLocalizedDescriptionKey: "Local meeting audio is missing."])
    }
    guard let apiKey = configuredOpenAIAPIKey() else {
        throw NSError(domain: "AiCompanionTranscript", code: 13, userInfo: [NSLocalizedDescriptionKey: "OpenAI API key missing. Add OpenAIAPIKey to Info.plist or openai.apiKey to UserDefaults."])
    }
    let text = try await openAITranscriptionClient.transcribeAudioFile(fileURL: audioURL, apiKey: apiKey)
    let cleaned = text.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !cleaned.isEmpty else {
        throw NSError(domain: "AiCompanionTranscript", code: 14, userInfo: [NSLocalizedDescriptionKey: "Generated transcript was empty."])
    }
    return (cleaned, .localAudioOpenAI)
}
2708
 
2999
 
2709
     @objc private func aiCompanionStopRecordingTapped(_ sender: NSButton) {
3000
     @objc private func aiCompanionStopRecordingTapped(_ sender: NSButton) {
@@ -5766,6 +6057,8 @@ private final class MeetingSystemAudioRecorder: NSObject, SCStreamOutput, SCStre
5766
         config.minimumFrameInterval = CMTime(value: 1, timescale: 2)
6057
         config.minimumFrameInterval = CMTime(value: 1, timescale: 2)
5767
         config.queueDepth = 1
6058
         config.queueDepth = 1
5768
         config.capturesAudio = true
6059
         config.capturesAudio = true
6060
+        config.sampleRate = 48_000
6061
+        config.channelCount = 2
5769
         if #available(macOS 13.0, *) {
6062
         if #available(macOS 13.0, *) {
5770
             config.excludesCurrentProcessAudio = true
6063
             config.excludesCurrentProcessAudio = true
5771
         }
6064
         }