Selaa lähdekoodia

Improve transcript reliability with chunked multi-channel Apple Speech processing.

Keep system and microphone recordings separate, transcribe each channel in 30-second chunks with progress updates, and merge the per-channel segments into a single timeline while preserving compatibility with older mixed-only recordings.

Co-authored-by: Cursor <cursoragent@cursor.com>
huzaifahayat12 1 kuukausi sitten
vanhempi
commit
141b21579f

+ 315 - 0
meetings_app/Transcription/MeetingTranscriptionService.swift

@@ -0,0 +1,315 @@
1
+import Foundation
2
+import AVFoundation
3
+import Speech
4
+
5
/// One span of transcript text, attributed to a channel and placed on the
/// recording timeline.
///
/// Both offsets are expressed in seconds from the start of the recording.
struct TranscriptSegment: Hashable, Codable {
    /// Label of the channel the text is attributed to.
    let speaker: String
    /// Where the span begins, in seconds from the start of the recording.
    let startOffset: TimeInterval
    /// Where the span ends, in seconds from the start of the recording.
    let endOffset: TimeInterval
    /// The transcribed text for this span.
    let text: String
}
13
+
14
/// Speaker labels applied to segments when per-channel transcripts are merged.
///
/// The raw value is the label text stored in `TranscriptSegment.speaker`.
enum TranscriptSpeaker: String {
    /// Audio captured from the microphone channel.
    case microphone = "You"

    /// Audio captured from the system channel.
    case system = "Meeting"
}
19
+
20
/// A point-in-time view of transcription progress, intended for UI status text.
struct MeetingTranscriptionProgress: Sendable {
    /// Number of chunks planned across all channels being transcribed.
    let totalChunks: Int

    /// Number of those chunks that have finished processing so far.
    let completedChunks: Int
}
25
+
26
/// Failures reported by the meeting transcription flow, each with a
/// user-presentable description.
enum MeetingTranscriptionError: Error, LocalizedError {
    /// Speech recognition permission was denied.
    case authorizationDenied
    /// Speech recognition is restricted on this machine.
    case authorizationRestricted
    /// No speech recognizer is available for the given locale description.
    case recognizerUnavailable(locale: String)
    /// There was no audio to transcribe.
    case noAudioToTranscribe

    /// Human-readable message surfaced through `localizedDescription`.
    var errorDescription: String? {
        let message: String
        switch self {
        case .authorizationDenied:
            message = "Speech recognition permission denied. Enable it in System Settings and try again."
        case .authorizationRestricted:
            message = "Speech recognition is restricted on this Mac."
        case let .recognizerUnavailable(locale):
            message = "Speech recognizer is unavailable for \(locale)."
        case .noAudioToTranscribe:
            message = "No audio was available to transcribe."
        }
        return message
    }
}
45
+
46
/// Transcribes meeting audio by running Apple Speech on per-channel files
/// in fixed-size chunks, falling back across a list of locales per chunk.
final class MeetingTranscriptionService {
    /// Frame range of one chunk inside a channel file, plus the matching time
    /// range in seconds from the start of that file.
    private struct ChunkPlan {
        let index: Int
        let startFrame: AVAudioFramePosition
        let frameCount: AVAudioFrameCount
        let startOffset: TimeInterval
        let endOffset: TimeInterval
    }

    /// A channel file together with the chunks planned for it.
    private struct ChannelPlan {
        let url: URL
        let chunks: [ChunkPlan]
    }

    /// Shared progress counter used across concurrent channels.
    private actor ProgressCounter {
        private let total: Int
        private var completed: Int = 0
        private let onProgress: (@Sendable (MeetingTranscriptionProgress) -> Void)?

        init(total: Int, onProgress: (@Sendable (MeetingTranscriptionProgress) -> Void)?) {
            self.total = total
            self.onProgress = onProgress
        }

        /// Reports the starting state (zero completed chunks).
        func emitInitial() {
            onProgress?(MeetingTranscriptionProgress(totalChunks: total, completedChunks: 0))
        }

        /// Records one more finished chunk and reports the new state.
        func increment() {
            completed += 1
            onProgress?(MeetingTranscriptionProgress(totalChunks: total, completedChunks: completed))
        }
    }

    /// Lock-protected state bridging one Speech recognition callback to a
    /// continuation. It guarantees the continuation is resumed exactly once,
    /// whether by a recognition result, a recognition error, or cancellation.
    private final class RecognitionState: @unchecked Sendable {
        private let lock = NSLock()
        private var continuation: CheckedContinuation<String, Error>?
        private var task: SFSpeechRecognitionTask?
        private var isFinished = false

        /// Stores the continuation to resume later.
        ///
        /// - Returns: `false` if cancellation already happened; in that case the
        ///   continuation has been resumed with `CancellationError` and no
        ///   recognition should be started.
        func begin(_ continuation: CheckedContinuation<String, Error>) -> Bool {
            lock.lock()
            let alreadyFinished = isFinished
            if alreadyFinished == false {
                self.continuation = continuation
            }
            lock.unlock()

            if alreadyFinished {
                continuation.resume(throwing: CancellationError())
                return false
            }
            return true
        }

        /// Remembers the running recognition task so it can be cancelled later.
        /// If the outcome is already decided the task is cancelled right away.
        func attach(_ task: SFSpeechRecognitionTask) {
            lock.lock()
            let alreadyFinished = isFinished
            if alreadyFinished == false {
                self.task = task
            }
            lock.unlock()

            if alreadyFinished {
                task.cancel()
            }
        }

        /// Resumes the stored continuation with `result`; later calls are no-ops.
        func finish(with result: Result<String, Error>) {
            lock.lock()
            let pending = continuation
            continuation = nil
            task = nil
            isFinished = true
            lock.unlock()

            pending?.resume(with: result)
        }

        /// Cancels the recognition task (if any) and fails the continuation
        /// with `CancellationError` unless it was already resumed.
        func cancel() {
            lock.lock()
            let running = task
            lock.unlock()

            running?.cancel()
            finish(with: .failure(CancellationError()))
        }
    }

    /// Error domain in which the Speech framework reports "no speech" outcomes.
    private static let noSpeechErrorDomain = "kAFAssistantErrorDomain"
    /// Codes in that domain treated as "no speech detected", which is a normal
    /// outcome for a silent chunk rather than a failure.
    private static let noSpeechErrorCodes: Set<Int> = [203, 1110]

    /// Ensures speech recognition is authorized, prompting the user when the
    /// status has not been determined yet.
    ///
    /// - Throws: `MeetingTranscriptionError.authorizationDenied` or
    ///   `.authorizationRestricted` when recognition may not be used.
    func requestAuthorization() async throws {
        switch SFSpeechRecognizer.authorizationStatus() {
        case .authorized:
            return
        case .notDetermined:
            let status: SFSpeechRecognizerAuthorizationStatus = await withCheckedContinuation { continuation in
                SFSpeechRecognizer.requestAuthorization { continuation.resume(returning: $0) }
            }
            guard status == .authorized else { throw MeetingTranscriptionError.authorizationDenied }
        case .denied:
            throw MeetingTranscriptionError.authorizationDenied
        case .restricted:
            throw MeetingTranscriptionError.authorizationRestricted
        @unknown default:
            throw MeetingTranscriptionError.authorizationDenied
        }
    }

    /// Transcribes the mic and system channel audio (either may be nil) and
    /// returns a flat, time-ordered list of transcript segments labeled with
    /// the speaker channel.
    ///
    /// Segments that start at the same offset keep microphone-before-system
    /// order, so the merged timeline is deterministic.
    ///
    /// - Parameters:
    ///   - micURL: Microphone channel file; ignored when nil or missing on disk.
    ///   - systemURL: System channel file; ignored when nil or missing on disk.
    ///   - chunkSeconds: Chunk length in seconds. Non-finite or non-positive
    ///     values fall back to 30 seconds.
    ///   - overlapSeconds: Overlap between consecutive chunks in seconds. It is
    ///     clamped to at most half a chunk so the number of chunks stays bounded.
    ///   - locales: Locales tried in order for each chunk; an empty list means
    ///     `en-US`.
    ///   - onProgress: Called with the initial state and after every chunk.
    /// - Returns: Non-empty segments sorted by `startOffset`.
    /// - Throws: An authorization error, `noAudioToTranscribe` when no chunk
    ///   could be planned, `recognizerUnavailable` when no requested locale has
    ///   an available recognizer, `CancellationError` on cancellation, or an
    ///   audio-file reading error.
    func transcribeMeeting(
        micURL: URL?,
        systemURL: URL?,
        chunkSeconds: TimeInterval = 30,
        overlapSeconds: TimeInterval = 0,
        locales: [Locale] = [Locale(identifier: "en-US")],
        onProgress: (@Sendable (MeetingTranscriptionProgress) -> Void)? = nil
    ) async throws -> [TranscriptSegment] {
        try await requestAuthorization()

        let micPlan = try makeChannelPlan(for: micURL, chunkSeconds: chunkSeconds, overlapSeconds: overlapSeconds)
        let systemPlan = try makeChannelPlan(for: systemURL, chunkSeconds: chunkSeconds, overlapSeconds: overlapSeconds)

        let totalChunks = (micPlan?.chunks.count ?? 0) + (systemPlan?.chunks.count ?? 0)
        guard totalChunks > 0 else {
            throw MeetingTranscriptionError.noAudioToTranscribe
        }

        let effectiveLocales = locales.isEmpty ? [Locale(identifier: "en-US")] : locales

        // Fail loudly when recognition cannot run at all; otherwise every chunk
        // would silently come back empty and the real cause would be hidden.
        let hasUsableRecognizer = effectiveLocales.contains { locale in
            SFSpeechRecognizer(locale: locale)?.isAvailable == true
        }
        guard hasUsableRecognizer else {
            throw MeetingTranscriptionError.recognizerUnavailable(
                locale: effectiveLocales.map(\.identifier).joined(separator: ", ")
            )
        }

        let counter = ProgressCounter(total: totalChunks, onProgress: onProgress)
        await counter.emitInitial()

        async let micSegments = self.transcribeChannel(
            plan: micPlan,
            speaker: .microphone,
            locales: effectiveLocales,
            counter: counter
        )
        async let systemSegments = self.transcribeChannel(
            plan: systemPlan,
            speaker: .system,
            locales: effectiveLocales,
            counter: counter
        )

        let combined = try await micSegments + systemSegments

        // Both channels are chunked on the same grid, so equal start offsets are
        // the norm. The standard library does not promise a stable sort, so the
        // original position is used as an explicit tie-breaker.
        return combined
            .enumerated()
            .sorted { lhs, rhs in
                if lhs.element.startOffset != rhs.element.startOffset {
                    return lhs.element.startOffset < rhs.element.startOffset
                }
                return lhs.offset < rhs.offset
            }
            .map(\.element)
    }

    // MARK: - Chunk planning

    /// Builds the plan for one channel, or nil when the URL is nil, the file
    /// does not exist, or the file yields no chunks.
    private func makeChannelPlan(
        for url: URL?,
        chunkSeconds: TimeInterval,
        overlapSeconds: TimeInterval
    ) throws -> ChannelPlan? {
        guard let url, FileManager.default.fileExists(atPath: url.path) else { return nil }
        let chunks = try planChunks(for: url, chunkSeconds: chunkSeconds, overlapSeconds: overlapSeconds)
        return chunks.isEmpty ? nil : ChannelPlan(url: url, chunks: chunks)
    }

    /// Splits the file at `url` into consecutive chunks of `chunkSeconds`,
    /// each starting `chunkSeconds - overlapSeconds` after the previous one.
    ///
    /// Inputs are sanitized before any floating-point value is converted to an
    /// integer, because such conversions trap on NaN, infinity and out-of-range
    /// values.
    private func planChunks(for url: URL, chunkSeconds: TimeInterval, overlapSeconds: TimeInterval) throws -> [ChunkPlan] {
        let audioFile = try AVAudioFile(forReading: url)
        let sampleRate = audioFile.processingFormat.sampleRate
        let totalFrames = audioFile.length
        guard sampleRate.isFinite, sampleRate > 0, totalFrames > 0 else { return [] }

        // A chunk must fit in an AVAudioFrameCount because it is read into a
        // single PCM buffer.
        let frameLimit = Double(AVAudioFrameCount.max)

        let requestedChunkSeconds = (chunkSeconds.isFinite && chunkSeconds > 0) ? chunkSeconds : 30
        let chunkFrames = AVAudioFramePosition(min(frameLimit, max(1, requestedChunkSeconds * sampleRate)))

        // Cap the overlap at half a chunk: an overlap close to the chunk length
        // would advance only a few frames per step and plan millions of chunks.
        let requestedOverlapSeconds = (overlapSeconds.isFinite && overlapSeconds > 0) ? overlapSeconds : 0
        let requestedOverlapFrames = AVAudioFramePosition(min(frameLimit, requestedOverlapSeconds * sampleRate))
        let overlapFrames = min(chunkFrames / 2, requestedOverlapFrames)
        let step = max(AVAudioFramePosition(1), chunkFrames - overlapFrames)

        var plans: [ChunkPlan] = []
        for (index, start) in stride(from: AVAudioFramePosition(0), to: totalFrames, by: step).enumerated() {
            let end = min(start + chunkFrames, totalFrames)
            plans.append(ChunkPlan(
                index: index,
                startFrame: start,
                frameCount: AVAudioFrameCount(end - start),
                startOffset: Double(start) / sampleRate,
                endOffset: Double(end) / sampleRate
            ))
            // With overlap, a later start could still be inside the file even
            // though this chunk already reached the end; stop here.
            if end >= totalFrames { break }
        }
        return plans
    }

    // MARK: - Per-channel transcription

    /// Transcribes every planned chunk of one channel in order and returns the
    /// non-empty results as segments labeled with `speaker`.
    ///
    /// The progress counter is incremented once per chunk, including chunks
    /// that produce no text.
    private func transcribeChannel(
        plan: ChannelPlan?,
        speaker: TranscriptSpeaker,
        locales: [Locale],
        counter: ProgressCounter
    ) async throws -> [TranscriptSegment] {
        guard let plan else { return [] }

        // Open the file once for the whole channel instead of once per chunk.
        let audioFile = try AVAudioFile(forReading: plan.url)

        var segments: [TranscriptSegment] = []
        segments.reserveCapacity(plan.chunks.count)

        for chunk in plan.chunks {
            try Task.checkCancellation()
            let buffer = try readChunkBuffer(
                from: audioFile,
                startFrame: chunk.startFrame,
                frameCount: chunk.frameCount
            )

            var text = ""
            // A read that produced no frames has nothing to recognize.
            if buffer.frameLength > 0 {
                text = try await transcribeBufferWithLocaleFallback(buffer: buffer, locales: locales)
            }
            await counter.increment()

            guard text.isEmpty == false else { continue }
            segments.append(TranscriptSegment(
                speaker: speaker.rawValue,
                startOffset: chunk.startOffset,
                endOffset: chunk.endOffset,
                text: text
            ))
        }
        return segments
    }

    /// Reads `frameCount` frames starting at `startFrame` from `audioFile`
    /// into a newly allocated PCM buffer in the file's processing format.
    private func readChunkBuffer(
        from audioFile: AVAudioFile,
        startFrame: AVAudioFramePosition,
        frameCount: AVAudioFrameCount
    ) throws -> AVAudioPCMBuffer {
        audioFile.framePosition = startFrame
        guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: frameCount) else {
            throw NSError(domain: "MeetingTranscriptionService", code: 1, userInfo: [NSLocalizedDescriptionKey: "Unable to allocate audio buffer."])
        }
        try audioFile.read(into: buffer, frameCount: frameCount)
        return buffer
    }

    /// Tries each locale in order and returns the first non-empty, trimmed
    /// transcription, or an empty string when no locale produced text.
    ///
    /// A recognition error is retried once after a short pause before moving
    /// on to the next locale. Locales without an available recognizer are
    /// skipped.
    ///
    /// - Throws: `CancellationError` when the surrounding task is cancelled.
    private func transcribeBufferWithLocaleFallback(buffer: AVAudioPCMBuffer, locales: [Locale]) async throws -> String {
        for locale in locales {
            guard let recognizer = SFSpeechRecognizer(locale: locale), recognizer.isAvailable else { continue }

            attempts: for attempt in 0..<2 {
                try Task.checkCancellation()
                do {
                    let text = try await transcribeBuffer(buffer: buffer, recognizer: recognizer)
                        .trimmingCharacters(in: .whitespacesAndNewlines)
                    if text.isEmpty == false { return text }
                    // Recognition worked but heard nothing: try the next locale.
                    break attempts
                } catch is CancellationError {
                    throw CancellationError()
                } catch {
                    // One transient retry before moving on to the next locale.
                    if attempt == 0 {
                        try await Task.sleep(nanoseconds: 500_000_000)
                    }
                }
            }
        }
        return ""
    }

    /// Runs one recognition request over `buffer` and returns the final
    /// transcription, or an empty string when no speech was detected.
    ///
    /// Cancelling the surrounding task cancels the Speech recognition task and
    /// makes this call throw `CancellationError`.
    private func transcribeBuffer(buffer: AVAudioPCMBuffer, recognizer: SFSpeechRecognizer) async throws -> String {
        let request = SFSpeechAudioBufferRecognitionRequest()
        request.shouldReportPartialResults = false
        if #available(macOS 13.0, *) {
            request.addsPunctuation = true
        }

        let state = RecognitionState()
        return try await withTaskCancellationHandler {
            try await withCheckedThrowingContinuation { (continuation: CheckedContinuation<String, Error>) in
                // Already cancelled: the continuation was resumed inside `begin`.
                guard state.begin(continuation) else { return }

                let task = recognizer.recognitionTask(with: request) { result, error in
                    if let error {
                        let nsError = error as NSError
                        let isNoSpeech = nsError.domain == MeetingTranscriptionService.noSpeechErrorDomain
                            && MeetingTranscriptionService.noSpeechErrorCodes.contains(nsError.code)
                        if isNoSpeech {
                            state.finish(with: .success(""))
                        } else {
                            state.finish(with: .failure(error))
                        }
                        return
                    }
                    if let result, result.isFinal {
                        state.finish(with: .success(result.bestTranscription.formattedString))
                    }
                }
                state.attach(task)

                request.append(buffer)
                request.endAudio()
            }
        } onCancel: {
            state.cancel()
        }
    }
}
296
+
297
extension Array where Element == TranscriptSegment {
    /// Renders segments as a human-readable timeline, one line per segment,
    /// like: `[00:12] You: Hello everyone.`
    ///
    /// The timestamp is the segment's `startOffset`, shown as `MM:SS`, or as
    /// `HH:MM:SS` once it reaches one hour.
    ///
    /// - Returns: The lines joined with newlines; an empty string for an empty
    ///   array.
    func renderedTimelineText() -> String {
        map { segment in
            "[\(Self.timestamp(for: segment.startOffset))] \(segment.speaker): \(segment.text)"
        }.joined(separator: "\n")
    }

    /// Formats a second count as `MM:SS` or `HH:MM:SS`.
    ///
    /// Negative and non-finite inputs are shown as `00:00`: converting NaN or
    /// infinity to `Int` traps, and a negative value would print a stray minus
    /// sign inside the timestamp.
    private static func timestamp(for seconds: TimeInterval) -> String {
        let clamped = seconds.isFinite ? Swift.min(Swift.max(0, seconds), Double(Int32.max)) : 0
        let total = Int(clamped.rounded(.down))
        let h = total / 3600
        let m = (total % 3600) / 60
        let s = total % 60
        if h > 0 {
            return String(format: "%02d:%02d:%02d", h, m, s)
        }
        return String(format: "%02d:%02d", m, s)
    }
}

+ 101 - 28
meetings_app/ViewController.swift

@@ -242,6 +242,7 @@ final class ViewController: NSViewController {
242 242
     private enum MeetingTranscriptSource: String, Codable {
243 243
         case meetApi
244 244
         case localAudioAppleSpeech
245
+        case localMultiChannelAppleSpeech
245 246
     }
246 247
 
247 248
     private enum PaywallFooterAction {
@@ -266,9 +267,12 @@ final class ViewController: NSViewController {
266 267
         let startedAt: Date
267 268
         let endedAt: Date
268 269
         let audioFilePath: String
270
+        var microphoneAudioFilePath: String?
271
+        var systemAudioFilePath: String?
269 272
         var transcriptStatusRaw: String?
270 273
         var transcriptSourceRaw: String?
271 274
         var transcriptText: String?
275
+        var transcriptSegmentsJSON: String?
272 276
         var transcriptErrorMessage: String?
273 277
     }
274 278
 
@@ -474,6 +478,8 @@ final class ViewController: NSViewController {
474 478
     private let nonPremiumJoinTrialConsumedDefaultsKey = "join.nonPremiumTrialConsumed"
475 479
     private let aiCompanionLocalRecordingsDefaultsKey = "aiCompanion.localRecordings"
476 480
     private let ratingEligibleUsageSeconds: TimeInterval = 30 * 60
481
+    private let meetingTranscriptionService = MeetingTranscriptionService()
482
+    private var aiCompanionTranscriptProgressByMeetingId: [String: String] = [:]
477 483
     private var darkModeEnabled: Bool {
478 484
         get {
479 485
             let hasValue = UserDefaults.standard.object(forKey: darkModeDefaultsKey) != nil
@@ -1392,6 +1398,9 @@ private extension ViewController {
1392 1398
         case .notRequested:
1393 1399
             return "Transcript not requested"
1394 1400
         case .processing:
1401
+            if let progress = aiCompanionTranscriptProgressByMeetingId[recording.id], progress.isEmpty == false {
1402
+                return progress
1403
+            }
1395 1404
             return "Transcript processing..."
1396 1405
         case .ready:
1397 1406
             return "Transcript ready"
@@ -1604,7 +1613,7 @@ private extension ViewController {
1604 1613
         Task { [weak self] in
1605 1614
             guard let self else { return }
1606 1615
             if let stopSystemAudio { await stopSystemAudio() }
1607
-            let finalURL = await self.finalizeMeetingAudioFile(
1616
+            let finalized = await self.finalizeMeetingAudioFile(
1608 1617
                 systemURL: session.systemAudioFileURL,
1609 1618
                 microphoneURL: session.microphoneAudioFileURL,
1610 1619
                 recordingID: session.id
@@ -1616,10 +1625,13 @@ private extension ViewController {
1616 1625
                     meetURLString: session.meetURL.absoluteString,
1617 1626
                     startedAt: session.startedAt,
1618 1627
                     endedAt: Date(),
1619
-                    audioFilePath: finalURL.path,
1628
+                    audioFilePath: finalized.mixedURL.path,
1629
+                    microphoneAudioFilePath: finalized.microphoneURL?.path,
1630
+                    systemAudioFilePath: finalized.systemURL?.path,
1620 1631
                     transcriptStatusRaw: MeetingTranscriptStatus.notRequested.rawValue,
1621 1632
                     transcriptSourceRaw: nil,
1622 1633
                     transcriptText: nil,
1634
+                    transcriptSegmentsJSON: nil,
1623 1635
                     transcriptErrorMessage: nil
1624 1636
                 )
1625 1637
                 self.aiCompanionLocalRecordings.insert(summary, at: 0)
@@ -1645,44 +1657,54 @@ private extension ViewController {
1645 1657
         return fileSize(at: url) >= minBytes
1646 1658
     }
1647 1659
 
1648
-    private func finalizeMeetingAudioFile(systemURL: URL, microphoneURL: URL, recordingID: String) async -> URL {
1660
+    private struct FinalizedMeetingAudio {
1661
+        let mixedURL: URL
1662
+        let microphoneURL: URL?
1663
+        let systemURL: URL?
1664
+    }
1665
+
1666
+    private func finalizeMeetingAudioFile(systemURL: URL, microphoneURL: URL, recordingID: String) async -> FinalizedMeetingAudio {
1649 1667
         let destinationURL = localRecordingDirectoryURL().appendingPathComponent("\(recordingID).m4a")
1650 1668
         let hasSystem = hasAudioPayload(at: systemURL)
1651 1669
         let hasMic = hasAudioPayload(at: microphoneURL)
1652 1670
 
1671
+        if !hasSystem { try? FileManager.default.removeItem(at: systemURL) }
1672
+        if !hasMic { try? FileManager.default.removeItem(at: microphoneURL) }
1673
+
1674
+        let savedMicURL: URL? = hasMic ? microphoneURL : nil
1675
+        let savedSystemURL: URL? = hasSystem ? systemURL : nil
1676
+
1653 1677
         if hasSystem && hasMic {
1654 1678
             do {
1655 1679
                 try await mixAudioFiles(systemURL: systemURL, microphoneURL: microphoneURL, destinationURL: destinationURL)
1656
-                try? FileManager.default.removeItem(at: systemURL)
1657
-                try? FileManager.default.removeItem(at: microphoneURL)
1658
-                return destinationURL
1680
+                return FinalizedMeetingAudio(mixedURL: destinationURL, microphoneURL: savedMicURL, systemURL: savedSystemURL)
1659 1681
             } catch {
1660
-                // Fall back to best available single track.
1682
+                // Fall back to best available single track for playback; per-channel files stay intact.
1661 1683
             }
1662 1684
         }
1663 1685
 
1664
-        let chosenURL: URL
1686
+        let chosenURL: URL?
1665 1687
         if hasSystem {
1666 1688
             chosenURL = systemURL
1667 1689
         } else if hasMic {
1668 1690
             chosenURL = microphoneURL
1669 1691
         } else {
1670
-            let systemSize = fileSize(at: systemURL)
1671
-            let micSize = fileSize(at: microphoneURL)
1672
-            chosenURL = systemSize >= micSize ? systemURL : microphoneURL
1692
+            chosenURL = nil
1673 1693
         }
1674 1694
 
1675
-        if chosenURL.path != destinationURL.path {
1676
-            try? FileManager.default.removeItem(at: destinationURL)
1677
-            do {
1678
-                try FileManager.default.copyItem(at: chosenURL, to: destinationURL)
1679
-            } catch {
1680
-                return chosenURL
1695
+        if let chosenURL {
1696
+            if chosenURL.path != destinationURL.path {
1697
+                try? FileManager.default.removeItem(at: destinationURL)
1698
+                do {
1699
+                    try FileManager.default.copyItem(at: chosenURL, to: destinationURL)
1700
+                } catch {
1701
+                    return FinalizedMeetingAudio(mixedURL: chosenURL, microphoneURL: savedMicURL, systemURL: savedSystemURL)
1702
+                }
1681 1703
             }
1704
+            return FinalizedMeetingAudio(mixedURL: destinationURL, microphoneURL: savedMicURL, systemURL: savedSystemURL)
1682 1705
         }
1683
-        try? FileManager.default.removeItem(at: systemURL)
1684
-        try? FileManager.default.removeItem(at: microphoneURL)
1685
-        return destinationURL
1706
+
1707
+        return FinalizedMeetingAudio(mixedURL: destinationURL, microphoneURL: nil, systemURL: nil)
1686 1708
     }
1687 1709
 
1688 1710
     private func mixAudioFiles(systemURL: URL, microphoneURL: URL, destinationURL: URL) async throws {
@@ -2990,10 +3012,12 @@ private extension ViewController {
2990 3012
                 )
2991 3013
                 await MainActor.run {
2992 3014
                     guard requestId == nil || self.aiCompanionTranscriptCurrentRequestId == requestId else { return }
3015
+                    self.aiCompanionTranscriptProgressByMeetingId[meetingId] = nil
2993 3016
                     _ = self.aiCompanionUpdateRecording(meetingId: meetingId) { recording in
2994 3017
                         recording.transcriptStatusRaw = MeetingTranscriptStatus.ready.rawValue
2995 3018
                         recording.transcriptSourceRaw = result.source.rawValue
2996 3019
                         recording.transcriptText = result.text
3020
+                        recording.transcriptSegmentsJSON = result.segmentsJSON
2997 3021
                         recording.transcriptErrorMessage = nil
2998 3022
                     }
2999 3023
                     self.aiCompanionTranscriptTextView?.string = result.text
@@ -3006,6 +3030,7 @@ private extension ViewController {
3006 3030
             } catch {
3007 3031
                 await MainActor.run {
3008 3032
                     guard requestId == nil || self.aiCompanionTranscriptCurrentRequestId == requestId else { return }
3033
+                    self.aiCompanionTranscriptProgressByMeetingId[meetingId] = nil
3009 3034
                     let msg = error.localizedDescription.isEmpty ? "Failed to load transcript." : error.localizedDescription
3010 3035
                     _ = self.aiCompanionUpdateRecording(meetingId: meetingId) { recording in
3011 3036
                         recording.transcriptStatusRaw = MeetingTranscriptStatus.failed.rawValue
@@ -3027,23 +3052,71 @@ private extension ViewController {
3027 3052
         meetingId: String,
3028 3053
         interactiveAuth: Bool,
3029 3054
         presentingWindow: NSWindow?
3030
-    ) async throws -> (text: String, source: MeetingTranscriptSource) {
3055
+    ) async throws -> (text: String, segmentsJSON: String?, source: MeetingTranscriptSource) {
3031 3056
         guard let recording = aiCompanionLocalRecordings.first(where: { $0.id == meetingId }) else {
3032 3057
             throw NSError(domain: "AiCompanionTranscript", code: 10, userInfo: [NSLocalizedDescriptionKey: "Recording not found."])
3033 3058
         }
3034 3059
         _ = interactiveAuth
3035 3060
         _ = presentingWindow
3036 3061
 
3037
-        let audioURL = URL(fileURLWithPath: recording.audioFilePath)
3038
-        guard FileManager.default.fileExists(atPath: audioURL.path) else {
3039
-            throw NSError(domain: "AiCompanionTranscript", code: 12, userInfo: [NSLocalizedDescriptionKey: "Local meeting audio is missing."])
3062
+        let micURL: URL? = recording.microphoneAudioFilePath
3063
+            .map { URL(fileURLWithPath: $0) }
3064
+            .flatMap { FileManager.default.fileExists(atPath: $0.path) ? $0 : nil }
3065
+        let systemURL: URL? = recording.systemAudioFilePath
3066
+            .map { URL(fileURLWithPath: $0) }
3067
+            .flatMap { FileManager.default.fileExists(atPath: $0.path) ? $0 : nil }
3068
+
3069
+        let hasPerChannel = micURL != nil || systemURL != nil
3070
+
3071
+        let mixedURL = URL(fileURLWithPath: recording.audioFilePath)
3072
+        if hasPerChannel == false {
3073
+            guard FileManager.default.fileExists(atPath: mixedURL.path) else {
3074
+                throw NSError(domain: "AiCompanionTranscript", code: 12, userInfo: [NSLocalizedDescriptionKey: "Local meeting audio is missing."])
3075
+            }
3040 3076
         }
3041
-        let text = try await transcribeLocalAudioWithAppleSpeech(audioURL: audioURL)
3042
-        let cleaned = text.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
3043
-        guard cleaned.isEmpty == false else {
3077
+
3078
+        let progressHandler: @Sendable (MeetingTranscriptionProgress) -> Void = { [weak self] progress in
3079
+            Task { @MainActor [weak self] in
3080
+                guard let self else { return }
3081
+                let text: String
3082
+                if progress.totalChunks > 0 {
3083
+                    text = "Transcribing \(progress.completedChunks)/\(progress.totalChunks) chunks..."
3084
+                } else {
3085
+                    text = "Transcript processing..."
3086
+                }
3087
+                self.aiCompanionTranscriptProgressByMeetingId[meetingId] = text
3088
+                self.aiCompanionRefreshTranscriptStatusLabels(forMeetingID: meetingId)
3089
+            }
3090
+        }
3091
+
3092
+        let segments: [TranscriptSegment]
3093
+        let source: MeetingTranscriptSource
3094
+        if hasPerChannel {
3095
+            segments = try await meetingTranscriptionService.transcribeMeeting(
3096
+                micURL: micURL,
3097
+                systemURL: systemURL,
3098
+                onProgress: progressHandler
3099
+            )
3100
+            source = .localMultiChannelAppleSpeech
3101
+        } else {
3102
+            // Backward compatibility: old recordings only have a mixed file.
3103
+            segments = try await meetingTranscriptionService.transcribeMeeting(
3104
+                micURL: nil,
3105
+                systemURL: mixedURL,
3106
+                onProgress: progressHandler
3107
+            )
3108
+            source = .localAudioAppleSpeech
3109
+        }
3110
+
3111
+        let renderedText = segments.renderedTimelineText().trimmingCharacters(in: .whitespacesAndNewlines)
3112
+        guard renderedText.isEmpty == false else {
3044 3113
             throw NSError(domain: "AiCompanionTranscript", code: 14, userInfo: [NSLocalizedDescriptionKey: "Generated transcript was empty."])
3045 3114
         }
3046
-        return (cleaned, .localAudioAppleSpeech)
3115
+
3116
+        let encoder = JSONEncoder()
3117
+        let segmentsJSON = (try? encoder.encode(segments)).flatMap { String(data: $0, encoding: .utf8) }
3118
+
3119
+        return (renderedText, segmentsJSON, source)
3047 3120
     }
3048 3121
 
3049 3122
     @objc private func aiCompanionStopRecordingTapped(_ sender: NSButton) {