Explorar el Código

Fix AI Companion notes language selection.

Notes generation now follows the user’s preferred transcription language: Preferred Language 1 when set, otherwise Preferred Language 2. Dominant-language detection is used only as a fallback when neither preference is set.

Co-authored-by: Cursor <cursoragent@cursor.com>
huzaifahayat12 hace 1 mes
padre
commit
a647911c19

+ 141 - 27
meetings_app/Transcription/MeetingTranscriptionService.swift

@@ -510,25 +510,15 @@ final class MeetingNotesService {
510 510
         return nil
511 511
     }
512 512
 
513
-    func generateNotes(from transcript: String, apiKey: String? = nil) async throws -> String {
513
+    func generateNotes(
514
+        from transcript: String,
515
+        preferredLanguageIdentifier1: String? = nil,
516
+        preferredLanguageIdentifier2: String? = nil,
517
+        apiKey: String? = nil
518
+    ) async throws -> String {
514 519
         let keys = resolveAPIKeyCandidates(apiKey: apiKey)
515 520
         guard keys.isEmpty == false else { throw MeetingNotesError.missingAPIKey }
516 521
 
517
-        let prompt = """
518
-        You are a meeting assistant. Generate structured notes from the transcript.
519
-        Important: the transcript may have missing words, dropped sentences, or minor recognition errors.
520
-        Infer likely intent conservatively and produce useful notes without inventing specific facts.
521
-        
522
-        Output sections:
523
-        1) Summary (3-5 bullets)
524
-        2) Decisions
525
-        3) Action Items (owner if identifiable, otherwise "Unassigned")
526
-        4) Risks / Open Questions
527
-        
528
-        Transcript:
529
-        \(transcript)
530
-        """
531
-
532 522
         struct Message: Encodable {
533 523
             let role: String
534 524
             let content: String
@@ -539,19 +529,34 @@ final class MeetingNotesService {
539 529
             let temperature: Double
540 530
         }
541 531
 
542
-        let body = Body(
543
-            model: "gpt-4.1-mini",
544
-            messages: [
545
-                Message(role: "system", content: "You create practical meeting notes from imperfect transcripts."),
546
-                Message(role: "user", content: prompt)
547
-            ],
548
-            temperature: 0.2
549
-        )
550
-
551 532
         var lastError: MeetingNotesError?
552 533
         for (index, candidate) in keys.enumerated() {
553 534
             do {
554
-                return try await requestNotes(body: body, apiKey: candidate.key)
535
+                let preference = preferredNotesLanguage(
536
+                    primary: preferredLanguageIdentifier1,
537
+                    secondary: preferredLanguageIdentifier2
538
+                )
539
+                let notesLanguage: DominantLanguage
540
+                if let preference {
541
+                    notesLanguage = preference
542
+                } else {
543
+                    notesLanguage = try await detectDominantLanguage(in: transcript, apiKey: candidate.key)
544
+                }
545
+
546
+                let notesPrompt = buildNotesPrompt(
547
+                    transcript: transcript,
548
+                    dominantLanguage: notesLanguage,
549
+                    preferredLanguage: preference
550
+                )
551
+                let body = Body(
552
+                    model: "gpt-4.1-mini",
553
+                    messages: [
554
+                        Message(role: "system", content: "You create practical meeting notes from imperfect transcripts."),
555
+                        Message(role: "user", content: notesPrompt)
556
+                    ],
557
+                    temperature: 0.2
558
+                )
559
+                return try await requestChatCompletionText(body: body, apiKey: candidate.key)
555 560
             } catch let error as MeetingNotesError {
556 561
                 switch error {
557 562
                 case .httpStatus(let code, _):
@@ -570,7 +575,7 @@ final class MeetingNotesService {
570 575
         throw lastError ?? MeetingNotesError.invalidResponse
571 576
     }
572 577
 
573
-    private func requestNotes(body: Encodable, apiKey: String) async throws -> String {
578
+    private func requestChatCompletionText(body: Encodable, apiKey: String) async throws -> String {
574 579
         var request = URLRequest(url: URL(string: "https://api.openai.com/v1/chat/completions")!)
575 580
         request.httpMethod = "POST"
576 581
         request.setValue("application/json", forHTTPHeaderField: "Content-Type")
@@ -610,6 +615,115 @@ final class MeetingNotesService {
610 615
         return notes
611 616
     }
612 617
 
618
+    private struct DominantLanguage: Sendable {
619
+        let tag: String
620
+        let name: String
621
+    }
622
+
623
+    private func preferredNotesLanguage(primary: String?, secondary: String?) -> DominantLanguage? {
624
+        // Notes follow Preferred Language 1 when set, otherwise Preferred Language 2 (user intent),
625
+        // because dominant-language detection can confuse closely related languages (e.g., Urdu vs Hindi).
626
+        let cleaned = [
627
+            primary?.trimmingCharacters(in: .whitespacesAndNewlines),
628
+            secondary?.trimmingCharacters(in: .whitespacesAndNewlines)
629
+        ]
630
+            .compactMap { $0 }
631
+            .filter { $0.isEmpty == false }
632
+
633
+        guard let identifier = cleaned.first else { return nil }
634
+        let tag = identifier.replacingOccurrences(of: "_", with: "-")
635
+        let english = Locale(identifier: "en")
636
+        let languageName = english.localizedString(forIdentifier: identifier)
637
+            ?? english.localizedString(forIdentifier: tag)
638
+            ?? identifier
639
+        return DominantLanguage(tag: tag, name: languageName)
640
+    }
641
+
642
+    private func detectDominantLanguage(in transcript: String, apiKey: String) async throws -> DominantLanguage {
643
+        struct DetectResponse: Decodable {
644
+            let tag: String?
645
+            let name: String?
646
+        }
647
+
648
+        let detectPrompt = """
649
+        Identify the dominant language used in the transcript (the language used most overall).
650
+        If multiple languages are present, still choose only ONE dominant language.
651
+
652
+        Return ONLY valid JSON with this exact schema:
653
+        {"tag":"<BCP-47 language tag>","name":"<language name in English>"}
654
+
655
+        Transcript:
656
+        \(transcript)
657
+        """
658
+
659
+        struct Message: Encodable {
660
+            let role: String
661
+            let content: String
662
+        }
663
+        struct Body: Encodable {
664
+            let model: String
665
+            let messages: [Message]
666
+            let temperature: Double
667
+        }
668
+
669
+        let body = Body(
670
+            model: "gpt-4.1-mini",
671
+            messages: [
672
+                Message(role: "system", content: "You detect languages and reply with strict JSON only."),
673
+                Message(role: "user", content: detectPrompt)
674
+            ],
675
+            temperature: 0.0
676
+        )
677
+
678
+        let raw = try await requestChatCompletionText(body: body, apiKey: apiKey)
679
+        let data = Data(raw.utf8)
680
+        let decoded = try JSONDecoder().decode(DetectResponse.self, from: data)
681
+        let tag = (decoded.tag ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
682
+        let name = (decoded.name ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
683
+
684
+        if tag.isEmpty == false, name.isEmpty == false {
685
+            return DominantLanguage(tag: tag, name: name)
686
+        }
687
+
688
+        // Fallback: the JSON decoded but `name` is empty; reuse the tag as the name so the notes still target that language.
689
+        if tag.isEmpty == false {
690
+            return DominantLanguage(tag: tag, name: tag)
691
+        }
692
+        return DominantLanguage(tag: "und", name: "the dominant language of the transcript")
693
+    }
694
+
695
+    private func buildNotesPrompt(
696
+        transcript: String,
697
+        dominantLanguage: DominantLanguage,
698
+        preferredLanguage: DominantLanguage?
699
+    ) -> String {
700
+        let preferenceLine: String = {
701
+            guard let preferredLanguage else { return "" }
702
+            return "\nPreferred language selected by the user: \(preferredLanguage.name) (\(preferredLanguage.tag)). Follow this preference."
703
+        }()
704
+
705
+        return """
706
+        You are a meeting assistant. Generate structured notes from the transcript.
707
+        Important: the transcript may have missing words, dropped sentences, or minor recognition errors.
708
+        Infer likely intent conservatively and produce useful notes without inventing specific facts.
709
+        \(preferenceLine)
710
+
711
+        Output language:
712
+        - Write ALL notes (including headings) strictly in \(dominantLanguage.name) (\(dominantLanguage.tag)).
713
+        - Do NOT respond in English unless \(dominantLanguage.name) is English.
714
+        - Keep proper nouns, people names, product names, acronyms, URLs, and code tokens exactly as-is.
715
+
716
+        Output sections (translate these section headings into \(dominantLanguage.name)):
717
+        1) Summary (3-5 bullets)
718
+        2) Decisions
719
+        3) Action Items (include owner if identifiable; otherwise write the equivalent of "Unassigned" in \(dominantLanguage.name))
720
+        4) Risks / Open Questions
721
+
722
+        Transcript:
723
+        \(transcript)
724
+        """
725
+    }
726
+
613 727
     private func resolveAPIKeyCandidates(apiKey: String?) -> [(source: APIKeySource, key: String)] {
614 728
         var candidates: [(APIKeySource, String)] = []
615 729
         if let value = normalizedAPIKey(from: apiKey) {

+ 8 - 1
meetings_app/ViewController.swift

@@ -3814,7 +3814,14 @@ private extension ViewController {
3814 3814
             self.aiCompanionNotesProgressByMeetingId[meetingId] = "Generating notes with GPT..."
3815 3815
             self.aiCompanionRefreshNotesStatusLabels(forMeetingID: meetingId)
3816 3816
         }
3817
-        return try await meetingNotesService.generateNotes(from: transcriptText)
3817
+        let defaults = UserDefaults.standard
3818
+        let preferred1 = defaults.string(forKey: aiCompanionPreferredLanguage1DefaultsKey)
3819
+        let preferred2 = defaults.string(forKey: aiCompanionPreferredLanguage2DefaultsKey)
3820
+        return try await meetingNotesService.generateNotes(
3821
+            from: transcriptText,
3822
+            preferredLanguageIdentifier1: preferred1,
3823
+            preferredLanguageIdentifier2: preferred2
3824
+        )
3818 3825
     }
3819 3826
 
3820 3827
     @objc private func aiCompanionStopRecordingTapped(_ sender: NSButton) {