From 5e089cc9f83139cf7ae81067eed58ea526960059 Mon Sep 17 00:00:00 2001 From: Ian Dundas <1131967+iandundas@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:55:33 +0100 Subject: [PATCH 1/7] SegmentDiscovery callback --- Sources/WhisperKit/Core/Models.swift | 1 + Sources/WhisperKit/Core/TranscribeTask.swift | 3 +++ Sources/WhisperKit/Core/WhisperKit.swift | 4 ++++ 3 files changed, 8 insertions(+) diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index 5ca6995f..bb53071e 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -629,6 +629,7 @@ public struct TranscriptionProgress { } } +public typealias SegmentDiscoveryCallback = (([TranscriptionSegment]) -> Void) /// Callback to receive progress updates during transcription. /// /// - Parameters: diff --git a/Sources/WhisperKit/Core/TranscribeTask.swift b/Sources/WhisperKit/Core/TranscribeTask.swift index a6939e7a..a77cd991 100644 --- a/Sources/WhisperKit/Core/TranscribeTask.swift +++ b/Sources/WhisperKit/Core/TranscribeTask.swift @@ -15,6 +15,7 @@ final class TranscribeTask { private let textDecoder: any TextDecoding private let tokenizer: any WhisperTokenizer + public var segmentDiscoveryCallback: (([TranscriptionSegment]) -> Void)? init( currentTimings: TranscriptionTimings, progress: Progress?, @@ -230,6 +231,8 @@ final class TranscribeTask { } } + segmentDiscoveryCallback?(currentSegments) + // add them to the `allSegments` list allSegments.append(contentsOf: currentSegments) let allCurrentTokens = currentSegments.flatMap { $0.tokens } diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift index 88a665fc..15da09f9 100644 --- a/Sources/WhisperKit/Core/WhisperKit.swift +++ b/Sources/WhisperKit/Core/WhisperKit.swift @@ -42,6 +42,8 @@ open class WhisperKit { public var tokenizerFolder: URL? public private(set) var useBackgroundDownloadSession: Bool + /// Callbacks + public var segmentDiscoveryCallback: SegmentDiscoveryCallback? public init(_ config: WhisperKitConfig = WhisperKitConfig()) async throws { modelCompute = config.computeOptions ?? ModelComputeOptions() audioProcessor = config.audioProcessor ?? AudioProcessor() @@ -872,6 +874,8 @@ open class WhisperKit { tokenizer: tokenizer ) + transcribeTask.segmentDiscoveryCallback = self.segmentDiscoveryCallback + let transcribeTaskResult = try await transcribeTask.run( audioArray: audioArray, decodeOptions: decodeOptions, From 7c21fdc3029eeab3a7de6446dfe70299eb204f63 Mon Sep 17 00:00:00 2001 From: Ian Dundas <1131967+iandundas@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:59:11 +0100 Subject: [PATCH 2/7] ModelState callback --- Sources/WhisperKit/Core/Models.swift | 1 + Sources/WhisperKit/Core/WhisperKit.swift | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index bb53071e..ef511eb9 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -630,6 +630,7 @@ public struct TranscriptionProgress { } public typealias SegmentDiscoveryCallback = (([TranscriptionSegment]) -> Void) +public typealias ModelStateCallback = ((ModelState?, ModelState) -> Void) /// Callback to receive progress updates during transcription. /// /// - Parameters: diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift index 15da09f9..55f83b0d 100644 --- a/Sources/WhisperKit/Core/WhisperKit.swift +++ b/Sources/WhisperKit/Core/WhisperKit.swift @@ -13,7 +13,11 @@ import Tokenizers open class WhisperKit { /// Models public private(set) var modelVariant: ModelVariant = .tiny - public private(set) var modelState: ModelState = .unloaded + public private(set) var modelState: ModelState = .unloaded { + didSet { + modelStateCallback?(oldValue, modelState) + } + } public var modelCompute: ModelComputeOptions public var tokenizer: WhisperTokenizer? @@ -44,7 +48,9 @@ open class WhisperKit { /// Callbacks public var segmentDiscoveryCallback: SegmentDiscoveryCallback? - public init(_ config: WhisperKitConfig = WhisperKitConfig()) async throws { + public var modelStateCallback: ModelStateCallback? + + public init(_ config: WhisperKitConfig = WhisperKitConfig(), modelStateDidChangeCallback: ModelStateCallback? = nil) async throws { modelCompute = config.computeOptions ?? ModelComputeOptions() audioProcessor = config.audioProcessor ?? AudioProcessor() featureExtractor = config.featureExtractor ?? FeatureExtractor() @@ -58,6 +64,8 @@ open class WhisperKit { currentTimings = TranscriptionTimings() Logging.shared.logLevel = config.verbose ? config.logLevel : .none + self.modelStateCallback = modelStateDidChangeCallback + try await setupModels( model: config.model, downloadBase: config.downloadBase, @@ -96,7 +104,8 @@ open class WhisperKit { prewarm: Bool? = nil, load: Bool? = nil, download: Bool = true, - useBackgroundDownloadSession: Bool = false + useBackgroundDownloadSession: Bool = false, + modelStateDidChangeCallback: ModelStateCallback? = nil ) async throws { let config = WhisperKitConfig( model: model, @@ -118,7 +127,7 @@ open class WhisperKit { download: download, useBackgroundDownloadSession: useBackgroundDownloadSession ) - try await self.init(config) + try await self.init(config, modelStateDidChangeCallback: modelStateDidChangeCallback) } // MARK: - Model Loading From e3fdde4b9f54871c8f032224afe052d56fc81f76 Mon Sep 17 00:00:00 2001 From: Ian Dundas <1131967+iandundas@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:59:54 +0100 Subject: [PATCH 3/7] FractionCompleted callback --- Sources/WhisperKit/Core/Models.swift | 1 + Sources/WhisperKit/Core/TranscribeTask.swift | 5 +++++ Sources/WhisperKit/Core/WhisperKit.swift | 2 ++ 3 files changed, 8 insertions(+) diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index ef511eb9..651d2df5 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -631,6 +631,7 @@ public struct TranscriptionProgress { public typealias SegmentDiscoveryCallback = (([TranscriptionSegment]) -> Void) public typealias ModelStateCallback = ((ModelState?, ModelState) -> Void) +public typealias FractionCompletedCallback = ((Float) -> Void) /// Callback to receive progress updates during transcription. /// /// - Parameters: diff --git a/Sources/WhisperKit/Core/TranscribeTask.swift b/Sources/WhisperKit/Core/TranscribeTask.swift index a77cd991..af179ff1 100644 --- a/Sources/WhisperKit/Core/TranscribeTask.swift +++ b/Sources/WhisperKit/Core/TranscribeTask.swift @@ -16,6 +16,8 @@ final class TranscribeTask { private let tokenizer: any WhisperTokenizer public var segmentDiscoveryCallback: (([TranscriptionSegment]) -> Void)? + public var fractionCompletedCallback: ((Float) -> Void)? + init( currentTimings: TranscriptionTimings, progress: Progress?, @@ -116,6 +118,9 @@ final class TranscribeTask { Logging.debug("Decoding Seek: \(seek) (\(formatTimestamp(timeOffset))s)") Logging.debug("Decoding Window Size: \(segmentSize) (\(formatTimestamp(timeOffsetEnd - timeOffset))s)") + let totalLength = Float(seekClipEnd)/Float(WhisperKit.sampleRate) + self.fractionCompletedCallback?(timeOffset / totalLength) + let audioProcessingStart = Date() let clipAudioSamples = Array(audioArray[seek..<(seek + segmentSize)]) guard let audioSamples = AudioProcessor.padOrTrimAudio(fromArray: clipAudioSamples, startAt: 0, toLength: WhisperKit.windowSamples) else { diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift index 55f83b0d..0c879886 100644 --- a/Sources/WhisperKit/Core/WhisperKit.swift +++ b/Sources/WhisperKit/Core/WhisperKit.swift @@ -48,6 +48,7 @@ open class WhisperKit { /// Callbacks public var segmentDiscoveryCallback: SegmentDiscoveryCallback? + public var fractionCompletedCallback: FractionCompletedCallback? public var modelStateCallback: ModelStateCallback? public init(_ config: WhisperKitConfig = WhisperKitConfig(), modelStateDidChangeCallback: ModelStateCallback? = nil) async throws { @@ -884,6 +885,7 @@ open class WhisperKit { ) transcribeTask.segmentDiscoveryCallback = self.segmentDiscoveryCallback + transcribeTask.fractionCompletedCallback = self.fractionCompletedCallback let transcribeTaskResult = try await transcribeTask.run( audioArray: audioArray, From fdad075493814c7a973f85aea8a2ff11b319d0bd Mon Sep 17 00:00:00 2001 From: Ian Dundas <1131967+iandundas@users.noreply.github.com> Date: Fri, 1 Nov 2024 15:00:20 +0100 Subject: [PATCH 4/7] TranscriptionPhaseCallback callback --- Sources/WhisperKit/Core/Models.swift | 8 ++++++++ Sources/WhisperKit/Core/WhisperKit.swift | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index 651d2df5..9701d849 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -632,6 +632,14 @@ public struct TranscriptionProgress { public typealias SegmentDiscoveryCallback = (([TranscriptionSegment]) -> Void) public typealias ModelStateCallback = ((ModelState?, ModelState) -> Void) public typealias FractionCompletedCallback = ((Float) -> Void) +public typealias TranscriptionPhaseCallback = ((TranscriptionPhase) -> Void) + +public enum TranscriptionPhase { + case convertingAudio + case transcribing + case finished +} + /// Callback to receive progress updates during transcription. /// /// - Parameters: diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift index 0c879886..4c0fcee8 100644 --- a/Sources/WhisperKit/Core/WhisperKit.swift +++ b/Sources/WhisperKit/Core/WhisperKit.swift @@ -48,6 +48,7 @@ open class WhisperKit { /// Callbacks public var segmentDiscoveryCallback: SegmentDiscoveryCallback? + public var transcriptionPhaseCallback: TranscriptionPhaseCallback? public var fractionCompletedCallback: FractionCompletedCallback? public var modelStateCallback: ModelStateCallback? @@ -745,6 +746,9 @@ open class WhisperKit { decodeOptions: DecodingOptions? = nil, callback: TranscriptionCallback = nil ) async throws -> [TranscriptionResult] { + + transcriptionPhaseCallback?(.convertingAudio) + // Process input audio file into audio samples let audioArray = try await withThrowingTaskGroup(of: [Float].self) { group -> [Float] in let convertAudioStart = Date() @@ -758,6 +762,12 @@ open class WhisperKit { return try AudioProcessor.loadAudioAsFloatArray(fromPath: audioPath) } + transcriptionPhaseCallback?(.transcribing) + defer { + transcriptionPhaseCallback?(.finished) + } + + // Send converted samples to be transcribed let transcribeResults: [TranscriptionResult] = try await transcribe( audioArray: audioArray, decodeOptions: decodeOptions, From 68c1997c9c7879ec6c3d5a76a4628426dc70c245 Mon Sep 17 00:00:00 2001 From: ZachNagengast Date: Tue, 5 Nov 2024 01:16:21 -0800 Subject: [PATCH 5/7] Updates for review --- Sources/WhisperKit/Core/Models.swift | 42 +++++++++++++++++--- Sources/WhisperKit/Core/TranscribeTask.swift | 8 +--- Sources/WhisperKit/Core/WhisperKit.swift | 31 ++++++++------- Tests/WhisperKitTests/UnitTests.swift | 37 +++++++++++++++++ 4 files changed, 92 insertions(+), 26 deletions(-) diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index 9701d849..aa7dba24 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -629,16 +629,46 @@ public struct TranscriptionProgress { } } -public typealias SegmentDiscoveryCallback = (([TranscriptionSegment]) -> Void) -public typealias ModelStateCallback = ((ModelState?, ModelState) -> Void) -public typealias FractionCompletedCallback = ((Float) -> Void) -public typealias TranscriptionPhaseCallback = ((TranscriptionPhase) -> Void) +/// Callbacks to receive state updates during transcription. -public enum TranscriptionPhase { +/// A callback that provides transcription segments as they are discovered. +/// - Parameters: +/// - segments: An array of `TranscriptionSegment` objects representing the transcribed segments +public typealias SegmentDiscoveryCallback = (_ segments: [TranscriptionSegment]) -> Void + +/// A callback that reports changes in the model's state. +/// - Parameters: +/// - oldState: The previous state of the model, if any +/// - newState: The current state of the model +public typealias ModelStateCallback = (_ oldState: ModelState?, _ newState: ModelState) -> Void + +/// A callback that reports changes in the transcription process. +/// - Parameter state: The current `TranscriptionState` of the transcription process +public typealias TranscriptionStateCallback = (_ state: TranscriptionState) -> Void + +/// Represents the different states of the transcription process. +public enum TranscriptionState: CustomStringConvertible { + /// The audio is being converted to the required format for transcription case convertingAudio + + /// The audio is actively being transcribed to text case transcribing + + /// The transcription process has completed case finished -} + + /// A human-readable description of the transcription state + public var description: String { + switch self { + case .convertingAudio: + return "Converting Audio" + case .transcribing: + return "Transcribing" + case .finished: + return "Finished" + } + } +} /// Callback to receive progress updates during transcription. /// diff --git a/Sources/WhisperKit/Core/TranscribeTask.swift b/Sources/WhisperKit/Core/TranscribeTask.swift index af179ff1..0ec031a3 100644 --- a/Sources/WhisperKit/Core/TranscribeTask.swift +++ b/Sources/WhisperKit/Core/TranscribeTask.swift @@ -15,9 +15,8 @@ final class TranscribeTask { private let textDecoder: any TextDecoding private let tokenizer: any WhisperTokenizer - public var segmentDiscoveryCallback: (([TranscriptionSegment]) -> Void)? - public var fractionCompletedCallback: ((Float) -> Void)? - + public var segmentDiscoveryCallback: SegmentDiscoveryCallback? + init( currentTimings: TranscriptionTimings, progress: Progress?, @@ -118,9 +117,6 @@ final class TranscribeTask { Logging.debug("Decoding Seek: \(seek) (\(formatTimestamp(timeOffset))s)") Logging.debug("Decoding Window Size: \(segmentSize) (\(formatTimestamp(timeOffsetEnd - timeOffset))s)") - let totalLength = Float(seekClipEnd)/Float(WhisperKit.sampleRate) - self.fractionCompletedCallback?(timeOffset / totalLength) - let audioProcessingStart = Date() let clipAudioSamples = Array(audioArray[seek..<(seek + segmentSize)]) guard let audioSamples = AudioProcessor.padOrTrimAudio(fromArray: clipAudioSamples, startAt: 0, toLength: WhisperKit.windowSamples) else { diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift index 4c0fcee8..6d194f93 100644 --- a/Sources/WhisperKit/Core/WhisperKit.swift +++ b/Sources/WhisperKit/Core/WhisperKit.swift @@ -18,6 +18,7 @@ open class WhisperKit { modelStateCallback?(oldValue, modelState) } } + public var modelCompute: ModelComputeOptions public var tokenizer: WhisperTokenizer? @@ -48,11 +49,10 @@ open class WhisperKit { /// Callbacks public var segmentDiscoveryCallback: SegmentDiscoveryCallback? - public var transcriptionPhaseCallback: TranscriptionPhaseCallback? - public var fractionCompletedCallback: FractionCompletedCallback? public var modelStateCallback: ModelStateCallback? + public var transcriptionStateCallback: TranscriptionStateCallback? - public init(_ config: WhisperKitConfig = WhisperKitConfig(), modelStateDidChangeCallback: ModelStateCallback? = nil) async throws { + public init(_ config: WhisperKitConfig = WhisperKitConfig()) async throws { modelCompute = config.computeOptions ?? ModelComputeOptions() audioProcessor = config.audioProcessor ?? AudioProcessor() featureExtractor = config.featureExtractor ?? FeatureExtractor() @@ -66,8 +66,6 @@ open class WhisperKit { currentTimings = TranscriptionTimings() Logging.shared.logLevel = config.verbose ? config.logLevel : .none - self.modelStateCallback = modelStateDidChangeCallback - try await setupModels( model: config.model, downloadBase: config.downloadBase, @@ -129,7 +127,7 @@ open class WhisperKit { download: download, useBackgroundDownloadSession: useBackgroundDownloadSession ) - try await self.init(config, modelStateDidChangeCallback: modelStateDidChangeCallback) + try await self.init(config) } // MARK: - Model Loading @@ -378,7 +376,7 @@ open class WhisperKit { } else { currentTimings.decoderLoadTime = CFAbsoluteTimeGetCurrent() - decoderLoadStart } - + Logging.debug("Loaded text decoder in \(String(format: "%.2f", currentTimings.decoderLoadTime))s") } @@ -391,13 +389,13 @@ open class WhisperKit { computeUnits: modelCompute.audioEncoderCompute, prewarmMode: prewarmMode ) - + if prewarmMode { currentTimings.encoderSpecializationTime = CFAbsoluteTimeGetCurrent() - encoderLoadStart } else { currentTimings.encoderLoadTime = CFAbsoluteTimeGetCurrent() - encoderLoadStart } - + Logging.debug("Loaded audio encoder in \(String(format: "%.2f", currentTimings.encoderLoadTime))s") } @@ -562,6 +560,8 @@ open class WhisperKit { decodeOptions: DecodingOptions? = nil, callback: TranscriptionCallback = nil ) async -> [Result<[TranscriptionResult], Swift.Error>] { + transcriptionStateCallback?(.convertingAudio) + // Start timing the audio loading and conversion process let loadAudioStart = Date() @@ -574,6 +574,11 @@ open class WhisperKit { currentTimings.audioLoading = loadAndConvertTime Logging.debug("Total Audio Loading and Converting Time: \(loadAndConvertTime)") + transcriptionStateCallback?(.transcribing) + defer { + transcriptionStateCallback?(.finished) + } + // Transcribe the loaded audio arrays let transcribeResults = await transcribeWithResults( audioArrays: audioArrays, @@ -746,8 +751,7 @@ open class WhisperKit { decodeOptions: DecodingOptions? = nil, callback: TranscriptionCallback = nil ) async throws -> [TranscriptionResult] { - - transcriptionPhaseCallback?(.convertingAudio) + transcriptionStateCallback?(.convertingAudio) // Process input audio file into audio samples let audioArray = try await withThrowingTaskGroup(of: [Float].self) { group -> [Float] in @@ -762,9 +766,9 @@ open class WhisperKit { return try AudioProcessor.loadAudioAsFloatArray(fromPath: audioPath) } - transcriptionPhaseCallback?(.transcribing) + transcriptionStateCallback?(.transcribing) defer { - transcriptionPhaseCallback?(.finished) + transcriptionStateCallback?(.finished) } // Send converted samples to be transcribed @@ -895,7 +899,6 @@ open class WhisperKit { ) transcribeTask.segmentDiscoveryCallback = self.segmentDiscoveryCallback - transcribeTask.fractionCompletedCallback = self.fractionCompletedCallback let transcribeTaskResult = try await transcribeTask.run( audioArray: audioArray, diff --git a/Tests/WhisperKitTests/UnitTests.swift b/Tests/WhisperKitTests/UnitTests.swift index df258a46..0ec1b2d0 100644 --- a/Tests/WhisperKitTests/UnitTests.swift +++ b/Tests/WhisperKitTests/UnitTests.swift @@ -1067,6 +1067,43 @@ final class UnitTests: XCTestCase { XCTAssertEqual(result.segments.first?.text, " and so my fellow americans ask not what your country can do for you ask what you can do for your country.") } + func testCallbacks() async throws { + let config = try WhisperKitConfig( + modelFolder: tinyModelPath(), + verbose: true, + logLevel: .debug, + load: false + ) + let whisperKit = try await WhisperKit(config) + let modelStateExpectation = XCTestExpectation(description: "Model state callback expectation") + whisperKit.modelStateCallback = { (oldState: ModelState?, newState: ModelState) in + Logging.debug("Model state: \(newState)") + modelStateExpectation.fulfill() + } + + let segmentDiscoveryExpectation = XCTestExpectation(description: "Segment discovery callback expectation") + whisperKit.segmentDiscoveryCallback = { (segments: [TranscriptionSegment]) in + Logging.debug("Segments discovered: \(segments)") + segmentDiscoveryExpectation.fulfill() + } + + let transcriptionStateExpectation = XCTestExpectation(description: "Transcription state callback expectation") + whisperKit.transcriptionStateCallback = { (state: TranscriptionState) in + Logging.debug("Transcription state: \(state)") + transcriptionStateExpectation.fulfill() + } + + // Run the full pipeline + try await whisperKit.loadModels() + let audioFilePath = try XCTUnwrap( + Bundle.current.path(forResource: "jfk", ofType: "wav"), + "Audio file not found" + ) + let _ = try await whisperKit.transcribe(audioPath: audioFilePath) + + await fulfillment(of: [modelStateExpectation, segmentDiscoveryExpectation, transcriptionStateExpectation], timeout: 1) + } + // MARK: - Utils Tests func testFillIndexesWithValue() throws { From 635551db487ac706f75f9a04fe4c060c6c98dea6 Mon Sep 17 00:00:00 2001 From: ZachNagengast Date: Tue, 5 Nov 2024 01:19:37 -0800 Subject: [PATCH 6/7] Formatting --- Sources/WhisperKit/Core/Models.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift index aa7dba24..325e0622 100644 --- a/Sources/WhisperKit/Core/Models.swift +++ b/Sources/WhisperKit/Core/Models.swift @@ -629,7 +629,7 @@ public struct TranscriptionProgress { } } -/// Callbacks to receive state updates during transcription. +// Callbacks to receive state updates during transcription. /// A callback that provides transcription segments as they are discovered. /// - Parameters: From 7ca47847f71ab8ff6b620c34b7a6f1620ead3dbb Mon Sep 17 00:00:00 2001 From: ZachNagengast Date: Tue, 5 Nov 2024 08:45:10 -0800 Subject: [PATCH 7/7] Remove remaining callback from init --- Sources/WhisperKit/Core/WhisperKit.swift | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Sources/WhisperKit/Core/WhisperKit.swift b/Sources/WhisperKit/Core/WhisperKit.swift index 6d194f93..5f617e6a 100644 --- a/Sources/WhisperKit/Core/WhisperKit.swift +++ b/Sources/WhisperKit/Core/WhisperKit.swift @@ -104,8 +104,7 @@ open class WhisperKit { prewarm: Bool? = nil, load: Bool? = nil, download: Bool = true, - useBackgroundDownloadSession: Bool = false, - modelStateDidChangeCallback: ModelStateCallback? = nil + useBackgroundDownloadSession: Bool = false ) async throws { let config = WhisperKitConfig( model: model,