diff --git a/package.json b/package.json index f0e51f6..14f4952 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "name": "Cartesia", "url": "https://cartesia.ai" }, - "version": "1.3.0", + "version": "1.4.0", "description": "Client for the Cartesia API.", "type": "module", "module": "./dist/index.js", diff --git a/src/types/index.ts b/src/types/index.ts index 5f60195..1795058 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -120,18 +120,35 @@ export type EmitteryCallbacks = { events: Emittery["events"]; }; -export type CloneOptions = +export type CloneOptions = { + mode: "clip"; + clip: Blob; + enhance?: boolean; +}; + +export type CloneVoiceOptions = | { - mode: "url"; - link: string; + mode: "stability"; + clip: Blob; enhance?: boolean; + name: string; + description: string; + language: Language; } | { - mode: "clip"; + mode: "similarity"; clip: Blob; enhance?: boolean; + name: string; + description: string; + language: Language; + transcript?: string; }; +export type CloneResponse = { + embedding: number[]; +}; + export type VoiceChangerOptions = { clip: File; voice: { id: string }; // match VoiceSpecifier shape, but only id is supported for now @@ -191,10 +208,6 @@ export type UpdateVoice = Partial< Pick >; -export type CloneResponse = { - embedding: number[]; -}; - export type VoiceChangerBytesResponse = { buffer: ArrayBuffer; }; diff --git a/src/voices/index.ts b/src/voices/index.ts index 81951f2..07ea0ae 100644 --- a/src/voices/index.ts +++ b/src/voices/index.ts @@ -2,6 +2,7 @@ import { Client } from "../lib/client"; import type { CloneOptions, CloneResponse, + CloneVoiceOptions, CreateVoice, LocalizeOptions, LocalizeResponse, @@ -38,7 +39,10 @@ export default class Voices extends Client { return response.json() as Promise; } - async clone(options: CloneOptions): Promise { + async clone(options: CloneOptions): Promise + async clone(options: CloneVoiceOptions): Promise + async clone(options: CloneOptions | CloneVoiceOptions): Promise { + // First: handle old clip mode/endpoint if (options.mode === "clip") { const formData = new FormData(); formData.append("clip", options.clip); @@ -53,7 +57,36 @@ export default class Voices extends Client { return response.json(); } - throw new Error("Invalid mode for clone()"); + const formData = new FormData(); + formData.append("clip", options.clip); + formData.append("mode", options.mode); + formData.append("name", options.name); + formData.append("description", options.description); + formData.append("language", options.language); + if (options.enhance !== undefined) { + formData.append("enhance", options.enhance.toString()); + } + if (options.mode === "similarity") { + if (options.transcript) { + formData.append("transcript", options.transcript); + } + } + + const response = await this._fetch("/voices/clone", { + method: "POST", + body: formData, + }); + + if (!response.ok) { + if (response.headers.get("content-type")?.includes("application/json")) { + const errorData = await response.json(); + throw new Error(errorData.message || "Clone voice failed"); + } + const errorText = await response.text(); + throw new Error(errorText || "Clone voice failed"); + } + + return response.json() as Promise; } async mix(options: MixVoicesOptions): Promise {