{"version":3,"sources":["../src/embedder.ts"],"sourcesContent":["/**\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport {\n defineAction,\n z,\n type Action,\n type ActionMetadata,\n} from '@genkit-ai/core';\nimport type { Registry } from '@genkit-ai/core/registry';\nimport { toJsonSchema } from '@genkit-ai/core/schema';\nimport { Document, DocumentDataSchema, type DocumentData } from './document.js';\n\n/**\n * A batch (array) of embeddings.\n */\nexport type EmbeddingBatch = { embedding: number[] }[];\n\n/**\n * EmbeddingSchema includes the embedding and also metadata so you know\n * which of multiple embeddings corresponds to which part of a document.\n */\nexport const EmbeddingSchema = z.object({\n embedding: z.array(z.number()),\n metadata: z.record(z.string(), z.unknown()).optional(),\n});\nexport type Embedding = z.infer;\n\n/**\n * A function used for embedder definition, encapsulates embedder implementation.\n */\nexport type EmbedderFn = (\n input: Document[],\n embedderOpts?: z.infer\n) => Promise;\n\n/**\n * Zod schema of an embed request.\n */\nconst EmbedRequestSchema = z.object({\n input: z.array(DocumentDataSchema),\n options: z.any().optional(),\n});\n\n/**\n * Zod schema of an embed response.\n */\nconst EmbedResponseSchema = z.object({\n embeddings: z.array(EmbeddingSchema),\n // TODO: stats, etc.\n});\ntype EmbedResponse = z.infer;\n\n/**\n * Embedder action -- a subtype of {@link Action} with input/output types for embedders.\n */\nexport type EmbedderAction =\n Action & {\n __configSchema?: CustomOptions;\n };\n\n/**\n * Options of an `embed` function.\n */\nexport interface EmbedderParams<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n> {\n embedder: EmbedderArgument;\n content: string | DocumentData;\n metadata?: Record;\n options?: z.infer;\n}\n\nfunction withMetadata(\n embedder: Action,\n configSchema?: CustomOptions\n): EmbedderAction {\n const withMeta = embedder as EmbedderAction;\n withMeta.__configSchema = configSchema;\n return withMeta;\n}\n\n/**\n * Creates embedder model for the provided {@link EmbedderFn} model implementation.\n */\nexport function defineEmbedder<\n ConfigSchema extends z.ZodTypeAny = z.ZodTypeAny,\n>(\n registry: Registry,\n options: {\n name: string;\n configSchema?: ConfigSchema;\n info?: EmbedderInfo;\n },\n runner: EmbedderFn\n) {\n const embedder = defineAction(\n registry,\n {\n actionType: 'embedder',\n name: options.name,\n inputSchema: options.configSchema\n ? EmbedRequestSchema.extend({\n options: options.configSchema.optional(),\n })\n : EmbedRequestSchema,\n outputSchema: EmbedResponseSchema,\n metadata: {\n type: 'embedder',\n info: options.info,\n embedder: {\n customOptions: options.configSchema\n ? toJsonSchema({ schema: options.configSchema })\n : undefined,\n },\n },\n },\n (i) =>\n runner(\n i.input.map((dd) => new Document(dd)),\n i.options\n )\n );\n const ewm = withMetadata(\n embedder as Action,\n options.configSchema\n );\n return ewm;\n}\n\n/**\n * A union type representing all the types that can refer to an embedder.\n */\nexport type EmbedderArgument<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n> = string | EmbedderAction | EmbedderReference;\n\n/**\n * A veneer for interacting with embedder models.\n */\nexport async function embed(\n registry: Registry,\n params: EmbedderParams\n): Promise {\n const embedder = await resolveEmbedder(registry, params);\n if (!embedder.embedderAction) {\n let embedderId: string;\n if (typeof params.embedder === 'string') {\n embedderId = params.embedder;\n } else if ((params.embedder as EmbedderAction)?.__action?.name) {\n embedderId = (params.embedder as EmbedderAction).__action.name;\n } else {\n embedderId = (params.embedder as EmbedderReference).name;\n }\n throw new Error(`Unable to resolve embedder ${embedderId}`);\n }\n const response = await embedder.embedderAction({\n input:\n typeof params.content === 'string'\n ? [Document.fromText(params.content, params.metadata)]\n : [params.content],\n options: {\n version: embedder.version,\n ...embedder.config,\n ...params.options,\n },\n });\n return response.embeddings;\n}\n\ninterface ResolvedEmbedder {\n embedderAction: EmbedderAction;\n config?: z.infer;\n version?: string;\n}\n\nasync function resolveEmbedder<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(\n registry: Registry,\n params: EmbedderParams\n): Promise> {\n if (typeof params.embedder === 'string') {\n return {\n embedderAction: await registry.lookupAction(\n `/embedder/${params.embedder}`\n ),\n };\n } else if (Object.hasOwnProperty.call(params.embedder, '__action')) {\n return {\n embedderAction: params.embedder as EmbedderAction,\n };\n } else if (Object.hasOwnProperty.call(params.embedder, 'name')) {\n const ref = params.embedder as EmbedderReference;\n return {\n embedderAction: await registry.lookupAction(\n `/embedder/${(params.embedder as EmbedderReference).name}`\n ),\n config: {\n ...ref.config,\n },\n version: ref.version,\n };\n }\n throw new Error(`failed to resolve embedder ${params.embedder}`);\n}\n\n/**\n * A veneer for interacting with embedder models in bulk.\n */\nexport async function embedMany<\n ConfigSchema extends z.ZodTypeAny = z.ZodTypeAny,\n>(\n registry: Registry,\n params: {\n embedder: EmbedderArgument;\n content: string[] | DocumentData[];\n metadata?: Record;\n options?: z.infer;\n }\n): Promise {\n let embedder: EmbedderAction;\n if (typeof params.embedder === 'string') {\n embedder = await registry.lookupAction(`/embedder/${params.embedder}`);\n } else if (Object.hasOwnProperty.call(params.embedder, 'info')) {\n embedder = await registry.lookupAction(\n `/embedder/${(params.embedder as EmbedderReference).name}`\n );\n } else {\n embedder = params.embedder as EmbedderAction;\n }\n if (!embedder) {\n throw new Error('Unable to utilize the provided embedder');\n }\n const response = await embedder({\n input: params.content.map((i) =>\n typeof i === 'string' ? Document.fromText(i, params.metadata) : i\n ),\n options: params.options,\n });\n return response.embeddings;\n}\n\n/**\n * Zod schema of embedder info object.\n */\nexport const EmbedderInfoSchema = z.object({\n /** Friendly label for this model (e.g. \"Google AI - Gemini Pro\") */\n label: z.string().optional(),\n /** Supported model capabilities. */\n supports: z\n .object({\n /** Model can input this type of data. */\n input: z.array(z.enum(['text', 'image', 'video'])).optional(),\n /** Model can support multiple languages */\n multilingual: z.boolean().optional(),\n })\n .optional(),\n /** Embedding dimension */\n dimensions: z.number().optional(),\n});\nexport type EmbedderInfo = z.infer;\n\n/**\n * A reference object that can used to resolve an embedder instance. Include additional type information\n * about the specific embedder, e.g. custom config options schema.\n */\nexport interface EmbedderReference<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n> {\n name: string;\n configSchema?: CustomOptions;\n info?: EmbedderInfo;\n config?: z.infer;\n version?: string;\n}\n\n/**\n * Helper method to configure a {@link EmbedderReference} to a plugin.\n */\nexport function embedderRef<\n CustomOptionsSchema extends z.ZodTypeAny = z.ZodTypeAny,\n>(\n options: EmbedderReference\n): EmbedderReference {\n return { ...options };\n}\n\n/**\n * Packages embedder information into ActionMetadata object.\n */\nexport function embedderActionMetadata({\n name,\n info,\n configSchema,\n}: {\n name: string;\n info?: EmbedderInfo;\n configSchema?: z.ZodTypeAny;\n}): ActionMetadata {\n return {\n actionType: 'embedder',\n name: name,\n inputJsonSchema: toJsonSchema({ schema: EmbedRequestSchema }),\n outputJsonSchema: toJsonSchema({ schema: EmbedResponseSchema }),\n metadata: {\n embedder: {\n ...info,\n customOptions: configSchema\n ? toJsonSchema({ schema: configSchema })\n : undefined,\n },\n },\n } as ActionMetadata;\n}\n"],"mappings":"AAgBA;AAAA,EACE;AAAA,EACA;AAAA,OAGK;AAEP,SAAS,oBAAoB;AAC7B,SAAS,UAAU,0BAA6C;AAWzD,MAAM,kBAAkB,EAAE,OAAO;AAAA,EACtC,WAAW,EAAE,MAAM,EAAE,OAAO,CAAC;AAAA,EAC7B,UAAU,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC,EAAE,SAAS;AACvD,CAAC;AAcD,MAAM,qBAAqB,EAAE,OAAO;AAAA,EAClC,OAAO,EAAE,MAAM,kBAAkB;AAAA,EACjC,SAAS,EAAE,IAAI,EAAE,SAAS;AAC5B,CAAC;AAKD,MAAM,sBAAsB,EAAE,OAAO;AAAA,EACnC,YAAY,EAAE,MAAM,eAAe;AAAA;AAErC,CAAC;AAuBD,SAAS,aACP,UACA,cAC+B;AAC/B,QAAM,WAAW;AACjB,WAAS,iBAAiB;AAC1B,SAAO;AACT;AAKO,SAAS,eAGd,UACA,SAKA,QACA;AACA,QAAM,WAAW;AAAA,IACf;AAAA,IACA;AAAA,MACE,YAAY;AAAA,MACZ,MAAM,QAAQ;AAAA,MACd,aAAa,QAAQ,eACjB,mBAAmB,OAAO;AAAA,QACxB,SAAS,QAAQ,aAAa,SAAS;AAAA,MACzC,CAAC,IACD;AAAA,MACJ,cAAc;AAAA,MACd,UAAU;AAAA,QACR,MAAM;AAAA,QACN,MAAM,QAAQ;AAAA,QACd,UAAU;AAAA,UACR,eAAe,QAAQ,eACnB,aAAa,EAAE,QAAQ,QAAQ,aAAa,CAAC,IAC7C;AAAA,QACN;AAAA,MACF;AAAA,IACF;AAAA,IACA,CAAC,MACC;AAAA,MACE,EAAE,MAAM,IAAI,CAAC,OAAO,IAAI,SAAS,EAAE,CAAC;AAAA,MACpC,EAAE;AAAA,IACJ;AAAA,EACJ;AACA,QAAM,MAAM;AAAA,IACV;AAAA,IACA,QAAQ;AAAA,EACV;AACA,SAAO;AACT;AAYA,eAAsB,MACpB,UACA,QACsB;AACtB,QAAM,WAAW,MAAM,gBAAgB,UAAU,MAAM;AACvD,MAAI,CAAC,SAAS,gBAAgB;AAC5B,QAAI;AACJ,QAAI,OAAO,OAAO,aAAa,UAAU;AACvC,mBAAa,OAAO;AAAA,IACtB,WAAY,OAAO,UAA6B,UAAU,MAAM;AAC9D,mBAAc,OAAO,SAA4B,SAAS;AAAA,IAC5D,OAAO;AACL,mBAAc,OAAO,SAAoC;AAAA,IAC3D;AACA,UAAM,IAAI,MAAM,8BAA8B,UAAU,EAAE;AAAA,EAC5D;AACA,QAAM,WAAW,MAAM,SAAS,eAAe;AAAA,IAC7C,OACE,OAAO,OAAO,YAAY,WACtB,CAAC,SAAS,SAAS,OAAO,SAAS,OAAO,QAAQ,CAAC,IACnD,CAAC,OAAO,OAAO;AAAA,IACrB,SAAS;AAAA,MACP,SAAS,SAAS;AAAA,MAClB,GAAG,SAAS;AAAA,MACZ,GAAG,OAAO;AAAA,IACZ;AAAA,EACF,CAAC;AACD,SAAO,SAAS;AAClB;AAQA,eAAe,gBAGb,UACA,QAC0C;AAC1C,MAAI,OAAO,OAAO,aAAa,UAAU;AACvC,WAAO;AAAA,MACL,gBAAgB,MAAM,SAAS;AAAA,QAC7B,aAAa,OAAO,QAAQ;AAAA,MAC9B;AAAA,IACF;AAAA,EACF,WAAW,OAAO,eAAe,KAAK,OAAO,UAAU,UAAU,GAAG;AAClE,WAAO;AAAA,MACL,gBAAgB,OAAO;AAAA,IACzB;AAAA,EACF,WAAW,OAAO,eAAe,KAAK,OAAO,UAAU,MAAM,GAAG;AAC9D,UAAM,MAAM,OAAO;AACnB,WAAO;AAAA,MACL,gBAAgB,MAAM,SAAS;AAAA,QAC7B,aAAc,OAAO,SAA+B,IAAI;AAAA,MAC1D;AAAA,MACA,QAAQ;AAAA,QACN,GAAG,IAAI;AAAA,MACT;AAAA,MACA,SAAS,IAAI;AAAA,IACf;AAAA,EACF;AACA,QAAM,IAAI,MAAM,8BAA8B,OAAO,QAAQ,EAAE;AACjE;AAKA,eAAsB,UAGpB,UACA,QAMyB;AACzB,MAAI;AACJ,MAAI,OAAO,OAAO,aAAa,UAAU;AACvC,eAAW,MAAM,SAAS,aAAa,aAAa,OAAO,QAAQ,EAAE;AAAA,EACvE,WAAW,OAAO,eAAe,KAAK,OAAO,UAAU,MAAM,GAAG;AAC9D,eAAW,MAAM,SAAS;AAAA,MACxB,aAAc,OAAO,SAA+B,IAAI;AAAA,IAC1D;AAAA,EACF,OAAO;AACL,eAAW,OAAO;AAAA,EACpB;AACA,MAAI,CAAC,UAAU;AACb,UAAM,IAAI,MAAM,yCAAyC;AAAA,EAC3D;AACA,QAAM,WAAW,MAAM,SAAS;AAAA,IAC9B,OAAO,OAAO,QAAQ;AAAA,MAAI,CAAC,MACzB,OAAO,MAAM,WAAW,SAAS,SAAS,GAAG,OAAO,QAAQ,IAAI;AAAA,IAClE;AAAA,IACA,SAAS,OAAO;AAAA,EAClB,CAAC;AACD,SAAO,SAAS;AAClB;AAKO,MAAM,qBAAqB,EAAE,OAAO;AAAA;AAAA,EAEzC,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA;AAAA,EAE3B,UAAU,EACP,OAAO;AAAA;AAAA,IAEN,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,QAAQ,SAAS,OAAO,CAAC,CAAC,EAAE,SAAS;AAAA;AAAA,IAE5D,cAAc,EAAE,QAAQ,EAAE,SAAS;AAAA,EACrC,CAAC,EACA,SAAS;AAAA;AAAA,EAEZ,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAoBM,SAAS,YAGd,SACwC;AACxC,SAAO,EAAE,GAAG,QAAQ;AACtB;AAKO,SAAS,uBAAuB;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AACF,GAImB;AACjB,SAAO;AAAA,IACL,YAAY;AAAA,IACZ;AAAA,IACA,iBAAiB,aAAa,EAAE,QAAQ,mBAAmB,CAAC;AAAA,IAC5D,kBAAkB,aAAa,EAAE,QAAQ,oBAAoB,CAAC;AAAA,IAC9D,UAAU;AAAA,MACR,UAAU;AAAA,QACR,GAAG;AAAA,QACH,eAAe,eACX,aAAa,EAAE,QAAQ,aAAa,CAAC,IACrC;AAAA,MACN;AAAA,IACF;AAAA,EACF;AACF;","names":[]}