temporary fix for encoding packaging issues

Nate Sesti 2024-08-17 12:31:15 -07:00
parent 455534263a
commit f34b06fb83
4 changed files with 42 additions and 8 deletions


@@ -30,3 +30,15 @@ The build process is otherwise defined entirely in `build.js`.
 ## Debugging
 
 To debug the binary with IntelliJ, set `useTcp` to `true` in `CoreMessenger.kt`, and then in VS Code run the "Core Binary" debug script. Instead of starting a subprocess for the binary and communicating over stdin/stdout, the IntelliJ extension will connect over TCP to the server started from the VS Code window. You can place breakpoints anywhere in the `core` or `binary` folders.
+
+## Building
+
+```bash
+npm run build
+```
+
+## Testing
+
+```bash
+npm run test
+```
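The Debugging note above swaps the stdin/stdout transport for TCP. As a rough illustration, this is what a client-side connection of that shape looks like in TypeScript; the port number and the message format are assumptions for the sketch, not values taken from this commit:

```typescript
import * as net from "node:net";

// Hypothetical port; use whatever the "Core Binary" debug script listens on.
const CORE_DEBUG_PORT = 3000;

const socket = net.createConnection(
  { host: "127.0.0.1", port: CORE_DEBUG_PORT },
  () => {
    // Send the same newline-delimited JSON that would otherwise go over
    // stdin; the messageType name here is illustrative only.
    socket.write(JSON.stringify({ messageType: "ping", data: {} }) + "\n");
  },
);

socket.on("data", (chunk) => console.log("core replied:", chunk.toString()));
```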


@@ -162,6 +162,7 @@ async function installNodeModuleInTempDirAndCopyToCurrent(packageName, toCopy) {
     "../core/vendor/tree-sitter.wasm",
     "../core/llm/llamaTokenizerWorkerPool.mjs",
     "../core/llm/llamaTokenizer.mjs",
+    "../core/llm/tiktokenWorkerPool.mjs",
   ];
   for (const f of filesToCopy) {
     fs.copyFileSync(
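The build-side change above just appends the new tiktoken worker to `filesToCopy`. The copy call's arguments are cut off by the hunk boundary; a plausible completion, assuming the files land flat in an `out` directory next to the bundle (an assumption, not confirmed by this diff), is:

```typescript
import * as fs from "node:fs";
import * as path from "node:path";

// Mirrors the list in the hunk above (abbreviated).
const filesToCopy = [
  "../core/llm/llamaTokenizerWorkerPool.mjs",
  "../core/llm/tiktokenWorkerPool.mjs",
];

// Sketch only: the "out" destination and flat layout are assumptions,
// since the real fs.copyFileSync arguments are truncated in the hunk.
for (const f of filesToCopy) {
  fs.copyFileSync(f, path.join("out", path.basename(f)));
}
```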


@@ -1,7 +1,5 @@
 import llamaTokenizer from "./llamaTokenizer.js";
-import { Tiktoken, encodingForModel as _encodingForModel } from "js-tiktoken";
-import path from "path";
-
 import workerpool from "workerpool";
+import * as path from "path";
 
 export interface AsyncEncoder {
   encode(text: string): Promise<number[]>;
@@ -13,7 +11,9 @@ export class LlamaAsyncEncoder implements AsyncEncoder {
   private workerPool: workerpool.Pool;
 
   constructor() {
-    this.workerPool = workerpool.pool(workerCodeFilePath("llamaTokenizerWorkerPool.mjs"));
+    this.workerPool = workerpool.pool(
+      workerCodeFilePath("llamaTokenizerWorkerPool.mjs"),
+    );
   }
 
   async encode(text: string): Promise<number[]> {
@@ -35,7 +35,9 @@ export class GPTAsyncEncoder implements AsyncEncoder {
   private workerPool: workerpool.Pool;
 
   constructor() {
-    this.workerPool = workerpool.pool(workerCodeFilePath("tiktokenWorkerPool.mjs"));
+    this.workerPool = workerpool.pool(
+      workerCodeFilePath("tiktokenWorkerPool.mjs"),
+    );
  }
 
   async encode(text: string): Promise<number[]> {
@@ -58,4 +60,4 @@ function workerCodeFilePath(workerFileName: string): string {
     return path.join(__dirname, "llm", workerFileName);
   }
   return path.join(__dirname, workerFileName);
-}
\ No newline at end of file
+}
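Both encoder classes point `workerpool` at an `.mjs` worker file that must exist on disk at runtime, which is exactly why the packaging step above matters. A minimal sketch of what a worker such as `tiktokenWorkerPool.mjs` plausibly registers (the method names and model choice are assumptions):

```typescript
import { encodingForModel } from "js-tiktoken";
import workerpool from "workerpool";

// Assumed encoding; the real worker may select it differently.
const encoding = encodingForModel("gpt-4");

// Register methods callable from the main thread via
// pool.exec("encode", [text]) and pool.exec("decode", [tokens]).
workerpool.worker({
  encode: (text: string): number[] => encoding.encode(text),
  decode: (tokens: number[]): string => encoding.decode(tokens),
});
```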


@@ -24,18 +24,37 @@ class LlamaEncoding implements Encoding {
   }
 }
 
+class NonWorkerAsyncEncoder implements AsyncEncoder {
+  constructor(private readonly encoding: Encoding) {}
+
+  async close(): Promise<void> {}
+
+  async encode(text: string): Promise<number[]> {
+    return this.encoding.encode(text);
+  }
+
+  async decode(tokens: number[]): Promise<string> {
+    return this.encoding.decode(tokens);
+  }
+}
+
 let gptEncoding: Encoding | null = null;
 const gptAsyncEncoder = new GPTAsyncEncoder();
 const llamaEncoding = new LlamaEncoding();
 const llamaAsyncEncoder = new LlamaAsyncEncoder();
 
 function asyncEncoderForModel(modelName: string): AsyncEncoder {
+  // Temporary due to issues packaging the worker files
+  if (process.env.IS_BINARY) {
+    const encoding = encodingForModel(modelName);
+    return new NonWorkerAsyncEncoder(encoding);
+  }
+
   const modelType = autodetectTemplateType(modelName);
   if (!modelType || modelType === "none") {
     return gptAsyncEncoder;
   }
-  // Temporary due to issues packaging the worker files
-  return process.env.IS_BINARY ? gptAsyncEncoder : llamaAsyncEncoder;
+  return llamaAsyncEncoder;
 }
 
 function encodingForModel(modelName: string): Encoding {
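The net effect of the hunk above: when `IS_BINARY` is set, encoding runs synchronously in-process behind the same `AsyncEncoder` interface, so no worker file ever needs to be resolved on disk. Callers are unaffected either way; a hypothetical example (the helper name is illustrative, not from this commit):

```typescript
// Works identically with worker-backed and in-process encoders.
async function countTokensForModel(
  modelName: string,
  text: string,
): Promise<number> {
  const encoder = asyncEncoderForModel(modelName);
  const tokens = await encoder.encode(text);
  return tokens.length;
}
```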