OrkaJS
Orka.JS

Fine-tuning Orchestration

Dataset validation, API orchestration, and model versioning

The fine-tuning package provides tools to orchestrate fine-tuning jobs across multiple providers, validate datasets, collect feedback, and manage model versions.

Installation

npm install @orka-js/finetuning
# or
pnpm add @orka-js/finetuning

Features

Dataset Validation

Validate JSONL datasets before training

Multi-Provider

OpenAI, Anthropic, Mistral, Together, Anyscale

Cost Estimation

Estimate training costs before starting

Job Monitoring

Track training progress and metrics

Model Versioning

Track fine-tuned model versions

Feedback Collection

Collect user feedback and convert to datasets

Dataset Format

The expected JSONL format for fine-tuning:

{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi! How can I help?"}]}
{"messages": [{"role": "user", "content": "What is 2+2?"}, {"role": "assistant", "content": "2+2 equals 4."}]}

# Dataset Validation

Validate your JSONL datasets before submitting for fine-tuning.

import { DatasetValidator } from '@orka-js/finetuning';

// Validate a JSONL training file against OpenAI's expected format.
const validator = new DatasetValidator('openai');
const report = await validator.validateFile('./training.jsonl');

if (report.valid) {
  console.log('Dataset is valid!');
  console.log('Stats:', report.stats);
  // { totalExamples: 100, totalTokens: 50000, avgTokensPerExample: 500, ... }
} else {
  console.log('Errors:', report.errors);
  // [{ line: 5, message: 'Missing assistant message' }, ...]
}

// Surface non-fatal issues as well
if (report.warnings.length > 0) {
  console.log('Warnings:', report.warnings);
}

# Fine-tuning Orchestration

Create and monitor fine-tuning jobs with automatic status tracking.

import { FineTuningOrchestrator } from '@orka-js/finetuning';

// Configure a fine-tuning run against OpenAI.
const orchestrator = new FineTuningOrchestrator({
  provider: 'openai',
  baseModel: 'gpt-4o-mini-2024-07-18',
  apiKey: process.env.OPENAI_API_KEY,
  hyperparameters: {
    nEpochs: 3,
    batchSize: 4,
  },
});

// Estimate cost before training (100k tokens, 3 epochs)
const estimate = orchestrator.estimateCost(100000, 3);
console.log(`Estimated cost: $${estimate.trainingCost.toFixed(2)}`);

// Create a fine-tuning job with an optional validation split
const job = await orchestrator.createJob('./training.jsonl', {
  validationPath: './validation.jsonl',
});

console.log('Job created:', job.id);

// Subscribe to progress events emitted while the job runs
orchestrator.on((event) => {
  switch (event.type) {
    case 'metrics':
      console.log('Training loss:', event.metrics?.trainingLoss);
      break;
    case 'message':
      console.log('Status:', event.message);
      break;
  }
});

// Block until the provider reports completion
const completed = await orchestrator.waitForCompletion(job.id);
console.log('Fine-tuned model:', completed.fineTunedModel);

// Inspect the tracked model versions
const versions = orchestrator.getModelVersions();
const latest = orchestrator.getLatestVersion();

# Feedback Collection

Collect user feedback from your application and convert it to training data.

import { FeedbackCollector } from '@orka-js/finetuning';

// Require 50 samples and drop entries rated below 4 before training.
const collector = new FeedbackCollector({
  minSamples: 50,
  filterLowRatings: true,
  ratingThreshold: 4,
});

// Record a well-rated interaction
collector.add({
  input: 'What is the capital of France?',
  output: 'The capital of France is Paris.',
  rating: 5,
});

// Record a weaker one, including the preferred answer and a note
collector.add({
  input: 'Explain quantum computing',
  output: 'Quantum computing uses qubits...',
  expectedOutput: 'Quantum computing is a type of computation...',
  rating: 3,
  feedback: 'Could be more detailed',
});

// Inspect aggregate statistics
const stats = collector.getStats();
console.log('Total entries:', stats.total);
console.log('Filtered entries:', stats.filtered);
console.log('Average rating:', stats.avgRating);

// Once enough samples pass the filter, build a training set
if (collector.isReadyForTraining()) {
  const dataset = collector.toDataset();

  // Or save directly to file
  await collector.saveToFile('./feedback-dataset.jsonl');
}

Supported Providers

Live Ecosystem Compatibility

| Provider | Integration Status | Capability / Models |
|---|---|---|
| OpenAI | Full | GPT-4o, GPT-3.5-turbo, DALL-E |
| Mistral AI | Full | Mixtral, Mistral Large, Codestral |
| Anthropic | Beta | Claude 3.5 Sonnet / Opus |
| Together AI | Planned | Llama 3, Qwen, Mixtral 8x22B |
| Anyscale | Planned | Coming Q3 2026 |

* All providers are monitored 24/7 for API uptime and performance latency.

Complete Examples

Example 1: Customer Support Fine-tuning Pipeline

Complete workflow from feedback collection to fine-tuned model deployment:

import { FeedbackCollector, DatasetValidator, FineTuningOrchestrator } from '@orka-js/finetuning';
import { OpenAIAdapter } from '@orka-js/openai';
 
// Step 1: Collect user feedback
// Shared collector that persists production feedback to a local JSONL file.
const collector = new FeedbackCollector({
  storage: 'file',
  filePath: './feedback.jsonl'
});
 
// Collect feedback from production interactions
// Record one production interaction along with the user's rating and correction.
async function collectProductionFeedback() {
  // User asks a question and the model answers
  const question = "How do I reset my password?";
  const answer = "Click on 'Forgot Password' on the login page.";

  // User rates the response and supplies an improved answer
  await collector.addFeedback({
    input: question,
    output: answer,
    rating: 8,
    correction: "Click on 'Forgot Password' on the login page, then check your email for the reset link.",
    metadata: {
      userId: 'user_123',
      timestamp: new Date().toISOString(),
      category: 'password_reset'
    }
  });

  console.log('Feedback collected. Total:', await collector.getCount());
}
 
// Step 2: Export and validate dataset
// Export highly rated feedback as an OpenAI-format dataset and validate it.
// Throws if validation fails so the pipeline stops before spending money.
async function prepareDataset() {
  const dataset = await collector.exportDataset({
    format: 'openai',
    minRating: 7, // Only use highly rated responses
    includeCorrections: true
  });

  console.log(`Exported ${dataset.length} training examples`);

  const validator = new DatasetValidator();
  const report = await validator.validate(dataset, {
    provider: 'openai',
    checkDuplicates: true,
    checkBalance: true
  });

  if (!report.valid) {
    console.error('Validation errors:', report.errors);
    throw new Error('Dataset validation failed');
  }

  console.log('Dataset validated successfully');
  console.log('Stats:', report.stats);

  return dataset;
}
 
// Step 3: Estimate cost and create fine-tuning job
// Price the run, then submit a fine-tuning job for the prepared dataset.
// Note: parameter tightened from `any[]` to `unknown[]` — the function never
// inspects individual examples, so the safer top type suffices and every
// existing caller still compiles.
async function createFineTuningJob(dataset: unknown[]) {
  const orchestrator = new FineTuningOrchestrator({
    provider: 'openai',
    apiKey: process.env.OPENAI_API_KEY!
  });

  // Estimate cost before starting
  const costEstimate = await orchestrator.estimateCost({
    dataset,
    baseModel: 'gpt-3.5-turbo',
    epochs: 3
  });

  console.log(`Estimated cost: $${costEstimate.estimatedCost.toFixed(2)}`);
  console.log(`Training tokens: ${costEstimate.trainingTokens}`);

  // Create fine-tuning job
  const job = await orchestrator.createJob({
    trainingData: dataset,
    baseModel: 'gpt-3.5-turbo',
    suffix: 'customer-support-v1',
    hyperparameters: {
      nEpochs: 3,
      batchSize: 4,
      learningRateMultiplier: 0.1
    }
  });

  console.log('Fine-tuning job created:', job.id);
  return job;
}
 
// Step 4: Monitor job progress
// Poll the job every 30 seconds until it succeeds, fails, or disappears.
//
// Fixed two defects in the original:
//  1. The function was `await`ed by callers but resolved immediately,
//     because the setInterval loop was never tied to the returned Promise.
//     It now resolves only when polling stops.
//  2. The async interval callback had no error handling, so one transient
//     API failure became an unhandled rejection. Errors are now logged and
//     polling continues on the next tick.
async function monitorJob(orchestrator: FineTuningOrchestrator, jobId: string): Promise<void> {
  return new Promise((resolve) => {
    const checkInterval = setInterval(async () => {
      try {
        const jobs = await orchestrator.listJobs();
        const job = jobs.find(j => j.id === jobId);

        if (!job) {
          clearInterval(checkInterval);
          resolve();
          return;
        }

        console.log(`Job ${jobId} status: ${job.status}`);

        if (job.status === 'succeeded') {
          console.log('Fine-tuning completed!');
          console.log('Fine-tuned model:', job.fineTunedModel);
          clearInterval(checkInterval);

          // Use the fine-tuned model
          await useFineTunedModel(job.fineTunedModel!);
          resolve();
        } else if (job.status === 'failed') {
          console.error('Fine-tuning failed:', job.error);
          clearInterval(checkInterval);
          resolve();
        }
      } catch (err) {
        // Transient polling failure — log and retry on the next tick.
        console.error('Polling error:', err);
      }
    }, 30000); // Check every 30 seconds
  });
}
 
// Step 5: Use the fine-tuned model
// Smoke-test the freshly trained model with a known customer query.
async function useFineTunedModel(modelName: string) {
  const client = new OpenAIAdapter({
    apiKey: process.env.OPENAI_API_KEY!,
    model: modelName
  });

  const reply = await client.generate({
    messages: [
      { role: 'user', content: 'How do I reset my password?' }
    ]
  });

  console.log('Fine-tuned model response:', reply.content);
}
 
// Run the complete pipeline
// Drive the whole pipeline: collect → prepare → train → monitor.
async function main() {
  // Collect feedback (run this continuously in production)
  await collectProductionFeedback();

  // When ready to fine-tune (e.g., after collecting 1000+ examples)
  const dataset = await prepareDataset();
  const job = await createFineTuningJob(dataset);

  // Fresh orchestrator instance used purely for status polling
  const watcher = new FineTuningOrchestrator({
    provider: 'openai',
    apiKey: process.env.OPENAI_API_KEY!
  });

  await monitorJob(watcher, job.id);
}

main().catch(console.error);

Example 2: A/B Testing with Fine-tuned Models

Compare base model vs fine-tuned model performance:

import { FineTuningOrchestrator, FeedbackCollector } from '@orka-js/finetuning';
import { OpenAIAdapter } from '@orka-js/openai';
 
// One recorded comparison sample produced by ABTestManager.runTest.
interface ABTestResult {
modelType: 'base' | 'finetuned'; // which model served this query
response: string; // model output shown to the user
rating?: number; // filled in later by recordFeedback, if the user rated
latency: number; // end-to-end generation time in milliseconds
}
 
/**
 * Routes user queries 50/50 between a base model and a fine-tuned model,
 * records responses, latencies, and user ratings, and reports comparative
 * analytics.
 *
 * Fixes over the original:
 *  - getAnalytics no longer divides by zero (NaN) when one bucket has no
 *    rated samples yet; averages default to 0 and percentage deltas report
 *    'n/a' when the baseline is 0.
 *  - rated-sample filtering uses `!= null` instead of truthiness, so a
 *    legitimate rating of 0 is not silently dropped.
 */
class ABTestManager {
  private baseModel: OpenAIAdapter;
  private fineTunedModel: OpenAIAdapter;
  private collector: FeedbackCollector;
  private results: ABTestResult[] = [];

  constructor(fineTunedModelName: string) {
    this.baseModel = new OpenAIAdapter({
      apiKey: process.env.OPENAI_API_KEY!,
      model: 'gpt-3.5-turbo'
    });

    this.fineTunedModel = new OpenAIAdapter({
      apiKey: process.env.OPENAI_API_KEY!,
      model: fineTunedModelName
    });

    this.collector = new FeedbackCollector({
      storage: 'file',
      filePath: './ab-test-results.jsonl'
    });
  }

  /** Send one query to a randomly chosen model and record the outcome. */
  async runTest(userInput: string): Promise<void> {
    // Randomly select which model to use (50/50 split)
    const useFineTuned = Math.random() > 0.5;
    const model = useFineTuned ? this.fineTunedModel : this.baseModel;
    const modelType = useFineTuned ? 'finetuned' : 'base';

    const startTime = Date.now();
    const response = await model.generate({
      messages: [{ role: 'user', content: userInput }]
    });
    const latency = Date.now() - startTime;

    const result: ABTestResult = {
      modelType,
      response: response.content,
      latency
    };

    this.results.push(result);

    // Show response to user and collect feedback
    console.log(`Model: ${modelType}`);
    console.log(`Response: ${response.content}`);
    console.log(`Latency: ${latency}ms`);
  }

  /** Attach a user rating to a previously recorded result and persist it. */
  async recordFeedback(index: number, rating: number): Promise<void> {
    const result = this.results[index];
    if (!result) return;

    result.rating = rating;

    await this.collector.addFeedback({
      input: 'User query',
      output: result.response,
      rating,
      metadata: {
        modelType: result.modelType,
        latency: result.latency,
        timestamp: new Date().toISOString()
      }
    });
  }

  /** Aggregate rated results into per-model averages and relative deltas. */
  async getAnalytics() {
    const rated = (type: ABTestResult['modelType']) =>
      this.results.filter(r => r.modelType === type && r.rating != null);

    const baseResults = rated('base');
    const fineTunedResults = rated('finetuned');

    // Average helper that avoids NaN while a bucket is still empty.
    const avg = (xs: ABTestResult[], pick: (r: ABTestResult) => number) =>
      xs.length > 0 ? xs.reduce((sum, r) => sum + pick(r), 0) / xs.length : 0;

    const avgRatingBase = avg(baseResults, r => r.rating ?? 0);
    const avgRatingFineTuned = avg(fineTunedResults, r => r.rating ?? 0);
    const avgLatencyBase = avg(baseResults, r => r.latency);
    const avgLatencyFineTuned = avg(fineTunedResults, r => r.latency);

    // Relative change, guarded against a zero baseline.
    const pct = (delta: number, baseline: number) =>
      baseline !== 0 ? (delta / baseline * 100).toFixed(1) + '%' : 'n/a';

    return {
      base: {
        count: baseResults.length,
        avgRating: avgRatingBase.toFixed(2),
        avgLatency: avgLatencyBase.toFixed(0) + 'ms'
      },
      fineTuned: {
        count: fineTunedResults.length,
        avgRating: avgRatingFineTuned.toFixed(2),
        avgLatency: avgLatencyFineTuned.toFixed(0) + 'ms'
      },
      improvement: {
        rating: pct(avgRatingFineTuned - avgRatingBase, avgRatingBase),
        latency: pct(avgLatencyBase - avgLatencyFineTuned, avgLatencyBase)
      }
    };
  }
}
 
// Usage
async function runABTest() {
const abTest = new ABTestManager('ft:gpt-3.5-turbo:customer-support-v1');
 
// Run 100 tests
const queries = [
"How do I reset my password?",
"What are your business hours?",
"How do I cancel my subscription?",
// ... more queries
];
 
for (let i = 0; i < queries.length; i++) {
await abTest.runTest(queries[i]);
 
// Simulate user rating (in production, get real user feedback)
const rating = Math.floor(Math.random() * 3) + 7; // 7-10
await abTest.recordFeedback(i, rating);
}
 
// Get analytics
const analytics = await abTest.getAnalytics();
console.log('A/B Test Results:', analytics);
 
// Example output:
// {
// base: { count: 52, avgRating: '7.8', avgLatency: '1250ms' },
// fineTuned: { count: 48, avgRating: '8.9', avgLatency: '980ms' },
// improvement: { rating: '+14.1%', latency: '+21.6%' }
// }
}
 
runABTest().catch(console.error);

Example 3: Continuous Learning Loop

Automatically retrain models based on new feedback:

import { FeedbackCollector, DatasetValidator, FineTuningOrchestrator } from '@orka-js/finetuning';
 
/**
 * Automates the retraining loop: accumulate feedback, and once enough new
 * high-quality samples exist, export → validate → cost-gate → fine-tune →
 * deploy the resulting model to staging.
 *
 * Fix over the original: monitorAndDeploy is `await`ed by retrain but used
 * to resolve immediately — the setInterval loop was never tied to the
 * returned Promise. It now resolves only when polling finishes, and the
 * async interval callback is guarded so a transient API error cannot become
 * an unhandled rejection.
 */
class ContinuousLearningPipeline {
  private collector: FeedbackCollector;
  private validator: DatasetValidator;
  private orchestrator: FineTuningOrchestrator;
  private retrainThreshold = 1000; // Retrain after 1000 new feedbacks
  private minRating = 8; // Only use high-quality feedback

  constructor() {
    this.collector = new FeedbackCollector({
      storage: 'database',
      connectionString: process.env.DATABASE_URL!
    });

    this.validator = new DatasetValidator();

    this.orchestrator = new FineTuningOrchestrator({
      provider: 'openai',
      apiKey: process.env.OPENAI_API_KEY!
    });
  }

  /** Cron entry point: retrain only once enough new feedback has arrived. */
  async checkAndRetrain(): Promise<void> {
    const feedbackCount = await this.collector.getCount();
    const lastRetrainCount = await this.getLastRetrainCount();
    const newFeedbackCount = feedbackCount - lastRetrainCount;

    console.log(`New feedback since last retrain: ${newFeedbackCount}`);

    if (newFeedbackCount >= this.retrainThreshold) {
      console.log('Threshold reached. Starting retraining...');
      await this.retrain();
    }
  }

  /** Export, validate, cost-gate, and submit a new fine-tuning job. */
  private async retrain(): Promise<void> {
    // Export high-quality feedback collected since the previous retrain
    const dataset = await this.collector.exportDataset({
      format: 'openai',
      minRating: this.minRating,
      includeCorrections: true,
      since: await this.getLastRetrainDate()
    });

    console.log(`Exported ${dataset.length} new training examples`);

    // Validate before spending money on training
    const validation = await this.validator.validate(dataset, {
      provider: 'openai',
      checkDuplicates: true,
      checkBalance: true
    });

    if (!validation.valid) {
      console.error('Dataset validation failed:', validation.errors);
      await this.notifyTeam('Dataset validation failed', validation.errors);
      return;
    }

    // Estimate cost
    const costEstimate = await this.orchestrator.estimateCost({
      dataset,
      baseModel: 'gpt-3.5-turbo',
      epochs: 3
    });

    console.log(`Estimated cost: $${costEstimate.estimatedCost.toFixed(2)}`);

    // Runs costing more than $100 require explicit human approval.
    if (costEstimate.estimatedCost > 100) {
      await this.notifyTeam('Manual approval needed', {
        cost: costEstimate.estimatedCost,
        examples: dataset.length
      });
      return;
    }

    // Create job
    const job = await this.orchestrator.createJob({
      trainingData: dataset,
      baseModel: 'gpt-3.5-turbo',
      suffix: `v${await this.getNextVersion()}`,
      hyperparameters: {
        nEpochs: 3,
        batchSize: 4
      }
    });

    console.log(`Fine-tuning job created: ${job.id}`);

    // Save retrain metadata so the next run can compute the delta
    await this.saveRetrainMetadata({
      jobId: job.id,
      feedbackCount: await this.collector.getCount(),
      timestamp: new Date()
    });

    // Monitor job until it settles
    await this.monitorAndDeploy(job.id);
  }

  /** Poll once per minute until the job succeeds or fails, then deploy/notify. */
  private async monitorAndDeploy(jobId: string): Promise<void> {
    return new Promise((resolve) => {
      const checkInterval = setInterval(async () => {
        try {
          const jobs = await this.orchestrator.listJobs();
          const job = jobs.find(j => j.id === jobId);

          if (!job) {
            clearInterval(checkInterval);
            resolve();
            return;
          }

          if (job.status === 'succeeded') {
            console.log(`Job ${jobId} succeeded!`);
            console.log(`New model: ${job.fineTunedModel}`);

            clearInterval(checkInterval);

            // Auto-deploy to staging
            await this.deployToStaging(job.fineTunedModel!);

            // Notify team
            await this.notifyTeam('New model deployed to staging', {
              model: job.fineTunedModel,
              jobId: job.id
            });
            resolve();
          } else if (job.status === 'failed') {
            console.error(`Job ${jobId} failed:`, job.error);
            clearInterval(checkInterval);
            await this.notifyTeam('Fine-tuning failed', { jobId, error: job.error });
            resolve();
          }
        } catch (err) {
          // Transient polling failure — log and retry on the next tick.
          console.error('Polling error:', err);
        }
      }, 60000); // Check every minute
    });
  }

  private async deployToStaging(modelName: string): Promise<void> {
    // Update environment variable or config
    console.log(`Deploying ${modelName} to staging...`);
    // Implementation depends on your deployment setup
  }

  private async notifyTeam(subject: string, data: unknown): Promise<void> {
    // Send notification (Slack, email, etc.)
    console.log(`[NOTIFICATION] ${subject}`, data);
  }

  // --- persistence stubs: replace with real database access ---

  private async getLastRetrainCount(): Promise<number> {
    // Get from database
    return 0;
  }

  private async getLastRetrainDate(): Promise<Date> {
    // Get from database
    return new Date(0);
  }

  private async getNextVersion(): Promise<number> {
    // Get from database
    return 1;
  }

  private async saveRetrainMetadata(metadata: unknown): Promise<void> {
    // Save to database
    console.log('Saving retrain metadata:', metadata);
  }
}
 
// Run as a cron job (e.g., daily)
async function main() {
  const pipeline = new ContinuousLearningPipeline();
  await pipeline.checkAndRetrain();
}

// Or run continuously inside a long-lived process.
// Fixed: the original passed an async callback to setInterval with no
// rejection handling, so one failed cycle raised an unhandled rejection
// (fatal on modern Node). Failures are now caught and logged per cycle.
async function runContinuously() {
  const pipeline = new ContinuousLearningPipeline();

  setInterval(() => {
    void pipeline.checkAndRetrain().catch(console.error);
  }, 24 * 60 * 60 * 1000); // Check daily
}

main().catch(console.error);