screentinker/server/lib/image-gen.js
ScreenTinker 303c83e86a feat(ai): generate background + foreground images for signs (#41 Phase 2)
A prompt now produces a full sign: the LLM writes the design AND image prompts,
the server generates the images and composites them with the crisp text layer.

- lib/image-gen.js: text-to-image with 3 BYO/self-hostable backends, all behind
  the SSRF guard: 'sdcpp' (local stable-diffusion.cpp OpenAI-compatible server,
  exact small sizes that fit VRAM), 'openai' (cloud / OpenAI-compatible, snapped
  sizes), 'comfyui' (prompt/history/view API).
- ai.js: prompt asks for a background_prompt (preferred — full-bleed atmosphere)
  and an optional foreground image element; after the design is normalized, the
  bg + fg images are generated best-effort (a failed image never fails the sign)
  and returned as data URLs. New image_* settings (provider/base_url/model),
  image_provider whitelist, schema column + migration.
- designer.js: AI-images section in settings; generate applies the background
  image; publish bakes the background image into the HTML so it survives.
- server.js: raise JSON body limit to 12mb for embedded image data URLs.

Verified end-to-end on local Vulkan SDXL (RTX 5090): prompt -> bg+fg images on
the canvas -> publish creates a widget with the images embedded. 63/63.

Note: prod (not self-hosted) requires a PUBLIC image endpoint (e.g. OpenAI); the
SSRF guard blocks localhost there. Follow-up: upload generated images to the
content store and reference by URL to avoid multi-MB widget configs.
2026-06-09 13:40:14 -05:00

110 lines
5.2 KiB
JavaScript

'use strict';
// #41 Phase 2: text-to-image for AI signage. Three backends, all BYO/self-hostable:
// - 'comfyui' -> local ComfyUI (SDXL) via its prompt/history/view API
// - 'openai' -> any OpenAI-compatible /images/generations endpoint (sizes snapped to a fixed set)
// - 'sdcpp' -> the same /images/generations API on a local server that takes exact dimensions
// Returns a data: URL (base64 PNG) the Designer can drop straight onto a layer.
// The caller is responsible for the SSRF check on the base URL.
// Negative prompt: encoded by node '7' and wired into the sampler's `negative` input.
const NEGATIVE = 'text, words, letters, watermark, signature, logo, blurry, low quality, deformed';

/**
 * Build the ComfyUI workflow graph for one text-to-image job.
 *
 * The graph is keyed by node id ('3'..'9'); an input of the form
 * [nodeId, slot] refers to output `slot` of node `nodeId`.
 *
 * @param {string} prompt - Positive prompt text.
 * @param {string} ckpt - Checkpoint file name for CheckpointLoaderSimple.
 * @param {number} width - Latent image width.
 * @param {number} height - Latent image height.
 * @param {number} seed - Sampler seed.
 * @returns {object} Workflow graph for the `prompt` field of a ComfyUI job.
 */
function buildComfyWorkflow(prompt, ckpt, width, height, seed) {
  const node = (class_type, inputs) => ({ class_type, inputs });
  const link = (nodeId, slot) => [nodeId, slot];

  // Node '4' (the checkpoint loader) exposes model / clip / vae on slots 0 / 1 / 2.
  const loaderModel = () => link('4', 0);
  const loaderClip = () => link('4', 1);
  const loaderVae = () => link('4', 2);
  const encodeText = (text) => node('CLIPTextEncode', { text, clip: loaderClip() });

  const graph = {};
  graph['3'] = node('KSampler', {
    seed,
    steps: 25,
    cfg: 7,
    sampler_name: 'dpmpp_2m',
    scheduler: 'karras',
    denoise: 1,
    model: loaderModel(),
    positive: link('6', 0),
    negative: link('7', 0),
    latent_image: link('5', 0),
  });
  graph['4'] = node('CheckpointLoaderSimple', { ckpt_name: ckpt });
  graph['5'] = node('EmptyLatentImage', { width, height, batch_size: 1 });
  graph['6'] = encodeText(prompt);
  graph['7'] = encodeText(NEGATIVE);
  graph['8'] = node('VAEDecode', { samples: link('3', 0), vae: loaderVae() });
  graph['9'] = node('SaveImage', { filename_prefix: 'signage', images: link('8', 0) });
  return graph;
}
// Resolve after `ms` milliseconds.
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Render one image on a ComfyUI server and return it as a base64 data URL.
 *
 * Submits the graph from buildComfyWorkflow() to POST <baseUrl>/prompt, polls
 * GET <baseUrl>/history/<prompt_id> once per second (at most 120 polls) until an
 * output node lists an image, then downloads it from GET <baseUrl>/view.
 *
 * @param {string} baseUrl - ComfyUI root URL; paths are appended as-is, so no trailing slash.
 * @param {string} prompt - Positive prompt text.
 * @param {string} [model] - Checkpoint file name; falls back to 'sd_xl_base_1.0.safetensors'.
 * @param {number} width - Image width passed to the workflow.
 * @param {number} height - Image height passed to the workflow.
 * @param {AbortSignal} [signal] - Cancels the HTTP requests and the polling loop.
 * @returns {Promise<string>} A 'data:image/png;base64,...' URL.
 * @throws {Error} If the job is rejected, no prompt id is returned, the server
 *   reports a generation error, the image download fails, or polling runs out.
 *   Throws an error named 'AbortError' when `signal` is aborted.
 */
async function comfyGenerate(baseUrl, prompt, model, width, height, signal) {
  const abortError = () => Object.assign(new Error('aborted'), { name: 'AbortError' });
  const ckpt = model || 'sd_xl_base_1.0.safetensors';
  const seed = Math.floor(Math.random() * 1e15);
  const wf = buildComfyWorkflow(prompt, ckpt, width, height, seed);

  const sub = await fetch(baseUrl + '/prompt', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ prompt: wf }),
    signal,
  });
  if (!sub.ok) {
    const detail = (await sub.text().catch(() => '')).slice(0, 150);
    throw new Error(`ComfyUI rejected the job (${sub.status}): ${detail}`);
  }
  // Tolerate a null/odd JSON body instead of crashing on destructuring.
  const submitted = await sub.json();
  const promptId = submitted && submitted.prompt_id;
  if (!promptId) throw new Error('ComfyUI did not return a prompt id');

  // Poll history until this prompt produces an output.
  for (let i = 0; i < 120; i++) {
    if (signal && signal.aborted) throw abortError();
    await sleep(1000);

    let history = null;
    try {
      const res = await fetch(`${baseUrl}/history/${promptId}`, { signal });
      history = await res.json();
    } catch {
      // A cancelled request must stop the loop right away; any other failure
      // (network blip, non-JSON body) is treated as "not ready yet".
      if (signal && signal.aborted) throw abortError();
    }
    const entry = history && history[promptId];
    if (!entry) continue;

    const outputs = entry.outputs || {};
    for (const nodeId of Object.keys(outputs)) {
      const imgs = outputs[nodeId] && outputs[nodeId].images;
      if (!imgs || !imgs.length) continue;

      const im = imgs[0];
      const q = new URLSearchParams({ filename: im.filename, subfolder: im.subfolder || '', type: im.type || 'output' });
      const view = await fetch(`${baseUrl}/view?${q}`, { signal });
      // Without this check an HTTP error body would be returned as if it were PNG data.
      if (!view.ok) throw new Error(`ComfyUI image download failed (${view.status})`);
      const buf = Buffer.from(await view.arrayBuffer());
      return 'data:image/png;base64,' + buf.toString('base64');
    }
    if (entry.status && entry.status.status_str === 'error') throw new Error('ComfyUI reported a generation error');
  }
  throw new Error('ComfyUI image timed out');
}
// Both 'openai' and 'sdcpp' speak the same /v1/images/generations API, but they
// want different `size` values: 'sdcpp' (local, VRAM-bound) takes the exact
// dimensions it is given, while 'openai' (real cloud) only accepts a fixed set
// of sizes, so the request is snapped to landscape / portrait / square.
/**
 * Pick the `size` string for an /images/generations request.
 *
 * @param {string} provider - 'sdcpp' keeps exact dimensions; anything else is snapped.
 * @param {number} width - Requested width.
 * @param {number} height - Requested height.
 * @returns {string} '<width>x<height>' for 'sdcpp', otherwise one of
 *   '1792x1024', '1024x1792' or '1024x1024'.
 */
function sizeFor(provider, width, height) {
  if (provider === 'sdcpp') {
    return `${width}x${height}`;
  }
  if (width > height) {
    return '1792x1024';
  }
  if (height > width) {
    return '1024x1792';
  }
  return '1024x1024';
}
/**
 * Request one image from an OpenAI-compatible POST <baseUrl>/images/generations
 * endpoint and return it as a base64 data URL.
 *
 * The request asks for `response_format: 'b64_json'`. If the response carries a
 * `url` instead of `b64_json`, the image is downloaded from that URL.
 *
 * @param {string} baseUrl - API root; '/images/generations' is appended as-is.
 * @param {string} key - Sent as `Authorization: Bearer <key>`.
 * @param {string} prompt - Prompt text.
 * @param {string} [model] - Model name; left out of the request body when falsy.
 * @param {string} size - Size string such as '1024x1024'.
 * @param {AbortSignal} [signal] - Cancels the HTTP requests.
 * @returns {Promise<string>} A 'data:image/png;base64,...' URL.
 * @throws {Error} If the endpoint responds with a non-2xx status, the image
 *   download fails, or the response contains neither `b64_json` nor `url`.
 */
async function openaiCompatGenerate(baseUrl, key, prompt, model, size, signal) {
  const body = { prompt, n: 1, size, response_format: 'b64_json' };
  if (model) body.model = model; // omit for sd.cpp (uses its loaded checkpoint)

  const res = await fetch(baseUrl + '/images/generations', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${key}` },
    body: JSON.stringify(body),
    signal,
  });
  if (!res.ok) {
    const detail = (await res.text().catch(() => '')).slice(0, 150);
    throw new Error(`Image endpoint error ${res.status}: ${detail}`);
  }

  const j = await res.json();
  const first = j && j.data && j.data[0];

  const b64 = first && first.b64_json;
  if (b64) return 'data:image/png;base64,' + b64;

  const url = first && first.url;
  if (url) {
    // NOTE(review): this URL comes from the endpoint's response, so an SSRF
    // check done on baseUrl does not cover it — consider validating it before
    // fetching.
    const download = await fetch(url, { signal });
    // Without this check an HTTP error page would be returned as if it were PNG data.
    if (!download.ok) throw new Error(`Image download failed (${download.status})`);
    const buf = Buffer.from(await download.arrayBuffer());
    return 'data:image/png;base64,' + buf.toString('base64');
  }
  throw new Error('Image endpoint returned no image');
}
/**
 * Generate one image with the configured backend and return it as a data URL.
 *
 * Providers 'openai' and 'sdcpp' go through the /images/generations API; any
 * other provider value is sent to ComfyUI. The whole call is bounded by a
 * single timeout that aborts the in-flight requests.
 *
 * @param {object} opts
 * @param {string} opts.provider - 'openai', 'sdcpp', or anything else for ComfyUI.
 * @param {string} opts.baseUrl - Backend root URL; trailing slashes are stripped.
 * @param {string} [opts.apiKey] - Bearer key for 'openai'/'sdcpp'; 'none' is sent when absent.
 * @param {string} [opts.model] - Model / checkpoint name.
 * @param {string} opts.prompt - Prompt text.
 * @param {number} [opts.width=1024] - Requested width.
 * @param {number} [opts.height=1024] - Requested height.
 * @param {number} [opts.timeoutMs=180000] - Overall time limit in milliseconds.
 * @returns {Promise<string>} A data: URL for the generated image.
 * @throws {Error} 'image generation timed out' (with the abort error as `cause`)
 *   when the time limit is hit; otherwise whatever the backend call throws.
 */
async function generateImage(opts) {
  const { provider, baseUrl, apiKey, model, prompt } = opts;
  // Fail with a clear message instead of requesting the URL "undefined/...".
  if (!baseUrl) throw new Error('image generation requires a base URL');
  // The backends append '/prompt', '/images/generations', etc. directly, so a
  // trailing slash on the configured URL would produce '//' paths.
  const root = String(baseUrl).replace(/\/+$/, '');
  const width = opts.width || 1024;
  const height = opts.height || 1024;

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), opts.timeoutMs || 180000);
  try {
    if (provider === 'openai' || provider === 'sdcpp') {
      return await openaiCompatGenerate(root, apiKey || 'none', prompt, model, sizeFor(provider, width, height), controller.signal);
    }
    return await comfyGenerate(root, prompt, model, width, height, controller.signal);
  } catch (e) {
    // The only abort source is the timer above, so an AbortError means a timeout.
    if (e && e.name === 'AbortError') throw new Error('image generation timed out', { cause: e });
    throw e;
  } finally {
    clearTimeout(timer);
  }
}
// Public API: generateImage is the only name exported from this module.
module.exports = { generateImage };