Merge pull request #61 from screentinker/feat/ai-images-phase2

feat(ai): background + foreground images for signs (#41 Phase 2)
This commit is contained in:
screentinker 2026-06-09 13:40:19 -05:00 committed by GitHub
commit c23e8ca289
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 222 additions and 22 deletions

View file

@ -605,8 +605,16 @@ export default {
'designer.ai.placeholder': "Describe your sign — e.g. 'Summer sale, 20% off mains, bright modern'",
'designer.ai.generate': 'Generate design',
'designer.ai.generating': 'Generating…',
'designer.ai.contacting': 'Contacting your AI endpoint… (local models can be slow)',
'designer.ai.contacting': 'Generating… text is quick; images add ~10–30s',
'designer.ai.done': 'Generated {n} element(s) — tweak and Publish.',
'designer.ai.done_imgwarn': 'Generated {n} element(s) — an image couldn’t be generated (text is ready). Publish.',
'designer.ai.images_title': 'AI images (optional)',
'designer.ai.images_desc': 'Generate a background (and a foreground graphic) from your prompt. Point at a local sd.cpp / ComfyUI server or OpenAI. Off = text + shapes only.',
'designer.ai.image_provider': 'Image provider',
'designer.ai.image_off': 'Off (text + shapes only)',
'designer.ai.image_base_url': 'Image endpoint URL',
'designer.ai.image_model': 'Image model',
'designer.ai.image_model_ph': 'optional — e.g. dall-e-3; blank for sd.cpp / ComfyUI',
'designer.ai.failed': 'Generation failed',
'designer.ai.need_prompt': 'Enter a prompt first',
'designer.ai.settings_title': 'AI design settings',

View file

@ -136,13 +136,18 @@ export function render(container) {
try {
const design = await api.aiGenerateDesign(prompt);
elements = []; selectedIdx = -1;
if (design.background) {
if (design.backgroundImage) {
bgImageDataUrl = design.backgroundImage; // AI-generated backdrop
if (design.background) bgValue = design.background; // kept as fallback
} else if (design.background) {
bgValue = design.background; bgImageDataUrl = null;
const bc = document.getElementById('bgColor'); if (bc) bc.value = design.background;
}
(design.elements || []).forEach(el => elements.push(el));
redraw();
status.textContent = t('designer.ai.done', { n: (design.elements || []).length });
status.textContent = design.image_warning
? t('designer.ai.done_imgwarn', { n: (design.elements || []).length })
: t('designer.ai.done', { n: (design.elements || []).length });
} catch (err) {
status.textContent = (err && err.message) || t('designer.ai.failed');
} finally {
@ -340,6 +345,21 @@ async function openAiSettings() {
<div class="form-group"><label>${t('designer.ai.api_key')}</label>
<input id="aiKey" class="input" type="password" autocomplete="off" placeholder="${cur.has_key ? t('designer.ai.key_set') : t('designer.ai.key_placeholder')}" style="width:100%">
<div style="font-size:11px;color:var(--text-muted);margin-top:4px">${t('designer.ai.key_hint')}</div></div>
<hr style="border:none;border-top:1px solid var(--border);margin:14px 0 10px">
<h4 style="font-size:13px;margin-bottom:4px">${t('designer.ai.images_title')}</h4>
<p style="font-size:11px;color:var(--text-muted);margin-bottom:8px">${t('designer.ai.images_desc')}</p>
<div class="form-group"><label>${t('designer.ai.image_provider')}</label>
<select id="aiImageProvider" class="input" style="width:100%">
<option value="" ${!cur.image_provider ? 'selected' : ''}>${t('designer.ai.image_off')}</option>
<option value="sdcpp" ${cur.image_provider === 'sdcpp' ? 'selected' : ''}>Stable Diffusion local (sd.cpp)</option>
<option value="openai" ${cur.image_provider === 'openai' ? 'selected' : ''}>OpenAI / OpenAI-compatible</option>
<option value="comfyui" ${cur.image_provider === 'comfyui' ? 'selected' : ''}>ComfyUI</option>
</select></div>
<div class="form-group"><label>${t('designer.ai.image_base_url')}</label>
<input id="aiImageBaseUrl" class="input" value="${esc(cur.image_base_url || '')}" placeholder="http://localhost:8080/v1 · http://localhost:8188" style="width:100%"></div>
<div class="form-group"><label>${t('designer.ai.image_model')}</label>
<input id="aiImageModel" class="input" value="${esc(cur.image_model || '')}" placeholder="${t('designer.ai.image_model_ph')}" style="width:100%"></div>
<div id="aiSettingsErr" style="display:none;color:var(--danger);font-size:13px;margin-top:8px"></div>
</div>
<div class="modal-footer">
@ -380,6 +400,9 @@ async function openAiSettings() {
const data = {
base_url: overlay.querySelector('#aiBaseUrl').value.trim(),
model: overlay.querySelector('#aiModel').value.trim(),
image_provider: overlay.querySelector('#aiImageProvider').value,
image_base_url: overlay.querySelector('#aiImageBaseUrl').value.trim(),
image_model: overlay.querySelector('#aiImageModel').value.trim(),
};
const key = overlay.querySelector('#aiKey').value;
if (key) data.api_key = key;
@ -631,6 +654,9 @@ function updateLayers() {
function generateInnerHTML() {
let html = '';
// A background image (e.g. AI-generated) is the body background in the editor;
// bake it into the published HTML as a full-cover bottom layer so it survives.
if (bgImageDataUrl) html += `<img src="${bgImageDataUrl}" style="position:absolute;inset:0;width:100%;height:100%;object-fit:cover" alt="">`;
elements.forEach((el, i) => {
// Use vw units for font sizes (same as designer preview) so output scales to any viewport
const fs = el.fontSize / 10;

View file

@ -177,6 +177,8 @@ const migrations = [
// known password, must_change_password=1 forces a password change on first
// login. Default 0 so all existing users are unaffected.
"ALTER TABLE users ADD COLUMN must_change_password INTEGER NOT NULL DEFAULT 0",
// #41 Phase 2: which image backend the workspace's image endpoint speaks.
"ALTER TABLE ai_settings ADD COLUMN image_provider TEXT",
];
// Apply each ALTER idempotently. A "duplicate column name" / "already exists"
// error means the column is already present (expected on a migrated DB) - benign.

View file

@ -401,6 +401,7 @@ CREATE TABLE IF NOT EXISTS ai_settings (
model TEXT,
image_base_url TEXT,
image_model TEXT,
image_provider TEXT,
updated_at INTEGER NOT NULL DEFAULT (strftime('%s','now'))
);

109
server/lib/image-gen.js Normal file
View file

@ -0,0 +1,109 @@
'use strict';
// #41 Phase 2: text-to-image for AI signage. Three providers, all BYO/self-hostable:
// - 'comfyui' -> local ComfyUI (SDXL) via its prompt/history/view API
// - 'openai' -> any OpenAI-compatible /images/generations endpoint
// - 'sdcpp' -> local sd.cpp server; same /images/generations API, but sized with exact dimensions
// Returns a data: URL (base64, labelled image/png) the Designer can drop straight onto a layer.
// The caller is responsible for the SSRF check on the base URL.
// Negative prompt wired into the workflow's second CLIPTextEncode node (id '7'):
// things the generated picture should steer away from.
const NEGATIVE = 'text, words, letters, watermark, signature, logo, blurry, low quality, deformed';

/**
 * Assemble the ComfyUI workflow graph for a single text-to-image job.
 *
 * The graph is a plain object keyed by string node id; each node has a
 * `class_type` and an `inputs` map. Inputs that come from another node are
 * written as two-element `[nodeId, index]` arrays.
 *
 * @param {string} prompt  Positive prompt text (node '6').
 * @param {string} ckpt    Checkpoint file name passed to CheckpointLoaderSimple (node '4').
 * @param {number} width   Latent image width (node '5').
 * @param {number} height  Latent image height (node '5').
 * @param {number} seed    Sampler seed (node '3').
 * @returns {object} The workflow graph, ready to be sent as the `prompt` field of a job.
 */
function buildComfyWorkflow(prompt, ckpt, width, height, seed) {
  const node = (class_type, inputs) => ({ class_type, inputs });
  const from = (nodeId, index) => [nodeId, index];

  const graph = {};
  graph['3'] = node('KSampler', {
    seed,
    steps: 25,
    cfg: 7,
    sampler_name: 'dpmpp_2m',
    scheduler: 'karras',
    denoise: 1,
    model: from('4', 0),
    positive: from('6', 0),
    negative: from('7', 0),
    latent_image: from('5', 0),
  });
  graph['4'] = node('CheckpointLoaderSimple', { ckpt_name: ckpt });
  graph['5'] = node('EmptyLatentImage', { width, height, batch_size: 1 });
  graph['6'] = node('CLIPTextEncode', { text: prompt, clip: from('4', 1) });
  graph['7'] = node('CLIPTextEncode', { text: NEGATIVE, clip: from('4', 1) });
  graph['8'] = node('VAEDecode', { samples: from('3', 0), vae: from('4', 2) });
  graph['9'] = node('SaveImage', { filename_prefix: 'signage', images: from('8', 0) });
  return graph;
}
// Resolve after `ms` milliseconds (used to pace the history polling below).
const sleep = (ms) => new Promise((r) => setTimeout(r, ms));

/**
 * Generate one image through a ComfyUI server and return it as a data URL.
 *
 * Flow: POST the workflow to `/prompt`, poll `/history/<prompt_id>` once a
 * second (up to 120 polls) until an output node lists an image, then download
 * that image from `/view` and base64-encode it.
 *
 * @param {string} baseUrl  ComfyUI base URL without a trailing slash.
 * @param {string} prompt   Positive prompt text.
 * @param {string} [model]  Checkpoint file name; falls back to 'sd_xl_base_1.0.safetensors'.
 * @param {number} width    Requested image width.
 * @param {number} height   Requested image height.
 * @param {AbortSignal} [signal]  Forwarded to every fetch; also checked before each poll.
 * @returns {Promise<string>} `data:image/png;base64,...` URL.
 * @throws {Error} When the job is rejected, no prompt id comes back, ComfyUI
 *   reports an error, the image download fails, polling runs out, or the
 *   signal is aborted (error name 'AbortError').
 */
async function comfyGenerate(baseUrl, prompt, model, width, height, signal) {
  const ckpt = model || 'sd_xl_base_1.0.safetensors';
  const seed = Math.floor(Math.random() * 1e15);
  const wf = buildComfyWorkflow(prompt, ckpt, width, height, seed);
  const sub = await fetch(baseUrl + '/prompt', {
    method: 'POST', headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ prompt: wf }), signal,
  });
  if (!sub.ok) throw new Error(`ComfyUI rejected the job (${sub.status}): ${(await sub.text().catch(() => '')).slice(0, 150)}`);
  // A literal `null` JSON body must produce the friendly error below, not a TypeError.
  const { prompt_id } = (await sub.json()) || {};
  if (!prompt_id) throw new Error('ComfyUI did not return a prompt id');
  // poll history until this prompt produces an output
  for (let i = 0; i < 120; i++) {
    if (signal && signal.aborted) throw Object.assign(new Error('aborted'), { name: 'AbortError' });
    await sleep(1000);
    // Best-effort poll: a failed or unparsable history response just means "not ready
    // yet". An abort is picked up by the signal check at the top of the next iteration.
    const h = await fetch(`${baseUrl}/history/${prompt_id}`, { signal }).then((r) => r.json()).catch(() => null);
    const entry = h && h[prompt_id];
    if (!entry) continue;
    const outputs = entry.outputs || {};
    for (const nodeId of Object.keys(outputs)) {
      const imgs = outputs[nodeId] && outputs[nodeId].images;
      if (imgs && imgs.length) {
        const im = imgs[0];
        const q = new URLSearchParams({ filename: im.filename, subfolder: im.subfolder || '', type: im.type || 'output' });
        const view = await fetch(`${baseUrl}/view?${q}`, { signal });
        // Without this check an HTTP error page would be encoded and handed back as a "PNG".
        if (!view.ok) throw new Error(`ComfyUI image download failed (${view.status})`);
        const buf = Buffer.from(await view.arrayBuffer());
        return 'data:image/png;base64,' + buf.toString('base64');
      }
    }
    if (entry.status && entry.status.status_str === 'error') throw new Error('ComfyUI reported a generation error');
  }
  throw new Error('ComfyUI image timed out');
}
// Pick the `size` string sent to an /images/generations endpoint.
// - 'sdcpp' gets the exact requested dimensions as "<width>x<height>" (callers
//   keep these small because local generation is VRAM-bound).
// - every other provider ('openai') gets one of three fixed sizes chosen by
//   orientation, since the cloud API only accepts a fixed set of sizes.
function sizeFor(provider, width, height) {
  const wantsExactSize = provider === 'sdcpp';
  if (wantsExactSize) {
    return `${width}x${height}`;
  }
  if (width > height) {
    return '1792x1024'; // landscape
  }
  if (height > width) {
    return '1024x1792'; // portrait
  }
  return '1024x1024'; // square (or not comparable)
}
/**
 * Generate one image through an OpenAI-compatible `/images/generations`
 * endpoint and return it as a data URL.
 *
 * The request asks for `b64_json`; if the endpoint answers with a `url`
 * instead, that URL is downloaded and base64-encoded.
 *
 * @param {string} baseUrl  Endpoint base (the path '/images/generations' is appended).
 * @param {string} key      Bearer token sent in the Authorization header.
 * @param {string} prompt   Image description.
 * @param {string} [model]  Model name; left out of the request body when falsy.
 * @param {string} size     Size string such as '1024x1024'.
 * @param {AbortSignal} [signal]  Forwarded to every fetch.
 * @returns {Promise<string>} `data:image/png;base64,...` URL.
 * @throws {Error} On a non-2xx endpoint response, a failed image download, or
 *   a response that carries neither `b64_json` nor `url`.
 */
async function openaiCompatGenerate(baseUrl, key, prompt, model, size, signal) {
  const body = { prompt, n: 1, size, response_format: 'b64_json' };
  if (model) body.model = model; // omit for sd.cpp (uses its loaded checkpoint)
  const res = await fetch(baseUrl + '/images/generations', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${key}` },
    body: JSON.stringify(body),
    signal,
  });
  if (!res.ok) throw new Error(`Image endpoint error ${res.status}: ${(await res.text().catch(() => '')).slice(0, 150)}`);
  const j = await res.json();
  const first = j && j.data && j.data[0];
  const b64 = first && first.b64_json;
  if (b64) return 'data:image/png;base64,' + b64;
  const url = first && first.url;
  if (url) {
    // NOTE(review): this URL is chosen by the remote endpoint and is fetched without
    // any allow-list check; only the base URL is vetted by the caller. Confirm whether
    // it should go through the same SSRF check.
    const img = await fetch(url, { signal });
    // Without this check an HTTP error page would be encoded and handed back as a "PNG".
    if (!img.ok) throw new Error(`Image download failed (${img.status})`);
    const buf = Buffer.from(await img.arrayBuffer());
    return 'data:image/png;base64,' + buf.toString('base64');
  }
  throw new Error('Image endpoint returned no image');
}
// generateImage(opts) -> Promise of a data URL.
// opts: { provider, baseUrl, apiKey, model, prompt, width, height, timeoutMs }
//   - width / height default to 1024 when missing or 0
//   - timeoutMs defaults to 180000; when it elapses the in-flight request is
//     aborted and the call rejects with 'image generation timed out'
//   - provider 'openai' or 'sdcpp' goes to the OpenAI-compatible path (apiKey
//     falls back to 'none'); anything else goes to the ComfyUI path
async function generateImage(opts) {
  const { provider, baseUrl, apiKey, model, prompt } = opts;
  const width = opts.width || 1024;
  const height = opts.height || 1024;

  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), opts.timeoutMs || 180000);
  const speaksOpenAiApi = ['openai', 'sdcpp'].includes(provider);

  try {
    const dataUrl = speaksOpenAiApi
      ? await openaiCompatGenerate(baseUrl, apiKey || 'none', prompt, model, sizeFor(provider, width, height), abort.signal)
      : await comfyGenerate(baseUrl, prompt, model, width, height, abort.signal);
    return dataUrl;
  } catch (err) {
    // Any AbortError is reported as a timeout; every other error passes through untouched.
    if (err.name !== 'AbortError') throw err;
    throw new Error('image generation timed out');
  } finally {
    clearTimeout(deadline);
  }
}
module.exports = { generateImage };

View file

@ -10,6 +10,7 @@ const router = express.Router();
const { db } = require('../db/database');
const config = require('../config');
const { encrypt, decrypt } = require('../lib/secretbox');
const { generateImage } = require('../lib/image-gen');
const { logActivity, getClientIp } = require('../services/activity');
const isWorkspaceAdmin = (req) => req.isPlatformAdmin || req.actingAs || req.workspaceRole === 'workspace_admin';
@ -30,13 +31,19 @@ function endpointAllowed(rawUrl) {
return true;
}
const DESIGN_SYSTEM_PROMPT =
`You are a digital-signage designer. The canvas is 1920x1080 (16:9). Respond with ONLY a JSON object (no prose, no markdown fences) shaped exactly:
{"background":"#RRGGBB","elements":[ELEMENT, ...]}
function designSystemPrompt(imagesAvailable) {
const imgLine = imagesAvailable ? '\n{"type":"image","image_prompt":"DESCRIPTION","x":N,"y":N,"width":N,"height":N}' : '';
const bgImg = imagesAvailable ? '"background_prompt":"DESCRIPTION or omit",' : '';
const imgRules = imagesAvailable
? ' Strongly PREFER a "background_prompt" — a vivid full-bleed atmospheric scene behind everything; this makes the best-looking signs. Only add a foreground "image" element when a specific product/object must appear as a distinct picture. image_prompt / background_prompt describe a PICTURE ONLY and must contain NO words, letters, or text (the AI cannot render text) — all wording goes in text elements layered on top, and pick text colors with strong contrast against the image.'
: '';
return `You are a digital-signage designer. The canvas is 1920x1080 (16:9). Respond with ONLY a JSON object (no prose, no markdown fences) shaped exactly:
{"background":"#RRGGBB",${bgImg}"elements":[ELEMENT, ...]}
ELEMENT is one of:
{"type":"text","x":N,"y":N,"text":"STRING","fontSize":N,"color":"#RRGGBB","bold":true|false}
{"type":"shape","x":N,"y":N,"width":N,"height":N,"color":"#RRGGBB","opacity":N}
x, y, width, height are PERCENTAGES of the canvas (0-100). fontSize is a number where a big headline is about 90 and body text about 36. Use 3 to 6 elements: one bold headline, 1-2 supporting lines, and 0-2 shapes as colored accent bands behind/beside the text. Pick a tasteful, high-contrast palette that fits the request. Keep every element within 0-95 on both axes. Output JSON only.`;
{"type":"shape","x":N,"y":N,"width":N,"height":N,"color":"#RRGGBB","opacity":N}${imgLine}
x, y, width, height are PERCENTAGES of the canvas (0-100). fontSize is a number where a big headline is about 90 and body text about 36. Use 3 to 6 elements: one bold headline, 1-2 supporting lines, and 0-2 shapes as colored accent bands behind/beside the text. Pick a tasteful, high-contrast palette that fits the request. Keep every element within 0-95 on both axes.${imgRules} Output JSON only.`;
}
const clampN = (n, lo, hi, d) => { n = Number(n); return Number.isFinite(n) ? Math.min(hi, Math.max(lo, n)) : d; };
const hex = (c, d) => (typeof c === 'string' && /^#[0-9a-fA-F]{3,8}$/.test(c.trim())) ? c.trim() : d;
@ -66,10 +73,22 @@ function fitText(el) {
// often emit pixels), strip any HTML from text, validate colors, fit to canvas.
function normalizeDesign(raw) {
const out = { background: hex(raw && raw.background, '#111827'), elements: [] };
const bgPrompt = cleanText(raw && raw.background_prompt);
if (bgPrompt) out.background_prompt = bgPrompt;
const els = Array.isArray(raw && raw.elements) ? raw.elements.slice(0, 20) : [];
for (const e of els) {
if (!e || typeof e !== 'object') continue;
if (e.type === 'text') {
if (e.type === 'image') {
const prompt = cleanText(e.image_prompt || e.prompt);
if (!prompt) continue;
const w = clampN(e.width, 5, 100, 30), h = clampN(e.height, 5, 100, 40);
out.elements.push({
type: 'image', image_prompt: prompt,
x: Math.min(clampN(e.x, 0, 100, 60), 100 - w),
y: Math.min(clampN(e.y, 0, 100, 30), 100 - h),
width: w, height: h,
});
} else if (e.type === 'text') {
const text = cleanText(e.text);
if (!text) continue;
const el = {
@ -96,12 +115,13 @@ function normalizeDesign(raw) {
}
}
// De-overlap text lines (models stack them at the same y) and order shapes
// behind text so accent bands never hide the words.
// De-overlap text lines (models stack them at the same y) and stack layers so
// text is always on top: shapes (back) -> images (mid) -> text (front).
const shapes = out.elements.filter((e) => e.type === 'shape');
const images = out.elements.filter((e) => e.type === 'image').slice(0, 2);
const texts = out.elements.filter((e) => e.type === 'text');
deoverlapTexts(texts);
out.elements = [...shapes, ...texts];
out.elements = [...shapes, ...images, ...texts];
return out;
}
@ -136,14 +156,16 @@ function deoverlapTexts(texts) {
// GET /api/ai/settings — workspace members (never returns the key)
router.get('/settings', (req, res) => {
const row = db.prepare('SELECT base_url, model, image_base_url, image_model, api_key_enc FROM ai_settings WHERE workspace_id = ?').get(req.workspaceId);
const row = db.prepare('SELECT base_url, model, image_base_url, image_model, image_provider, api_key_enc FROM ai_settings WHERE workspace_id = ?').get(req.workspaceId);
res.json({
base_url: row ? row.base_url || '' : '',
model: row ? row.model || '' : '',
image_base_url: row ? row.image_base_url || '' : '',
image_model: row ? row.image_model || '' : '',
image_provider: row ? row.image_provider || '' : '',
has_key: !!(row && row.api_key_enc),
configured: !!(row && row.base_url && row.model),
image_configured: !!(row && row.image_base_url && row.image_provider),
});
});
@ -154,6 +176,7 @@ router.put('/settings', (req, res) => {
const model = String(req.body && req.body.model || '').trim();
const image_base_url = String(req.body && req.body.image_base_url || '').trim().replace(/\/+$/, '');
const image_model = String(req.body && req.body.image_model || '').trim();
const image_provider = ['comfyui', 'openai', 'sdcpp'].includes(req.body && req.body.image_provider) ? req.body.image_provider : null;
if (base_url && !endpointAllowed(base_url)) return res.status(400).json({ error: 'Endpoint URL not allowed (private/internal addresses are blocked on this instance).' });
if (image_base_url && !endpointAllowed(image_base_url)) return res.status(400).json({ error: 'Image endpoint URL not allowed.' });
@ -163,11 +186,12 @@ router.put('/settings', (req, res) => {
if (req.body && req.body.clear_key) api_key_enc = null;
db.prepare(`
INSERT INTO ai_settings (workspace_id, base_url, api_key_enc, model, image_base_url, image_model, updated_at)
VALUES (?, ?, ?, ?, ?, ?, strftime('%s','now'))
INSERT INTO ai_settings (workspace_id, base_url, api_key_enc, model, image_base_url, image_model, image_provider, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, strftime('%s','now'))
ON CONFLICT(workspace_id) DO UPDATE SET base_url=excluded.base_url, api_key_enc=excluded.api_key_enc,
model=excluded.model, image_base_url=excluded.image_base_url, image_model=excluded.image_model, updated_at=excluded.updated_at
`).run(req.workspaceId, base_url || null, api_key_enc, model || null, image_base_url || null, image_model || null);
model=excluded.model, image_base_url=excluded.image_base_url, image_model=excluded.image_model,
image_provider=excluded.image_provider, updated_at=excluded.updated_at
`).run(req.workspaceId, base_url || null, api_key_enc, model || null, image_base_url || null, image_model || null, image_provider);
logActivity(req.user.id, 'ai_settings_update', `endpoint: ${base_url || '(none)'} model: ${model || '(none)'}`, null, getClientIp(req), req.workspaceId);
res.json({ ok: true });
});
@ -203,10 +227,13 @@ router.post('/generate-design', async (req, res) => {
const prompt = String(req.body && req.body.prompt || '').trim().slice(0, 500);
if (!prompt) return res.status(400).json({ error: 'Prompt required' });
const row = db.prepare('SELECT base_url, api_key_enc, model FROM ai_settings WHERE workspace_id = ?').get(req.workspaceId);
const row = db.prepare('SELECT base_url, api_key_enc, model, image_base_url, image_model, image_provider FROM ai_settings WHERE workspace_id = ?').get(req.workspaceId);
if (!row || !row.base_url || !row.model) return res.status(400).json({ error: 'AI is not configured. Set an endpoint and model in AI settings first.' });
if (!endpointAllowed(row.base_url)) return res.status(400).json({ error: 'Configured endpoint is not allowed.' });
const imgBase = row.image_base_url ? row.image_base_url.replace(/\/+$/, '') : '';
const imagesAvailable = !!(imgBase && row.image_provider && endpointAllowed(imgBase));
const key = decrypt(row.api_key_enc) || 'none';
const url = row.base_url.replace(/\/+$/, '') + '/chat/completions';
const controller = new AbortController();
@ -218,7 +245,7 @@ router.post('/generate-design', async (req, res) => {
headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${key}` },
body: JSON.stringify({
model: row.model, temperature: 0.6, stream: false,
messages: [{ role: 'system', content: DESIGN_SYSTEM_PROMPT }, { role: 'user', content: prompt }],
messages: [{ role: 'system', content: designSystemPrompt(imagesAvailable) }, { role: 'user', content: prompt }],
}),
signal: controller.signal,
});
@ -240,8 +267,32 @@ router.post('/generate-design', async (req, res) => {
parsed = JSON.parse(m ? m[0] : content);
} catch { return res.status(502).json({ error: 'AI did not return a usable design. Try rephrasing.' }); }
const design = normalizeDesign(parsed);
if (!design.elements.length) return res.status(502).json({ error: 'AI returned an empty design. Try a more specific prompt.' });
logActivity(req.user.id, 'ai_generate_design', `prompt: ${prompt.slice(0, 80)}`, null, getClientIp(req), req.workspaceId);
if (!design.elements.length && !design.background_prompt) return res.status(502).json({ error: 'AI returned an empty design. Try a more specific prompt.' });
// Phase 2: generate the AI background + foreground images (best-effort: a failed
// image never fails the whole design — the text/shapes still come back).
const imageEls = design.elements.filter((e) => e.type === 'image');
if (imagesAvailable && (design.background_prompt || imageEls.length)) {
const common = { provider: row.image_provider, baseUrl: imgBase, apiKey: key, model: row.image_model, timeoutMs: 180000 };
const jobs = [];
if (design.background_prompt) {
jobs.push(generateImage({ ...common, prompt: design.background_prompt, width: 1024, height: 576 })
.then((src) => { design.backgroundImage = src; })
.catch((e) => { design.image_warning = 'Background image failed: ' + e.message; }));
}
for (const el of imageEls) {
jobs.push(generateImage({ ...common, prompt: el.image_prompt, width: 768, height: 768 })
.then((src) => { el.src = src; })
.catch(() => { el._failed = true; }));
}
await Promise.all(jobs);
}
// drop image elements that never got a src (no endpoint, or generation failed)
design.elements = design.elements.filter((e) => e.type !== 'image' || e.src);
design.elements.forEach((e) => { delete e.image_prompt; delete e._failed; });
delete design.background_prompt;
logActivity(req.user.id, 'ai_generate_design', `prompt: ${prompt.slice(0, 80)}${imagesAvailable ? ' (+images)' : ''}`, null, getClientIp(req), req.workspaceId);
res.json(design);
});

View file

@ -137,7 +137,10 @@ app.use(cors({
const stripeRouter = require('./routes/stripe');
app.post('/api/stripe/webhook', express.raw({ type: 'application/json' }), stripeRouter);
app.use(express.json());
// 12mb so AI-designed signs with embedded generated images (base64 data URLs)
// can be published. #41 follow-up: upload generated images to the content store
// and reference by URL instead of embedding, to keep widget configs small.
app.use(express.json({ limit: '12mb' }));
const { sanitizeBody } = require('./middleware/sanitize');
app.use(sanitizeBody);