diff --git a/CHANGELOG.md b/CHANGELOG.md index 3df571d5e..32a565fc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added language model input-modality and document capability resolution, automatically resolved from the models.dev catalog (falls back to text-only for uncatalogued/self-hosted models). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) - [EE] Added DPoP sender-constrained OAuth tokens for MCP clients. [#1395](https://github.com/sourcebot-dev/sourcebot/pull/1395) - [EE] Added text file attachments to Ask Sourcebot, letting users attach text/code/config files to a chat message via the paperclip button, drag-and-drop, or paste, with large pastes auto-converted to attachments. [#1374](https://github.com/sourcebot-dev/sourcebot/pull/1374) +- [EE] Added image attachments to Ask Sourcebot, letting users attach images to a chat message when the selected model supports image input. [#1375](https://github.com/sourcebot-dev/sourcebot/pull/1375) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) diff --git a/docs/docs/configuration/environment-variables.mdx b/docs/docs/configuration/environment-variables.mdx index 2d4c57acb..68e94c19b 100644 --- a/docs/docs/configuration/environment-variables.mdx +++ b/docs/docs/configuration/environment-variables.mdx @@ -40,6 +40,8 @@ The following environment variables allow you to configure your Sourcebot deploy | `SOURCEBOT_TELEMETRY_DISABLED` | `false` |

Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/misc/telemetry) for more info.

| | `DEFAULT_MAX_MATCH_COUNT` | `10000` |

The default maximum number of search results to return when using search in the web app.

| | `ALWAYS_INDEX_FILE_PATTERNS` | - |

A comma separated list of glob patterns matching file paths that should always be indexed, regardless of size or number of trigrams.

| +| `SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES` | `10485760` (10 MiB) |

Maximum size in bytes of a single image attachment uploaded to Ask Sourcebot. Enforced server-side at upload time.

| +| `SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS` | `24` |

How long in hours an uploaded-but-unsent attachment is retained before being deleted by the orphan sweep. Set to `0` to disable the sweep.

| | `NODE_USE_ENV_PROXY` | `0` |

Enables Node.js to automatically use `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` environment variables for network requests. Set to `1` to enable or `0` to disable. See [this doc](https://nodejs.org/en/learn/http/enterprise-network-configuration) for more info.

| | `HTTP_PROXY` | - |

HTTP proxy URL for routing non-SSL requests through a proxy server (e.g., `http://proxy.company.com:8080`). Requires `NODE_USE_ENV_PROXY=1`.

| | `HTTPS_PROXY` | - |

HTTPS proxy URL for routing SSL requests through a proxy server (e.g., `http://proxy.company.com:8080`). Requires `NODE_USE_ENV_PROXY=1`.

| diff --git a/packages/backend/src/attachmentPruner.ts b/packages/backend/src/attachmentPruner.ts new file mode 100644 index 000000000..ffeb534c3 --- /dev/null +++ b/packages/backend/src/attachmentPruner.ts @@ -0,0 +1,156 @@ +import { AttachmentStatus, PrismaClient } from "@sourcebot/db"; +import { createLogger, env, getStorageBackend } from "@sourcebot/shared"; +import { setIntervalAsync } from "./utils.js"; + +const BATCH_SIZE = 1_000; +const ONE_HOUR_MS = 60 * 60 * 1000; + +const logger = createLogger('attachment-pruner'); + +/** + * Periodically reclaims orphaned attachment blobs older than the configured TTL, + * along with their stored bytes, using the `DELETING` tombstone protocol: an + * orphan is first atomically flipped to `DELETING`, then its bytes are deleted, + * and only then is the row removed. Because the row (the only durable handle to + * the bytes) outlives the byte delete, a failed byte delete is always retryable. + * + * Each tick condemns two classes of orphan to `DELETING`, then reclaims all + * tombstones: + * + * 1. PENDING (uploaded-but-never-linked): produced when a user selects a file + * in the chat box but never sends the message. + * 2. COMMITTED with zero links: normally a committed blob is reclaimed inline + * by the chat-delete sweep in the web app, but if that sweep is interrupted + * (process crash / DB error / failed byte delete) the blob is left tombstoned + * or unlinked. This is the backstop for that case. + * + * @note Byte deletion goes through the shared `StorageBackend`, so the web app + * and this worker share one on-disk layout. + */ +export class AttachmentPruner { + private interval?: NodeJS.Timeout; + private readonly storage = getStorageBackend(); + + constructor(private db: PrismaClient) {} + + startScheduler() { + const ttlHours = env.SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS; + if (ttlHours <= 0) { + logger.info('SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS is 0, attachment orphan pruning is disabled.'); + return; + } + + logger.debug(`Attachment pruner started. Reclaiming orphaned attachments older than ${ttlHours} hours.`); + + // Run immediately on startup, then every hour. The startup call isn't + // awaited, so log any failure here: this worker exits on + // unhandledRejection, and the recurring schedule will retry. + this.pruneOrphanedAttachments().catch((error) => { + logger.warn(`Initial attachment prune failed: ${error}`); + }); + this.interval = setIntervalAsync(() => this.pruneOrphanedAttachments(), ONE_HOUR_MS); + } + + async dispose() { + if (this.interval) { + clearInterval(this.interval); + this.interval = undefined; + } + } + + private async pruneOrphanedAttachments() { + const cutoff = new Date(Date.now() - env.SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS * ONE_HOUR_MS); + + // Condemn orphans by flipping them to the DELETING tombstone. Each claim + // is atomic, so a PENDING blob committed by a concurrent send (its commit + // matches only PENDING rows) or a zero-link blob re-linked by a concurrent + // duplicate-chat loses the claim and is left intact. + // + // PENDING orphans: uploaded but the message was never sent. + const pendingClaimed = await this.db.attachment.updateMany({ + where: { + status: AttachmentStatus.PENDING, + createdAt: { lt: cutoff }, + }, + data: { status: AttachmentStatus.DELETING }, + }); + + // COMMITTED orphans: blobs left with zero links by an interrupted + // chat-delete sweep in the web app. + const committedClaimed = await this.db.attachment.updateMany({ + where: { + status: AttachmentStatus.COMMITTED, + createdAt: { lt: cutoff }, + chats: { none: {} }, + }, + data: { status: AttachmentStatus.DELETING }, + }); + + // Reclaim every tombstone: delete bytes, then the row. This also picks up + // tombstones left behind by the web app's inline reclaim (or a crashed + // earlier tick) whose byte delete failed. + const reclaimed = await this.reclaimTombstonedAttachments(); + + if (pendingClaimed.count > 0 || committedClaimed.count > 0 || reclaimed > 0) { + logger.debug( + `Attachment prune: condemned ${pendingClaimed.count} PENDING + ` + + `${committedClaimed.count} COMMITTED orphan(s), reclaimed ${reclaimed} tombstone(s).`, + ); + } + } + + /** + * Deletes the bytes for every `DELETING` tombstone, then removes the row. + * The row (the only durable handle to the bytes) is removed only after its + * bytes are confirmed gone, so a failed byte delete leaves the tombstone in + * place to be retried on the next tick — bytes can never be orphaned by a + * transient storage error. Rows whose byte delete fails this run are + * excluded from subsequent batches so a persistent failure can't spin the + * loop. + * + * @returns the number of tombstones fully reclaimed (bytes + row). + */ + private async reclaimTombstonedAttachments(): Promise { + let totalReclaimed = 0; + const failedIds: string[] = []; + + while (true) { + const batch = await this.db.attachment.findMany({ + where: { status: AttachmentStatus.DELETING, id: { notIn: failedIds } }, + select: { id: true, storageKey: true }, + take: BATCH_SIZE, + }); + + if (batch.length === 0) { + break; + } + + const settled = await Promise.allSettled( + batch.map((attachment) => this.storage.delete(attachment.storageKey))); + + const reclaimedIds: string[] = []; + batch.forEach((attachment, index) => { + const outcome = settled[index]; + if (outcome.status === 'fulfilled') { + reclaimedIds.push(attachment.id); + } else { + logger.warn(`Failed to delete bytes for tombstoned attachment ${attachment.id}, will retry next tick: ${outcome.reason}`); + failedIds.push(attachment.id); + } + }); + + if (reclaimedIds.length > 0) { + const result = await this.db.attachment.deleteMany({ + where: { id: { in: reclaimedIds }, status: AttachmentStatus.DELETING }, + }); + totalReclaimed += result.count; + } + + if (batch.length < BATCH_SIZE) { + break; + } + } + + return totalReclaimed; + } +} diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index 4b668a996..b97fe248f 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -8,6 +8,7 @@ import 'express-async-errors'; import { existsSync } from 'fs'; import { mkdir } from 'fs/promises'; import { Api } from "./api.js"; +import { AttachmentPruner } from "./attachmentPruner.js"; import { ConfigManager } from "./configManager.js"; import { ConnectionManager } from './connectionManager.js'; import { INDEX_CACHE_DIR, REPOS_CACHE_DIR, SHUTDOWN_SIGNALS } from './constants.js'; @@ -55,10 +56,12 @@ const accountPermissionSyncer = new AccountPermissionSyncer(prisma, settings, re const repoIndexManager = new RepoIndexManager(prisma, settings, redis, promClient); const configManager = new ConfigManager(prisma, connectionManager, env.CONFIG_PATH); const auditLogPruner = new AuditLogPruner(prisma); +const attachmentPruner = new AttachmentPruner(prisma); connectionManager.startScheduler(); await repoIndexManager.startScheduler(); auditLogPruner.startScheduler(); +attachmentPruner.startScheduler(); if (env.PERMISSION_SYNC_ENABLED === 'true' && !await hasEntitlement('permission-syncing')) { logger.warn('Permission syncing is not supported in current plan. Please contact team@sourcebot.dev for assistance.'); @@ -99,6 +102,7 @@ const listenToShutdownSignals = () => { await repoPermissionSyncer.dispose() await accountPermissionSyncer.dispose() await auditLogPruner.dispose() + await attachmentPruner.dispose() await configManager.dispose() await prisma.$disconnect(); diff --git a/packages/db/prisma/migrations/20260629200000_add_chat_attachments/migration.sql b/packages/db/prisma/migrations/20260629200000_add_chat_attachments/migration.sql new file mode 100644 index 000000000..55bbab471 --- /dev/null +++ b/packages/db/prisma/migrations/20260629200000_add_chat_attachments/migration.sql @@ -0,0 +1,49 @@ +-- CreateEnum +CREATE TYPE "AttachmentStatus" AS ENUM ('PENDING', 'COMMITTED'); + +-- CreateTable +CREATE TABLE "Attachment" ( + "id" TEXT NOT NULL, + "orgId" INTEGER NOT NULL, + "storageKey" TEXT NOT NULL, + "filename" TEXT NOT NULL, + "mediaType" TEXT NOT NULL, + "sizeBytes" INTEGER NOT NULL, + "checksum" TEXT NOT NULL, + "uploadedById" TEXT, + "status" "AttachmentStatus" NOT NULL DEFAULT 'PENDING', + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT "Attachment_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "ChatAttachment" ( + "id" TEXT NOT NULL, + "chatId" TEXT NOT NULL, + "attachmentId" TEXT NOT NULL, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT "ChatAttachment_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "Attachment_status_createdAt_idx" ON "Attachment"("status", "createdAt"); + +-- CreateIndex +CREATE INDEX "ChatAttachment_attachmentId_idx" ON "ChatAttachment"("attachmentId"); + +-- CreateIndex +CREATE UNIQUE INDEX "ChatAttachment_chatId_attachmentId_key" ON "ChatAttachment"("chatId", "attachmentId"); + +-- AddForeignKey +ALTER TABLE "Attachment" ADD CONSTRAINT "Attachment_orgId_fkey" FOREIGN KEY ("orgId") REFERENCES "Org"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "Attachment" ADD CONSTRAINT "Attachment_uploadedById_fkey" FOREIGN KEY ("uploadedById") REFERENCES "User"("id") ON DELETE SET NULL ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "ChatAttachment" ADD CONSTRAINT "ChatAttachment_chatId_fkey" FOREIGN KEY ("chatId") REFERENCES "Chat"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "ChatAttachment" ADD CONSTRAINT "ChatAttachment_attachmentId_fkey" FOREIGN KEY ("attachmentId") REFERENCES "Attachment"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/packages/db/prisma/migrations/20260629210000_add_attachment_deleting_status/migration.sql b/packages/db/prisma/migrations/20260629210000_add_attachment_deleting_status/migration.sql new file mode 100644 index 000000000..f2b723929 --- /dev/null +++ b/packages/db/prisma/migrations/20260629210000_add_attachment_deleting_status/migration.sql @@ -0,0 +1,2 @@ +-- AlterEnum +ALTER TYPE "AttachmentStatus" ADD VALUE 'DELETING'; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index 014526907..250d67c9d 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -24,6 +24,20 @@ enum ChatVisibility { PUBLIC } +/// Lifecycle status of an uploaded attachment blob. +/// PENDING: uploaded but not yet linked to a chat (orphan until a message +/// referencing it is sent). COMMITTED: linked to at least one chat. +/// DELETING: condemned tombstone. The row is kept (as the only durable handle +/// to the bytes) until the stored bytes are confirmed deleted, at which point +/// the row is removed. A failed byte delete leaves the row DELETING for the +/// attachment pruner's reclaim sweep to retry, so a transient storage error +/// can never orphan bytes. +enum AttachmentStatus { + PENDING + COMMITTED + DELETING +} + /// @note: The @map annotation is required to maintain backwards compatibility /// with the existing database. /// @note: In the generated client, these mapped values will be in pascalCase. @@ -272,6 +286,7 @@ model Org { connections Connection[] repos Repo[] apiKeys ApiKey[] + attachments Attachment[] isOnboarded Boolean @default(false) imageUrl String? @@ -456,6 +471,7 @@ model User { chats Chat[] sharedChats ChatAccess[] repoVisits RepoVisit[] + uploadedAttachments Attachment[] oauthTokens OAuthToken[] oauthAuthCodes OAuthAuthorizationCode[] @@ -608,6 +624,69 @@ model Chat { messages Json // This is a JSON array of `Message` types from @ai-sdk/ui-utils. sharedWith ChatAccess[] + + attachments ChatAttachment[] +} + +/// A user-uploaded binary attachment blob (e.g. an image). The bytes live in +/// the configured StorageBackend (keyed by `storageKey`), never in the DB. +/// Attachments are NOT chat-bound: they are uploaded before any chat +/// association exists, and linked to chats via `ChatAttachment`. Permissions +/// are derived entirely from the linked chat(s); there are no independent ACLs. +model Attachment { + id String @id @default(cuid()) + + org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) + orgId Int + + /// Opaque key the StorageBackend uses to locate the bytes. + storageKey String + + /// Original (sanitized) filename supplied by the uploader. + filename String + + /// Final media type of the stored bytes (validated by decoding at upload). + mediaType String + + /// Size of the stored bytes. + sizeBytes Int + + /// Hex SHA-256 of the stored bytes (integrity / debugging; not used for dedup). + checksum String + + /// The user who uploaded this blob. Uploads require authentication, so this + /// is set at creation (anonymous users cannot upload binary attachments). It + /// is nulled if the uploader is later deleted, so committed attachments + /// survive on the chats they're linked to. + uploadedBy User? @relation(fields: [uploadedById], references: [id], onDelete: SetNull) + uploadedById String? + + status AttachmentStatus @default(PENDING) + + createdAt DateTime @default(now()) + + chats ChatAttachment[] + + @@index([status, createdAt]) +} + +/// Join table linking an `Attachment` blob to a `Chat`. This is the linker +/// that makes chat duplication metadata-only (no byte copy) and keeps +/// attachment access purely chat-derived. Deleting a chat cascades these rows; +/// a separate sweep deletes `Attachment`s left with zero links (and their bytes). +model ChatAttachment { + id String @id @default(cuid()) + + chat Chat @relation(fields: [chatId], references: [id], onDelete: Cascade) + chatId String + + attachment Attachment @relation(fields: [attachmentId], references: [id], onDelete: Cascade) + attachmentId String + + createdAt DateTime @default(now()) + + @@unique([chatId, attachmentId]) + @@index([attachmentId]) } /// Represents a user's access to a chat that has been shared with them. diff --git a/packages/shared/src/env.server.ts b/packages/shared/src/env.server.ts index 57c83f9f0..b832bce4f 100644 --- a/packages/shared/src/env.server.ts +++ b/packages/shared/src/env.server.ts @@ -321,6 +321,23 @@ const options = { SOURCEBOT_CHAT_PROMPT_CACHE_BREAK_DETECTION_ENABLED: booleanSchema.default('false'), SOURCEBOT_MCP_TOOL_CALL_TIMEOUT_MS: numberSchema.int().positive().max(maxTimerDelayMs).default(60000), + /** + * Maximum size (in bytes) of a single image attachment uploaded to the + * Ask chat. Enforced server-side at upload time. Distinct from the + * inline-text cap (which lives as a web-package constant). + * @default 10 MiB + */ + SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES: numberSchema.int().positive().default(10 * 1024 * 1024), + + /** + * How long (in hours) an uploaded-but-unlinked (PENDING) attachment + * blob is retained before the orphan sweep deletes it and its bytes. + * Covers "select a file then never send" abandonment. Set to 0 to + * disable the orphan sweep entirely. + * @default 24 hours + */ + SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS: numberSchema.int().nonnegative().default(24), + DEBUG_WRITE_CHAT_MESSAGES_TO_FILE: booleanSchema.default('false'), DEBUG_ENABLE_REACT_SCAN: booleanSchema.default('false'), DEBUG_ENABLE_REACT_GRAB: booleanSchema.default('false'), diff --git a/packages/shared/src/index.server.ts b/packages/shared/src/index.server.ts index 6a09b9340..63049fe91 100644 --- a/packages/shared/src/index.server.ts +++ b/packages/shared/src/index.server.ts @@ -75,6 +75,13 @@ export { export { createRedisClient, } from "./redis.js"; +export { + getStorageBackend, + LocalFsStorageBackend, +} from "./storage.js"; +export type { + StorageBackend, +} from "./storage.js"; export { SOURCEBOT_VERSION, } from "./version.js"; diff --git a/packages/shared/src/storage.ts b/packages/shared/src/storage.ts new file mode 100644 index 000000000..a8c13983b --- /dev/null +++ b/packages/shared/src/storage.ts @@ -0,0 +1,103 @@ +import { createReadStream as fsCreateReadStream } from 'fs'; +import fs from 'fs/promises'; +import path from 'path'; +import { Readable } from 'stream'; +import { env } from './env.server.js'; + +/** + * App-mediated storage for binary chat attachments. The application always + * brokers access (no public URLs); callers resolve permissions via the chat + * linker before reading. A `LocalFsStorageBackend` is provided; additional + * drivers (e.g. S3) can implement the same contract. + * + * @note Lives in the shared package so the web app (upload/serve) and the + * backend worker (orphan pruning) share one implementation and one on-disk + * layout. + */ +export interface StorageBackend { + /** Writes the bytes for `key`, overwriting any existing object. */ + put(key: string, data: Buffer): Promise; + /** Reads the full bytes for `key`. Throws if the object is missing. */ + get(key: string): Promise; + /** + * Returns the object's byte size, or `undefined` if it is missing. Lets the + * serving route detect a missing object before committing response headers. + */ + stat(key: string): Promise<{ sizeBytes: number } | undefined>; + /** Opens a Node read stream for `key` (used by the serving route). */ + createReadStream(key: string): Readable; + /** Deletes the object for `key`. A missing object is not an error. */ + delete(key: string): Promise; +} + +/** + * Stores attachment bytes on the local filesystem under + * `DATA_CACHE_DIR/attachments`. Keys are opaque, server-generated strings; the + * backend defensively rejects any key that would resolve outside the base + * directory (path-traversal guard), even though keys are not client-controlled. + */ +export class LocalFsStorageBackend implements StorageBackend { + private readonly baseDir: string; + + constructor(baseDir: string = path.join(env.DATA_CACHE_DIR, 'attachments')) { + this.baseDir = path.resolve(baseDir); + } + + private resolveKey(key: string): string { + const resolved = path.resolve(this.baseDir, key); + // Defense-in-depth: never let a key escape the base directory. + if (resolved !== this.baseDir && !resolved.startsWith(this.baseDir + path.sep)) { + throw new Error(`Invalid storage key: ${key}`); + } + return resolved; + } + + async put(key: string, data: Buffer): Promise { + const filePath = this.resolveKey(key); + await fs.mkdir(path.dirname(filePath), { recursive: true }); + await fs.writeFile(filePath, data); + } + + async get(key: string): Promise { + return fs.readFile(this.resolveKey(key)); + } + + async stat(key: string): Promise<{ sizeBytes: number } | undefined> { + try { + const { size } = await fs.stat(this.resolveKey(key)); + return { sizeBytes: size }; + } catch (error) { + if ((error as NodeJS.ErrnoException)?.code === 'ENOENT') { + return undefined; + } + throw error; + } + } + + createReadStream(key: string): Readable { + return fsCreateReadStream(this.resolveKey(key)); + } + + async delete(key: string): Promise { + try { + await fs.unlink(this.resolveKey(key)); + } catch (error) { + if ((error as NodeJS.ErrnoException)?.code !== 'ENOENT') { + throw error; + } + } + } +} + +let storageBackend: StorageBackend | undefined; + +/** + * Returns the process-wide StorageBackend singleton. Currently always the + * local-FS backend; driver selection (e.g. S3) could be made config-driven. + */ +export const getStorageBackend = (): StorageBackend => { + if (!storageBackend) { + storageBackend = new LocalFsStorageBackend(); + } + return storageBackend; +}; diff --git a/packages/web/src/app/(app)/askgh/[owner]/[repo]/components/landingPage.tsx b/packages/web/src/app/(app)/askgh/[owner]/[repo]/components/landingPage.tsx index afc7af4c3..29cedbb71 100644 --- a/packages/web/src/app/(app)/askgh/[owner]/[repo]/components/landingPage.tsx +++ b/packages/web/src/app/(app)/askgh/[owner]/[repo]/components/landingPage.tsx @@ -21,6 +21,7 @@ interface LandingPageProps { imageUrl?: string | null; repoId: number; isAuthenticated: boolean; + maxImageBytes: number; } export const LandingPage = ({ @@ -30,6 +31,7 @@ export const LandingPage = ({ imageUrl, repoId, isAuthenticated, + maxImageBytes, }: LandingPageProps) => { const { createNewChatThread, isLoading } = useCreateNewChatThread(); const [isContextSelectorOpen, setIsContextSelectorOpen] = useState(false); @@ -87,6 +89,7 @@ export const LandingPage = ({ isDisabled={isChatBoxDisabled} isAuthenticated={isAuthenticated} isLoginWallEnabled={true} + maxImageBytes={maxImageBytes} />
diff --git a/packages/web/src/app/(app)/askgh/[owner]/[repo]/page.tsx b/packages/web/src/app/(app)/askgh/[owner]/[repo]/page.tsx index dd1dd95c3..37e49ce5a 100644 --- a/packages/web/src/app/(app)/askgh/[owner]/[repo]/page.tsx +++ b/packages/web/src/app/(app)/askgh/[owner]/[repo]/page.tsx @@ -10,6 +10,7 @@ import { getConfiguredLanguageModelsInfo } from "@/features/chat/utils.server"; import { auth } from "@/auth"; import { hasEntitlement } from "@/lib/entitlements"; import { ChatEntitlementMessage } from "@/features/chat/components/chatEntitlementMessage"; +import { env } from "@sourcebot/shared"; interface PageProps { params: Promise<{ owner: string; repo: string }>; @@ -68,6 +69,7 @@ export default async function GitHubRepoPage(props: PageProps) { imageUrl={repoInfo.imageUrl ?? undefined} repoId={repoInfo.id} isAuthenticated={!!session?.user} + maxImageBytes={env.SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES} /> diff --git a/packages/web/src/app/(app)/chat/[id]/page.tsx b/packages/web/src/app/(app)/chat/[id]/page.tsx index e49d359b8..a16b2a93f 100644 --- a/packages/web/src/app/(app)/chat/[id]/page.tsx +++ b/packages/web/src/app/(app)/chat/[id]/page.tsx @@ -179,6 +179,7 @@ export default async function Page(props: PageProps) { isOwner={isOwner} isAuthenticated={!!session} isLoginWallEnabled={env.EXPERIMENT_ASK_GH_ENABLED === 'true'} + maxImageBytes={env.SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES} chatName={name ?? undefined} />
diff --git a/packages/web/src/app/(app)/chat/chatLandingPage.tsx b/packages/web/src/app/(app)/chat/chatLandingPage.tsx index f81e27247..153a044a2 100644 --- a/packages/web/src/app/(app)/chat/chatLandingPage.tsx +++ b/packages/web/src/app/(app)/chat/chatLandingPage.tsx @@ -71,6 +71,7 @@ export async function ChatLandingPage() { searchContexts={searchContexts} isAuthenticated={!!session} isLoginWallEnabled={env.EXPERIMENT_ASK_GH_ENABLED === 'true'} + maxImageBytes={env.SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES} /> diff --git a/packages/web/src/app/(app)/chat/components/landingPageChatBox.tsx b/packages/web/src/app/(app)/chat/components/landingPageChatBox.tsx index 61f78bf4d..f9349dc92 100644 --- a/packages/web/src/app/(app)/chat/components/landingPageChatBox.tsx +++ b/packages/web/src/app/(app)/chat/components/landingPageChatBox.tsx @@ -19,6 +19,7 @@ interface LandingPageChatBox { searchContexts: SearchContextQuery[]; isAuthenticated: boolean; isLoginWallEnabled: boolean; + maxImageBytes: number; } export const LandingPageChatBox = ({ @@ -27,6 +28,7 @@ export const LandingPageChatBox = ({ searchContexts, isAuthenticated, isLoginWallEnabled, + maxImageBytes, }: LandingPageChatBox) => { const { createNewChatThread, isLoading } = useCreateNewChatThread(); const [selectedSearchScopes, setSelectedSearchScopes] = useLocalStorage(SELECTED_SEARCH_SCOPES_LOCAL_STORAGE_KEY, [], { initializeWithValue: false }); @@ -50,6 +52,7 @@ export const LandingPageChatBox = ({ isDisabled={isChatBoxDisabled} isAuthenticated={isAuthenticated} isLoginWallEnabled={isLoginWallEnabled} + maxImageBytes={maxImageBytes} />
diff --git a/packages/web/src/app/api/(server)/ee/chat/[chatId]/attachments/[attachmentId]/route.ts b/packages/web/src/app/api/(server)/ee/chat/[chatId]/attachments/[attachmentId]/route.ts new file mode 100644 index 000000000..d5adc4c17 --- /dev/null +++ b/packages/web/src/app/api/(server)/ee/chat/[chatId]/attachments/[attachmentId]/route.ts @@ -0,0 +1,123 @@ +import { sew } from "@/middleware/sew"; +import { apiHandler } from "@/lib/apiHandler"; +import { notFound, serviceErrorResponse } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { withOptionalAuth } from "@/middleware/withAuth"; +import { checkAskEntitlement, resolveChatAccess } from "@/features/chat/utils.server"; +import { getStorageBackend } from "@sourcebot/shared"; +import { NextRequest } from "next/server"; +import { Readable } from "stream"; + +/** + * Serves the bytes of a binary attachment. Two access paths: + * + * 1. The uploader can always read their own bytes. This covers the brief + * pre-commit window right after sending (before the `ChatAttachment` link + * exists), so a just-sent image renders straight from this route. + * 2. Everyone else gets purely chat-derived access: they must be able to view + * the chat (owner / shared / public) AND a `ChatAttachment(chatId, + * attachmentId)` link must exist. The link requirement is what makes chat + * duplication safe (the same blob can be served only through chats it is + * actually linked to). + */ +export const GET = apiHandler(async ( + _req: NextRequest, + { params }: { params: Promise<{ chatId: string; attachmentId: string }> }, +) => { + const { chatId, attachmentId } = await params; + + const response = await sew(() => + withOptionalAuth(async ({ org, user, prisma }) => { + const askError = await checkAskEntitlement(); + if (askError) { + return askError; + } + + const chat = await prisma.chat.findUnique({ + where: { + id: chatId, + orgId: org.id, + }, + }); + + if (!chat) { + return notFound(); + } + + const attachment = await prisma.attachment.findFirst({ + where: { id: attachmentId, orgId: org.id }, + }); + + if (!attachment) { + return notFound(); + } + + // Path 1: the uploader can always read their own bytes (pre-commit + // preview window included). + const isUploader = user !== undefined && attachment.uploadedById === user.id; + + if (!isUploader) { + // Path 2: chat-derived access. The caller must be able to view + // the chat AND the attachment must be linked to THIS chat, so a + // blob can never be served via a chat it isn't linked to. + const { canView } = await resolveChatAccess({ prisma, chat, user }); + if (!canView) { + return notFound(); + } + + const link = await prisma.chatAttachment.findUnique({ + where: { + chatId_attachmentId: { chatId, attachmentId }, + }, + select: { id: true }, + }); + + if (!link) { + return notFound(); + } + } + + return { attachment }; + }) + ); + + if (isServiceError(response)) { + return serviceErrorResponse(response); + } + + const { attachment } = response; + const storage = getStorageBackend(); + + // Confirm the bytes exist before committing headers; a missing object would + // otherwise surface as a stream error after a 200 is already sent. + const stat = await storage.stat(attachment.storageKey); + if (!stat) { + return serviceErrorResponse(notFound()); + } + + const nodeStream = storage.createReadStream(attachment.storageKey); + const webStream = Readable.toWeb(nodeStream) as ReadableStream; + + // Build a header-safe Content-Disposition: an ASCII fallback plus an + // RFC 5987 UTF-8 form for the real filename. + const asciiName = attachment.filename + .replace(/[^\x20-\x7e]/g, '_') + .replace(/["\\]/g, '_'); + const contentDisposition = + `inline; filename="${asciiName}"; filename*=UTF-8''${encodeURIComponent(attachment.filename)}`; + + return new Response(webStream, { + headers: { + 'Content-Type': attachment.mediaType, + // On-disk size, so the header always matches the streamed bytes. + 'Content-Length': stat.sizeBytes.toString(), + 'Content-Disposition': contentDisposition, + // Never let the browser sniff a different (potentially executable) + // content type from the bytes. + 'X-Content-Type-Options': 'nosniff', + // Access is re-checked per request, so don't let the browser reuse + // these bytes after a logout / share revocation / visibility change. + 'Cache-Control': 'private, no-store', + }, + }); +}, { track: false }); diff --git a/packages/web/src/app/api/(server)/ee/chat/attachments/route.ts b/packages/web/src/app/api/(server)/ee/chat/attachments/route.ts new file mode 100644 index 000000000..ca689f019 --- /dev/null +++ b/packages/web/src/app/api/(server)/ee/chat/attachments/route.ts @@ -0,0 +1,123 @@ +import { sew } from "@/middleware/sew"; +import { apiHandler } from "@/lib/apiHandler"; +import { ErrorCode } from "@/lib/errorCodes"; +import { captureEvent } from "@/lib/posthog"; +import { ServiceError, serviceErrorResponse } from "@/lib/serviceError"; +import { isServiceError } from "@/lib/utils"; +import { withAuth } from "@/middleware/withAuth"; +import { checkAskEntitlement } from "@/features/chat/utils.server"; +import { validateImageAttachment } from "@/features/chat/attachments/validation"; +import { sanitizeFilename } from "@/features/chat/attachments/filename"; +import { env, getStorageBackend } from "@sourcebot/shared"; +import { createHash, randomUUID } from "crypto"; +import { StatusCodes } from "http-status-codes"; +import { NextRequest } from "next/server"; + +export const POST = apiHandler(async (req: NextRequest) => { + // Reject obviously-oversized bodies before reading them into memory. The + // multipart envelope adds some overhead beyond the raw bytes, so allow a + // 1 MiB slack on top of the image cap; the exact byte cap is re-checked + // against the decoded buffer below. + const maxImageBytes = env.SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES; + const contentLength = Number(req.headers.get('content-length') ?? 0); + if (contentLength > maxImageBytes + 1024 * 1024) { + return serviceErrorResponse({ + statusCode: StatusCodes.REQUEST_TOO_LONG, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Attachment exceeds the ${Math.round(maxImageBytes / (1024 * 1024))}MB limit.`, + } satisfies ServiceError); + } + + const response = await sew(() => + // `withAuth` (not `withOptionalAuth`) so anonymous users cannot upload + // binary attachments. + withAuth(async ({ org, user, prisma }) => { + const askError = await checkAskEntitlement(); + if (askError) { + return askError; + } + + const formData = await req.formData(); + const file = formData.get('file'); + if (!(file instanceof File)) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: 'Expected a `file` field in the multipart body.', + } satisfies ServiceError; + } + + // Authoritative size reject from the parsed file, independent of the + // (best-effort, spoofable) content-length header, before buffering + // the bytes into a Buffer. + if (file.size > maxImageBytes) { + return { + statusCode: StatusCodes.REQUEST_TOO_LONG, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Attachment exceeds the ${Math.round(maxImageBytes / (1024 * 1024))}MB limit.`, + } satisfies ServiceError; + } + + const buffer = Buffer.from(await file.arrayBuffer()); + + // Authoritative content-type + size check by decoding the image + // (never the client-supplied MIME type or extension). + const validation = await validateImageAttachment(buffer, maxImageBytes); + if (!validation.ok) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: validation.reason, + } satisfies ServiceError; + } + + const { mediaType } = validation; + const filename = sanitizeFilename(file.name || 'attachment'); + const sizeBytes = buffer.length; + const checksum = createHash('sha256').update(buffer).digest('hex'); + const storageKey = `${org.id}/${randomUUID()}`; + + const storage = getStorageBackend(); + await storage.put(storageKey, buffer); + + let attachment; + try { + attachment = await prisma.attachment.create({ + data: { + orgId: org.id, + storageKey, + filename, + mediaType, + sizeBytes, + checksum, + uploadedById: user.id, + status: 'PENDING', + }, + }); + } catch (error) { + // Roll back the orphaned bytes if the DB row couldn't be written. + await storage.delete(storageKey).catch(() => { /* best effort */ }); + throw error; + } + + await captureEvent('chat_attachment_uploaded', { + source: req.headers.get('X-Sourcebot-Client-Source') ?? 'unknown', + mediaType, + sizeBytes, + }); + + return { + attachmentId: attachment.id, + filename, + mediaType, + sizeBytes, + }; + }) + ); + + if (isServiceError(response)) { + return serviceErrorResponse(response); + } + + return Response.json(response); +}, { track: false }); diff --git a/packages/web/src/app/api/(server)/ee/chat/route.ts b/packages/web/src/app/api/(server)/ee/chat/route.ts index cbb11f06e..90a9ddee2 100644 --- a/packages/web/src/app/api/(server)/ee/chat/route.ts +++ b/packages/web/src/app/api/(server)/ee/chat/route.ts @@ -3,8 +3,11 @@ import { getAskMcpAvailabilityAnalytics, getAskMcpTurnCompletedAnalytics } from import { createMessageStream } from "@/ee/features/chat/agent"; import { getPromptCacheStrategy } from "@/ee/features/chat/promptCaching"; import { additionalChatRequestParamsSchema } from "@/features/chat/types"; -import { getLanguageModelKey } from "@/features/chat/utils"; -import { checkAskEntitlement, getConfiguredLanguageModels, isOwnerOfChat, updateChatMessages } from "@/features/chat/utils.server"; +import { getLanguageModelKey, getMessageTextBytes, getUserMessageAttachments } from "@/features/chat/utils"; +import { ATTACHMENT_MAX_TURN_TEXT_BYTES } from "@/features/chat/constants"; +import { isMediaTypeAccepted, mediaTypeToModality } from "@/features/chat/attachments/modality"; +import { resolveModelCapabilities } from "@/features/chat/modelCapabilities.server"; +import { checkAskEntitlement, commitMessageAttachments, getConfiguredLanguageModels, isOwnerOfChat, updateChatMessages } from "@/features/chat/utils.server"; import { getAISDKLanguageModelAndOptions } from "@/features/chat/llm.server"; import { resolveContextWindow } from "@/features/chat/modelContextWindow.server"; import { apiHandler } from "@/lib/apiHandler"; @@ -74,6 +77,32 @@ export const POST = apiHandler(async (req: NextRequest) => { } satisfies ServiceError; } + const latestMessage = messages[messages.length - 1]; + + // `z.array(z.any())` permits an empty array; reject it before + // anything downstream dereferences the latest message. + if (!latestMessage) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: 'At least one message is required.', + } satisfies ServiceError; + } + + // Authoritatively enforce the per-turn inline-text budget (the client + // gate can't be trusted), keeping oversized text out of the prompt and + // the persisted messages. Only the user turn carries submitted text. + if ( + latestMessage.role === 'user' && + getMessageTextBytes(latestMessage) > ATTACHMENT_MAX_TURN_TEXT_BYTES + ) { + return { + statusCode: StatusCodes.REQUEST_TOO_LONG, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Message and attachments exceed the ${Math.round(ATTACHMENT_MAX_TURN_TEXT_BYTES / 1024)}KB per-message limit.`, + } satisfies ServiceError; + } + // From the language model ID, attempt to find the // corresponding config in `config.json`. const languageModelConfig = @@ -88,8 +117,47 @@ export const POST = apiHandler(async (req: NextRequest) => { } satisfies ServiceError; } + // Verify and commit any binary attachments referenced by the latest + // message (links them to this chat, flips PENDING -> COMMITTED). + // Rejects forged/unauthorized attachment ids before the agent runs. + // Done after model validation so a rejected model can't leave + // attachments committed without a persisted message. + const attachmentError = await commitMessageAttachments({ + prisma, + chatId: id, + orgId: org.id, + userId: user?.id, + message: latestMessage, + }); + if (attachmentError) { + return attachmentError; + } + const { model, providerOptions, temperature } = await getAISDKLanguageModelAndOptions(languageModelConfig); + // Authoritative, server-side resolution of the model's input + // modalities. The agent's multimodal content builder and degrade + // logic rely on this value, never the client. + const acceptedModalities = (await resolveModelCapabilities(languageModelConfig)).inputModalities; + + // If the latest message carries native-media attachments the selected + // model cannot accept, the agent will degrade (omit the bytes). Record it. + const droppedAttachmentCount = getUserMessageAttachments(latestMessage).filter( + (attachment) => + attachment.kind === 'blob' && + mediaTypeToModality(attachment.mediaType) !== undefined && + !isMediaTypeAccepted(attachment.mediaType, acceptedModalities), + ).length; + if (droppedAttachmentCount > 0) { + await captureEvent('chat_attachment_degraded', { + chatId: id, + source: req.headers.get('X-Sourcebot-Client-Source') ?? 'unknown', + droppedImageCount: droppedAttachmentCount, + modelProvider: languageModelConfig.provider, + model: languageModelConfig.model, + }); + } + // Total context window for the selected model, used as the // denominator for the UI's context-usage gauge. Undefined when // unknown (e.g. self-hosted models). @@ -151,6 +219,7 @@ export const POST = apiHandler(async (req: NextRequest) => { modelTemperature: temperature, userId: user?.id, orgId: org.id, + acceptedModalities, onFinish: async ({ messages }) => { await updateChatMessages({ chatId: id, messages, prisma }); const askMcpTurnCompleted = getAskMcpTurnCompletedAnalytics({ diff --git a/packages/web/src/ee/features/chat/agent.test.ts b/packages/web/src/ee/features/chat/agent.test.ts index a16c4041c..9787212c6 100644 --- a/packages/web/src/ee/features/chat/agent.test.ts +++ b/packages/web/src/ee/features/chat/agent.test.ts @@ -31,6 +31,13 @@ vi.mock('@sourcebot/shared', () => ({ SOURCEBOT_CHAT_PROMPT_CACHE_BREAK_DETECTION_ENABLED: 'false', }, getDBConnectionString: () => 'postgresql://sourcebot:sourcebot@db.example.com:5432/sourcebot', + getStorageBackend: () => ({ + get: vi.fn(), + put: vi.fn(), + stat: vi.fn(), + createReadStream: vi.fn(), + delete: vi.fn(), + }), })); vi.mock('server-only', () => ({})); diff --git a/packages/web/src/ee/features/chat/agent.ts b/packages/web/src/ee/features/chat/agent.ts index e6cc49ca3..9819d5202 100644 --- a/packages/web/src/ee/features/chat/agent.ts +++ b/packages/web/src/ee/features/chat/agent.ts @@ -1,4 +1,6 @@ -import { SBChatMessage, SBChatMessageMetadata, StepTokenUsageEntry, ToolTokenUsageEntry } from "@/features/chat/types"; +import { BlobAttachment, InputModality, SBChatMessage, SBChatMessageMetadata, StepTokenUsageEntry, ToolTokenUsageEntry } from "@/features/chat/types"; +import { isMediaTypeAccepted, mediaTypeToModality } from "@/features/chat/attachments/modality"; +import { getStorageBackend } from "@sourcebot/shared"; import { estimateModelToolOutputTokens } from "@/ee/features/chat/tokenEstimation"; import { getFileSource } from '@/features/git'; import { isServiceError } from "@/lib/utils"; @@ -34,6 +36,142 @@ const dedent = _dedent.withOptions({ alignValues: true }); const logger = createLogger('chat-agent'); +// Resolved attachment bytes for the whole chat, keyed by attachment id. Only +// blobs the model can natively accept are loaded; the content builder +// recomputes per-attachment status from the message to leave degrade markers. +type ResolvedTurnMedia = Map; + +// A native (non-text) attachment part for a model message. The single place +// that maps a stored blob to its model content part; extend this resolver to +// add PDF / audio / video support. +type ModelMediaPart = { type: 'image'; image: Buffer; mediaType: string }; + +const buildModelMediaPart = (bytes: Buffer, mediaType: string): ModelMediaPart | undefined => { + const modality = mediaTypeToModality(mediaType); + if (modality === 'image') { + return { type: 'image', image: bytes, mediaType }; + } + // audio / video / document modalities are not yet wired into the model + // content; callers leave a degrade marker in their place. + return undefined; +}; + +// The native-media (non-text) attachment blobs carried by a user message. +const getMediaBlobs = (message: SBChatMessage): BlobAttachment[] => + getUserMessageAttachments(message) + .filter((attachment): attachment is BlobAttachment => + attachment.kind === 'blob' && mediaTypeToModality(attachment.mediaType) !== undefined); + +// Reads native-media attachment bytes for every user turn in the chat from the +// StorageBackend, keyed by attachment id. Media bytes are re-sent on every turn +// (so attachments stay in context and the cached prefix stays byte-stable), +// hence all turns are resolved here, not just the latest. Fail-closed: only +// blobs whose modality the model accepts are loaded, and only when linked to +// this chat (mirroring the serving route's chat-derived access), so a text-only +// model resolves nothing here and the builder leaves a marker instead. +const resolveTurnMedia = async ({ + messages, + acceptedModalities, + prisma, + orgId, + chatId, +}: { + messages: SBChatMessage[]; + acceptedModalities: InputModality[]; + prisma: PrismaClient; + orgId?: number; + chatId: string; +}): Promise => { + const result: ResolvedTurnMedia = new Map(); + if (orgId === undefined) { + return result; + } + + const acceptedBlobs = messages + .filter((message) => message.role === 'user') + .flatMap((message) => getMediaBlobs(message)) + .filter((blob) => isMediaTypeAccepted(blob.mediaType, acceptedModalities)); + if (acceptedBlobs.length === 0) { + return result; + } + + // Dedupe ids: the same attachment may be re-referenced across turns. + const ids = [...new Set(acceptedBlobs.map((blob) => blob.attachmentId))]; + const records = await prisma.attachment.findMany({ + where: { id: { in: ids }, orgId, chats: { some: { chatId } } }, + }); + + const storage = getStorageBackend(); + await Promise.all(records.map(async (record) => { + try { + const bytes = await storage.get(record.storageKey); + result.set(record.id, { bytes, mediaType: record.mediaType }); + } catch (error) { + logger.error(`Failed to read attachment ${record.id} from storage:`, error); + } + })); + + return result; +}; + +// Builds the `ModelMessage` for a user turn: the text part (with any +// inline-text attachments folded in) plus native media content parts. Media is +// re-sent on every turn so attachments stay in context. When media is present +// but omitted (an unsupported modality or a failed read), a short marker is +// appended so the model knows context was dropped. +const buildUserModelMessage = ({ + message, + acceptedModalities, + resolvedMedia, +}: { + message: SBChatMessage; + acceptedModalities: InputModality[]; + resolvedMedia?: ResolvedTurnMedia; +}): ModelMessage => { + const text = getUserMessageText(message); + const attachmentsBlock = formatAttachmentsForPrompt( + getUserMessageAttachments(message), + ); + let baseText = attachmentsBlock ? `${text}\n\n${attachmentsBlock}` : text; + + const mediaBlobs = getMediaBlobs(message); + if (mediaBlobs.length === 0) { + return { role: 'user', content: baseText }; + } + + const acceptedBlobs = mediaBlobs.filter((blob) => isMediaTypeAccepted(blob.mediaType, acceptedModalities)); + const unsupportedCount = mediaBlobs.length - acceptedBlobs.length; + + const mediaParts = acceptedBlobs + .map((blob) => { + const resolved = resolvedMedia?.get(blob.attachmentId); + return resolved ? buildModelMediaPart(resolved.bytes, resolved.mediaType) : undefined; + }) + .filter((part): part is ModelMediaPart => part !== undefined); + const failedCount = acceptedBlobs.length - mediaParts.length; + + // Distinguish the omission reasons so the model gets an accurate note. + const notes: string[] = []; + if (unsupportedCount > 0) { + notes.push(`${unsupportedCount} attachment(s) omitted (the selected model does not support that file type).`); + } + if (failedCount > 0) { + notes.push(`${failedCount} attachment(s) could not be loaded and were omitted.`); + } + if (notes.length > 0) { + baseText += `\n\n[Note: ${notes.join(' ')}]`; + } + + if (mediaParts.length > 0) { + return { + role: 'user', + content: [{ type: 'text', text: baseText }, ...mediaParts], + }; + } + + return { role: 'user', content: baseText }; +}; + // eslint-disable-next-line @typescript-eslint/no-explicit-any const mergeStreamAsync = async (stream: StreamTextResult, writer: UIMessageStreamWriter, options: UIMessageStreamOptions = {}) => { await new Promise((resolve) => writer.merge(stream.toUIMessageStream({ @@ -63,6 +201,11 @@ interface CreateMessageStreamResponseProps { metadata?: Partial; userId?: string; orgId?: number; + // Authoritative, server-resolved set of input modalities the selected model + // can natively accept (from the models.dev catalog). Fail-closed (defaults + // to text-only): attachments whose modality isn't listed are omitted from + // the model content and a marker is left in their place. + acceptedModalities?: InputModality[]; } export const createMessageStream = async ({ @@ -82,6 +225,7 @@ export const createMessageStream = async ({ onError, userId, orgId, + acceptedModalities = [], }: CreateMessageStreamResponseProps) => { // Defense-in-depth: Ask Sourcebot is a paid feature. Every caller is // expected to gate on the `ask` entitlement before reaching here (see @@ -102,20 +246,28 @@ export const createMessageStream = async ({ // We will use this as the context we carry between messages. // Server requests always receive persisted messages between client streams, so evaluate them in the ready state. const incomingTurnProgress = getTurnProgressState({ messages, status: 'ready' }); + + // Media attachment bytes are re-sent on every turn (decision: keep + // attachments in context across turns rather than dropping them after the + // turn they were added). Re-sending the same bytes in the same position + // each turn also keeps the cached prefix byte-stable. Resolve the bytes for + // all user turns up-front, reading from the StorageBackend. + const resolvedMedia = await resolveTurnMedia({ + messages, + acceptedModalities, + prisma, + orgId, + chatId, + }); + let messageHistory: ModelMessage[] = - messages.map((message, index): ModelMessage | undefined => { + (await Promise.all(messages.map(async (message, index): Promise => { if (message.role === 'user') { - // Fold inline-text attachments into this turn's content (not the - // system prompt) so they stay bound to their turn, re-emitted from - // the persisted parts. - const text = getUserMessageText(message); - const attachmentsBlock = formatAttachmentsForPrompt( - getUserMessageAttachments(message), - ); - return { - role: 'user', - content: attachmentsBlock ? `${text}\n\n${attachmentsBlock}` : text, - }; + return buildUserModelMessage({ + message, + acceptedModalities, + resolvedMedia, + }); } if (message.role === 'assistant') { @@ -130,7 +282,9 @@ export const createMessageStream = async ({ } } } - }).filter(message => message !== undefined); + + return undefined; + }))).filter((message) => message !== undefined); // When the last assistant turn has approval responses (from the tool approval flow), // the turn is incomplete — it has no answer text, only a pending tool call that was diff --git a/packages/web/src/ee/features/chat/components/chatThread/chatThread.tsx b/packages/web/src/ee/features/chat/components/chatThread/chatThread.tsx index a7dfa82af..ddfd0a151 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/chatThread.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/chatThread.tsx @@ -52,6 +52,7 @@ interface ChatThreadProps { isOwner?: boolean; isAuthenticated: boolean; isLoginWallEnabled: boolean; + maxImageBytes: number; chatName?: string; } @@ -69,6 +70,7 @@ export const ChatThread = ({ isOwner = true, isAuthenticated, isLoginWallEnabled, + maxImageBytes, chatName, }: ChatThreadProps) => { const [isErrorBannerVisible, setIsErrorBannerVisible] = useState(false); @@ -489,6 +491,7 @@ export const ChatThread = ({ isDisabled={languageModels.length === 0} isAuthenticated={isAuthenticated} isLoginWallEnabled={isLoginWallEnabled} + maxImageBytes={maxImageBytes} />
{userAttachments.length > 0 && ( - + )}
diff --git a/packages/web/src/ee/features/chat/components/chatThread/messageAttachments.tsx b/packages/web/src/ee/features/chat/components/chatThread/messageAttachments.tsx index 7d2b5040e..cef8bd750 100644 --- a/packages/web/src/ee/features/chat/components/chatThread/messageAttachments.tsx +++ b/packages/web/src/ee/features/chat/components/chatThread/messageAttachments.tsx @@ -1,46 +1,101 @@ 'use client'; import { VscodeFileIcon } from "@/app/components/vscodeFileIcon"; +import { HoverCard, HoverCardContent, HoverCardTrigger } from "@/components/ui/hover-card"; import { AttachmentViewerDialog } from "@/features/chat/components/chatBox/attachmentViewerDialog"; +import { mediaTypeToModality } from "@/features/chat/attachments/modality"; import { AttachmentData } from "@/features/chat/types"; import { cn } from "@/lib/utils"; import { useState } from "react"; interface MessageAttachmentsProps { attachments: AttachmentData[]; + chatId: string; className?: string; } -export const MessageAttachments = ({ attachments, className }: MessageAttachmentsProps) => { +// Builds the access-controlled serving URL for a blob attachment. The uploader +// can fetch their own bytes from this route immediately after sending (even +// before the commit links the blob to the chat), so a just-sent image renders +// here directly. +const getAttachmentServingUrl = (chatId: string, attachmentId: string): string => { + return `/api/ee/chat/${chatId}/attachments/${attachmentId}`; +} + +export const MessageAttachments = ({ attachments, chatId, className }: MessageAttachmentsProps) => { const [activeAttachment, setActiveAttachment] = useState(null); if (attachments.length === 0) { return null; } + const activeImageSrc = + activeAttachment?.kind === 'blob' && mediaTypeToModality(activeAttachment.mediaType) === 'image' + ? getAttachmentServingUrl(chatId, activeAttachment.attachmentId) + : undefined; + return ( <>
- {attachments.map((attachment, index) => ( - - ))} + {attachments.map((attachment, index) => { + if (attachment.kind === 'blob' && mediaTypeToModality(attachment.mediaType) === 'image') { + const imageSrc = getAttachmentServingUrl(chatId, attachment.attachmentId); + return ( + + + + + + {/* eslint-disable-next-line @next/next/no-img-element */} + {attachment.filename} + + + ); + } + + return ( + + ); + })}
!open && setActiveAttachment(null)} filename={activeAttachment?.filename} text={activeAttachment?.kind === 'text' ? activeAttachment.text : undefined} + imageSrc={activeImageSrc} /> ) diff --git a/packages/web/src/ee/features/chat/components/chatThreadPanel.test.tsx b/packages/web/src/ee/features/chat/components/chatThreadPanel.test.tsx index 46b8d4d91..b98b99d1d 100644 --- a/packages/web/src/ee/features/chat/components/chatThreadPanel.test.tsx +++ b/packages/web/src/ee/features/chat/components/chatThreadPanel.test.tsx @@ -79,6 +79,8 @@ describe('ChatThreadPanel', () => { messages={[]} isOwner={true} isAuthenticated={true} + isLoginWallEnabled={false} + maxImageBytes={10 * 1024 * 1024} /> ); diff --git a/packages/web/src/ee/features/chat/components/chatThreadPanel.tsx b/packages/web/src/ee/features/chat/components/chatThreadPanel.tsx index 0323c714f..96b39c2ff 100644 --- a/packages/web/src/ee/features/chat/components/chatThreadPanel.tsx +++ b/packages/web/src/ee/features/chat/components/chatThreadPanel.tsx @@ -17,6 +17,7 @@ interface ChatThreadPanelProps { isOwner: boolean; isAuthenticated: boolean; isLoginWallEnabled: boolean; + maxImageBytes: number; chatName?: string; } @@ -36,6 +37,7 @@ export const ChatThreadPanel = ({ isOwner, isAuthenticated, isLoginWallEnabled, + maxImageBytes, chatName, }: ChatThreadPanelProps) => { // @note: we are guaranteed to have a chatId because this component will only be @@ -90,6 +92,7 @@ export const ChatThreadPanel = ({ isOwner={isOwner} isAuthenticated={isAuthenticated} isLoginWallEnabled={isLoginWallEnabled} + maxImageBytes={maxImageBytes} chatName={chatName} />
diff --git a/packages/web/src/features/chat/actions.ts b/packages/web/src/features/chat/actions.ts index fccc6df8a..f58d43210 100644 --- a/packages/web/src/features/chat/actions.ts +++ b/packages/web/src/features/chat/actions.ts @@ -8,7 +8,7 @@ import { notFound } from "@/lib/serviceError"; import { withAuth, withOptionalAuth } from "@/middleware/withAuth"; import { ChatVisibility, Prisma } from "@sourcebot/db"; import { SBChatMessage } from "./types"; -import { checkAskEntitlement, isChatSharedWithUser, isOwnerOfChat } from "./utils.server"; +import { checkAskEntitlement, deleteOrphanedAttachments, isChatSharedWithUser, isOwnerOfChat, resolveChatAccess } from "./utils.server"; export const createChat = async ({ source }: { source?: string } = {}) => sew(() => withOptionalAuth(async ({ org, user, prisma }) => { @@ -75,11 +75,10 @@ export const getChatInfo = async ({ chatId }: { chatId: string }) => sew(() => return notFound(); } - const isOwner = await isOwnerOfChat(chat, user); - const isSharedWithUser = await isChatSharedWithUser({ prisma, chatId, userId: user?.id }); + const { isOwner, isSharedWithUser, canView } = await resolveChatAccess({ prisma, chat, user }); // Private chats can only be viewed by the owner or users it's been shared with - if (chat.visibility === ChatVisibility.PRIVATE && !isOwner && !isSharedWithUser) { + if (!canView) { return notFound(); } @@ -194,6 +193,13 @@ export const deleteChat = async ({ chatId }: { chatId: string }) => sew(() => return notFound(); } + // Capture the linked attachment ids before the delete cascades the + // link rows, so we can sweep any blobs left with zero links afterwards. + const linkedAttachments = await prisma.chatAttachment.findMany({ + where: { chatId }, + select: { attachmentId: true }, + }); + await prisma.chat.delete({ where: { id: chatId, @@ -201,6 +207,11 @@ export const deleteChat = async ({ chatId }: { chatId: string }) => sew(() => }, }); + await deleteOrphanedAttachments({ + prisma, + attachmentIds: linkedAttachments.map((link) => link.attachmentId), + }); + await createAudit({ action: "chat.deleted", actor: { id: user.id, type: "user" }, @@ -281,6 +292,14 @@ export const duplicateChat = async ({ chatId, newName }: { chatId: string, newNa const isGuestUser = user === undefined; const anonymousCreatorId = isGuestUser ? await getOrCreateAnonymousId() : undefined; + // Snapshot the source chat's links before creating the duplicate; a + // concurrent delete could otherwise cascade them away first, leaving the + // copied messages pointing at blobs the new chat is never linked to. + const originalLinks = await prisma.chatAttachment.findMany({ + where: { chatId: originalChat.id }, + select: { attachmentId: true }, + }); + const newChat = await prisma.chat.create({ data: { orgId: org.id, @@ -292,6 +311,19 @@ export const duplicateChat = async ({ chatId, newName }: { chatId: string, newNa }, }); + // Copy the attachment links (metadata-only; no byte copy). The + // duplicated messages reference the same blobs, and access stays + // chat-derived through the new chat's links. + if (originalLinks.length > 0) { + await prisma.chatAttachment.createMany({ + data: originalLinks.map((link) => ({ + chatId: newChat.id, + attachmentId: link.attachmentId, + })), + skipDuplicates: true, + }); + } + return { id: newChat.id, }; diff --git a/packages/web/src/features/chat/attachmentUtils.ts b/packages/web/src/features/chat/attachmentUtils.ts index c5b1d0ff8..8f4084b7c 100644 --- a/packages/web/src/features/chat/attachmentUtils.ts +++ b/packages/web/src/features/chat/attachmentUtils.ts @@ -1,48 +1,73 @@ 'use client'; import { + ATTACHMENT_ALLOWED_IMAGE_MIME_TYPES, ATTACHMENT_ALLOWED_TEXT_EXTENSIONS, ATTACHMENT_ALLOWED_TEXT_MIME_TYPES, + ATTACHMENT_MAX_IMAGE_BYTES, + ATTACHMENT_MAX_IMAGE_COUNT, ATTACHMENT_MAX_TURN_TEXT_BYTES, ATTACHMENT_PASTE_AUTO_CONVERT_MIN_CHARS, ATTACHMENT_PASTE_AUTO_CONVERT_MIN_LINES, } from "./constants"; -import { AttachmentData, TextAttachment } from "./types"; +import { AttachmentData } from "./types"; +import { sanitizeFilename } from "./attachments/filename"; import { v4 as uuidv4 } from "uuid"; -// Normalizes an untrusted filename: basename only, strips control chars (which -// could break the `` tag or UI), collapses whitespace. -export const sanitizeFilename = (name: string): string => { - const basename = name.split(/[\\/]/).pop() ?? name; - return Array.from(basename) - .filter((char) => { - const code = char.charCodeAt(0); - return code >= 32 && code !== 127; - }) - .join('') - .replace(/\s+/g, ' ') - .trim() || 'attachment'; -} +export { sanitizeFilename }; + +// A text attachment selected in the chat box but not yet submitted. The +// extracted text travels inline in the message (no upload). +export type PendingTextAttachment = { + kind: 'text'; + id: string; + filename: string; + mediaType: string; + sizeBytes: number; + text: string; +}; -// A text attachment selected in the chat box but not yet submitted. The `id` -// is the stable handle carried into the message: it keys list rendering and -// removal here, and is persisted onto the attachment so its content can later -// be cited and resolved. -export type PendingAttachment = TextAttachment & { id: string }; +// An image attachment selected in the chat box. Unlike text, the bytes are +// uploaded to blob storage on select; `status`/`attachmentId` track that +// upload. `previewUrl` is a local object URL used for the pre-send thumbnail, +// and `file` is retained so the upload can be (re)issued. +export type PendingImageAttachment = { + kind: 'image'; + id: string; + filename: string; + mediaType: string; + sizeBytes: number; + previewUrl: string; + file: File; + status: 'uploading' | 'uploaded' | 'error'; + attachmentId?: string; + error?: string; +}; + +// An attachment selected in the chat box but not yet submitted. For text +// attachments the `id` is the stable handle carried into the message so the +// content can later be cited and resolved; for images it is a client-only list +// key (images are addressed by their uploaded `attachmentId` instead). +export type PendingAttachment = PendingTextAttachment | PendingImageAttachment; // Builds the comma-separated `accept` attribute for a native `` -// so the OS picker only surfaces supported text file types. -export const getAttachmentAcceptAttribute = (): string => { +// so the OS picker only surfaces supported file types. Image types are included +// only when the selected model can accept image input. +export const getAttachmentAcceptAttribute = (includeImages: boolean): string => { return [ 'text/*', ...ATTACHMENT_ALLOWED_TEXT_MIME_TYPES, ...ATTACHMENT_ALLOWED_TEXT_EXTENSIONS.map((extension) => `.${extension}`), + ...(includeImages ? ATTACHMENT_ALLOWED_IMAGE_MIME_TYPES : []), ].join(','); } -// Builds react-dropzone's `accept` map. Extensions are attached to `text/plain` -// so code files that report an empty/unusual MIME type are still selectable. -export const getAttachmentDropzoneAccept = (): Record => { +// Builds the `accept` map for react-dropzone (and the native file picker) so +// the OS dialog and drag overlay only surface supported file types. The +// extension list is attached to `text/plain` so code files that report an empty +// or unusual MIME type are still selectable by extension. Image types are +// included only when the selected model can accept image input. +export const getAttachmentDropzoneAccept = (includeImages: boolean): Record => { const accept: Record = { 'text/*': [], 'text/plain': ATTACHMENT_ALLOWED_TEXT_EXTENSIONS.map((extension) => `.${extension}`), @@ -50,26 +75,52 @@ export const getAttachmentDropzoneAccept = (): Record => { for (const mimeType of ATTACHMENT_ALLOWED_TEXT_MIME_TYPES) { accept[mimeType] = []; } + if (includeImages) { + for (const mimeType of ATTACHMENT_ALLOWED_IMAGE_MIME_TYPES) { + accept[mimeType] = []; + } + } return accept; } -// Total UTF-8 byte size of a turn's submitted text (prompt + attachment bodies), -// checked against ATTACHMENT_MAX_TURN_TEXT_BYTES at submit time. +// Total UTF-8 byte size of a turn's submitted text (prompt + text attachment +// bodies), checked against ATTACHMENT_MAX_TURN_TEXT_BYTES at submit time. Image +// attachments are excluded: their bytes are uploaded as blobs, not inlined into +// the message text, so they don't count against the inline-text budget. export const getSubmittedTextBytes = (text: string, attachments: PendingAttachment[]): number => { const textBytes = new TextEncoder().encode(text).length; - const attachmentBytes = attachments.reduce((sum, attachment) => sum + attachment.sizeBytes, 0); + const attachmentBytes = attachments + .filter((attachment) => attachment.kind === 'text') + .reduce((sum, attachment) => sum + attachment.sizeBytes, 0); return textBytes + attachmentBytes; } -export const toAttachmentData = (attachment: PendingAttachment): AttachmentData => { - return { - kind: attachment.kind, - id: attachment.id, - filename: attachment.filename, - mediaType: attachment.mediaType, - sizeBytes: attachment.sizeBytes, - text: attachment.text, - }; +// Converts a pending attachment into the message `AttachmentData` part. Returns +// `undefined` for an image whose upload has not completed (it must not be +// referenced before the blob exists); callers filter these out. +export const toAttachmentData = (attachment: PendingAttachment): AttachmentData | undefined => { + if (attachment.kind === 'text') { + return { + kind: 'text', + id: attachment.id, + filename: attachment.filename, + mediaType: attachment.mediaType, + sizeBytes: attachment.sizeBytes, + text: attachment.text, + }; + } + + if (attachment.status === 'uploaded' && attachment.attachmentId) { + return { + kind: 'blob', + attachmentId: attachment.attachmentId, + filename: attachment.filename, + mediaType: attachment.mediaType, + sizeBytes: attachment.sizeBytes, + }; + } + + return undefined; } const getExtension = (filename: string): string => { @@ -100,6 +151,10 @@ export const isAllowedTextFile = (file: File): boolean => { return false; } +export const isAllowedImageFile = (file: File): boolean => { + return (ATTACHMENT_ALLOWED_IMAGE_MIME_TYPES as readonly string[]).includes(file.type); +} + const readAsText = (file: File): Promise => { return new Promise((resolve, reject) => { const reader = new FileReader(); @@ -161,40 +216,103 @@ export type ReadFilesResult = { errors: string[]; }; -// Reads files into pending text attachments, rejecting non-text files and any -// file larger than the per-turn budget (skipped before reading to avoid loading -// a huge file into memory). The aggregate budget is enforced at submit time. +// Reads and validates files into pending attachments, enforcing the per-file +// size, allowed-type, and per-message image-count caps (the per-turn text budget +// is enforced at submit time). Text is read inline; images (when `allowImages`) +// become pending uploads the caller then kicks off. The image-count cap mirrors +// the server's for early feedback. Rejected files yield an error, not a throw. export const readFilesAsAttachments = async ( files: File[], + { allowImages, existingImageCount = 0, maxImageBytes = ATTACHMENT_MAX_IMAGE_BYTES }: { + allowImages: boolean; + existingImageCount?: number; + maxImageBytes?: number; + }, ): Promise => { const attachments: PendingAttachment[] = []; const errors: string[] = []; + let imageCount = existingImageCount; for (const file of files) { - if (!isAllowedTextFile(file)) { - errors.push(`${file.name}: unsupported file type (text files only).`); - continue; - } - - if (file.size > ATTACHMENT_MAX_TURN_TEXT_BYTES) { - errors.push(`${file.name}: exceeds the ${Math.round(ATTACHMENT_MAX_TURN_TEXT_BYTES / 1024)}KB per-message limit.`); + if (isAllowedTextFile(file)) { + // Skip before reading to avoid loading a huge file into memory. + if (file.size > ATTACHMENT_MAX_TURN_TEXT_BYTES) { + errors.push(`${file.name}: exceeds the ${Math.round(ATTACHMENT_MAX_TURN_TEXT_BYTES / 1024)}KB per-message limit.`); + continue; + } + try { + const text = await readAsText(file); + attachments.push({ + id: uuidv4(), + kind: 'text', + filename: sanitizeFilename(file.name), + mediaType: file.type || 'text/plain', + sizeBytes: file.size, + text, + }); + } catch { + errors.push(`${file.name}: failed to read file.`); + } continue; } - try { - const text = await readAsText(file); + if (isAllowedImageFile(file)) { + if (!allowImages) { + errors.push(`${file.name}: the selected model does not support image input.`); + continue; + } + if (file.size > maxImageBytes) { + errors.push(`${file.name}: exceeds the ${Math.round(maxImageBytes / (1024 * 1024))}MB image limit.`); + continue; + } + if (imageCount >= ATTACHMENT_MAX_IMAGE_COUNT) { + errors.push(`You can attach at most ${ATTACHMENT_MAX_IMAGE_COUNT} images per message.`); + continue; + } attachments.push({ id: uuidv4(), - kind: 'text', + kind: 'image', filename: sanitizeFilename(file.name), - mediaType: file.type || 'text/plain', + mediaType: file.type, sizeBytes: file.size, - text, + previewUrl: URL.createObjectURL(file), + file, + status: 'uploading', }); - } catch { - errors.push(`${file.name}: failed to read file.`); + imageCount++; + continue; } + + errors.push(`${file.name}: unsupported file type.`); } return { attachments, errors }; } + +// Uploads an image attachment's bytes to blob storage, returning the committed +// attachment metadata (including the server-assigned `attachmentId`). Throws +// with a human-readable message on failure. +export const uploadImageAttachment = async (file: File): Promise<{ + attachmentId: string; + filename: string; + mediaType: string; + sizeBytes: number; +}> => { + const formData = new FormData(); + formData.append('file', file); + + const response = await fetch('/api/ee/chat/attachments', { + method: 'POST', + body: formData, + headers: { + 'X-Sourcebot-Client-Source': 'sourcebot-web-client', + }, + }); + + if (!response.ok) { + const body = await response.json().catch(() => undefined); + throw new Error(body?.message ?? 'Failed to upload image.'); + } + + return response.json(); +} diff --git a/packages/web/src/features/chat/attachments/filename.ts b/packages/web/src/features/chat/attachments/filename.ts new file mode 100644 index 000000000..fc5a23fdb --- /dev/null +++ b/packages/web/src/features/chat/attachments/filename.ts @@ -0,0 +1,15 @@ +// Normalizes an untrusted filename: keeps only the basename, drops control +// characters (which could break the prompt's `` tag +// or the UI), and collapses whitespace. Lives in a non-client module so both +// the client picker and the server upload route can share one implementation. +export const sanitizeFilename = (name: string): string => { + const basename = name.split(/[\\/]/).pop() ?? name; + return Array.from(basename) + .filter((char) => { + const code = char.charCodeAt(0); + return code >= 32 && code !== 127; + }) + .join('') + .replace(/\s+/g, ' ') + .trim() || 'attachment'; +}; diff --git a/packages/web/src/features/chat/attachments/modality.ts b/packages/web/src/features/chat/attachments/modality.ts new file mode 100644 index 000000000..863e3502d --- /dev/null +++ b/packages/web/src/features/chat/attachments/modality.ts @@ -0,0 +1,33 @@ +import { InputModality } from "../types"; + +// Single-medium modalities an attachment blob can occupy (text is excluded: it +// travels inline in the message, not as a native attachment part). +export type AttachmentModality = Exclude; + +/** + * Maps a stored attachment's media type to the model input modality it occupies, + * or `undefined` when it isn't a recognized single-medium attachment. The single + * source of truth for "what kind of attachment is this"; extend it to add + * PDF/audio/video support. + */ +export const mediaTypeToModality = (mediaType: string): AttachmentModality | undefined => { + if (mediaType.startsWith('image/')) { + return 'image'; + } + if (mediaType.startsWith('audio/')) { + return 'audio'; + } + if (mediaType.startsWith('video/')) { + return 'video'; + } + return undefined; +}; + +/** + * Whether a model that accepts `acceptedModalities` can natively ingest an + * attachment of `mediaType`. + */ +export const isMediaTypeAccepted = (mediaType: string, acceptedModalities: InputModality[]): boolean => { + const modality = mediaTypeToModality(mediaType); + return modality !== undefined && acceptedModalities.includes(modality); +}; diff --git a/packages/web/src/features/chat/attachments/validation.ts b/packages/web/src/features/chat/attachments/validation.ts new file mode 100644 index 000000000..bd8f44a9a --- /dev/null +++ b/packages/web/src/features/chat/attachments/validation.ts @@ -0,0 +1,76 @@ +import 'server-only'; + +import sharp from 'sharp'; +import { ATTACHMENT_ALLOWED_IMAGE_MIME_TYPES, ATTACHMENT_MAX_IMAGE_DIMENSION } from '../constants'; + +export type AllowedImageMediaType = typeof ATTACHMENT_ALLOWED_IMAGE_MIME_TYPES[number]; + +const isAllowedImageMediaType = (mediaType: string): mediaType is AllowedImageMediaType => { + return (ATTACHMENT_ALLOWED_IMAGE_MIME_TYPES as readonly string[]).includes(mediaType); +}; + +// sharp/libvips reports the decoded format by name; map the formats we allow to +// their canonical media type. Anything not in this map (svg, tiff, heif, ...) +// is rejected, so the allowlist is enforced by the absence of an entry here as +// well as by `isAllowedImageMediaType`. +const SHARP_FORMAT_TO_MEDIA_TYPE: Record = { + png: 'image/png', + jpeg: 'image/jpeg', + webp: 'image/webp', + gif: 'image/gif', +}; + +export type AttachmentValidationResult = + | { ok: true; mediaType: AllowedImageMediaType } + | { ok: false; reason: string }; + +/** + * Validates uploaded attachment bytes by decoding the image header with sharp + * (libvips), never trusting the client-supplied MIME type or extension. This + * authoritatively determines the format AND the pixel dimensions, letting us + * reject: + * - non-images / corrupt data (sharp throws), + * - disallowed formats (no entry in SHARP_FORMAT_TO_MEDIA_TYPE), + * - over-`maxBytes` files, and + * - decompression bombs (dimensions over ATTACHMENT_MAX_IMAGE_DIMENSION). + * The returned `mediaType` is the decoded type, which callers persist instead of + * any client-supplied value. + */ +export const validateImageAttachment = async ( + buffer: Buffer, + maxBytes: number, +): Promise => { + if (buffer.length === 0) { + return { ok: false, reason: 'Empty file.' }; + } + if (buffer.length > maxBytes) { + return { ok: false, reason: `Image exceeds the ${Math.round(maxBytes / (1024 * 1024))}MB limit.` }; + } + + let metadata: sharp.Metadata; + try { + // `failOn: 'error'` makes sharp reject truncated/corrupt inputs instead + // of best-effort decoding them. + metadata = await sharp(buffer, { failOn: 'error' }).metadata(); + } catch { + return { ok: false, reason: 'Unsupported or corrupt image. Allowed: PNG, JPEG, WebP, GIF.' }; + } + + const mediaType = metadata.format ? SHARP_FORMAT_TO_MEDIA_TYPE[metadata.format] : undefined; + if (!mediaType || !isAllowedImageMediaType(mediaType)) { + return { ok: false, reason: 'Unsupported image type. Allowed: PNG, JPEG, WebP, GIF.' }; + } + + const { width, height } = metadata; + if (!width || !height) { + return { ok: false, reason: 'Could not determine image dimensions.' }; + } + if (width > ATTACHMENT_MAX_IMAGE_DIMENSION || height > ATTACHMENT_MAX_IMAGE_DIMENSION) { + return { + ok: false, + reason: `Image dimensions exceed the ${ATTACHMENT_MAX_IMAGE_DIMENSION}px limit.`, + }; + } + + return { ok: true, mediaType }; +}; diff --git a/packages/web/src/features/chat/components/chatBox/attachmentButton.tsx b/packages/web/src/features/chat/components/chatBox/attachmentButton.tsx index fef235c06..33d88b6de 100644 --- a/packages/web/src/features/chat/components/chatBox/attachmentButton.tsx +++ b/packages/web/src/features/chat/components/chatBox/attachmentButton.tsx @@ -8,10 +8,11 @@ import { useRef } from "react"; interface AttachmentButtonProps { onAddFiles: (files: File[]) => void; + acceptImages?: boolean; disabled?: boolean; } -export const AttachmentButton = ({ onAddFiles, disabled }: AttachmentButtonProps) => { +export const AttachmentButton = ({ onAddFiles, acceptImages = false, disabled }: AttachmentButtonProps) => { const inputRef = useRef(null); return ( @@ -20,7 +21,7 @@ export const AttachmentButton = ({ onAddFiles, disabled }: AttachmentButtonProps ref={inputRef} type="file" multiple - accept={getAttachmentAcceptAttribute()} + accept={getAttachmentAcceptAttribute(acceptImages)} className="hidden" onChange={(e) => { const files = e.target.files ? Array.from(e.target.files) : []; @@ -46,7 +47,7 @@ export const AttachmentButton = ({ onAddFiles, disabled }: AttachmentButtonProps - Attach text files + {acceptImages ? "Attach text files or images" : "Attach text files"} diff --git a/packages/web/src/features/chat/components/chatBox/attachmentTray.tsx b/packages/web/src/features/chat/components/chatBox/attachmentTray.tsx index 2646fa93b..3547ddf1b 100644 --- a/packages/web/src/features/chat/components/chatBox/attachmentTray.tsx +++ b/packages/web/src/features/chat/components/chatBox/attachmentTray.tsx @@ -1,8 +1,9 @@ 'use client'; import { VscodeFileIcon } from "@/app/components/vscodeFileIcon"; +import { HoverCard, HoverCardContent, HoverCardTrigger } from "@/components/ui/hover-card"; import { cn } from "@/lib/utils"; -import { X } from "lucide-react"; +import { AlertCircle, Loader2, X } from "lucide-react"; import { useState } from "react"; import { PendingAttachment } from "../../attachmentUtils"; import { AttachmentViewerDialog } from "./attachmentViewerDialog"; @@ -26,39 +27,95 @@ export const AttachmentTray = ({ attachments, onRemove, className }: AttachmentT <>
{attachments.map((attachment) => ( -
- + + {onRemove && ( + + )} +
+ + {/* eslint-disable-next-line @next/next/no-img-element */} + {attachment.filename} + + + ) : ( +
- - - {attachment.filename} - - - {onRemove && ( - )} -
+ {onRemove && ( + + )} +
+ ) ))}
!open && setActiveAttachment(null)} filename={activeAttachment?.filename} - text={activeAttachment?.text} + text={activeAttachment?.kind === 'text' ? activeAttachment.text : undefined} + imageSrc={activeAttachment?.kind === 'image' ? activeAttachment.previewUrl : undefined} /> ) diff --git a/packages/web/src/features/chat/components/chatBox/attachmentViewerDialog.tsx b/packages/web/src/features/chat/components/chatBox/attachmentViewerDialog.tsx index 46dc236dc..d57eb68ee 100644 --- a/packages/web/src/features/chat/components/chatBox/attachmentViewerDialog.tsx +++ b/packages/web/src/features/chat/components/chatBox/attachmentViewerDialog.tsx @@ -6,13 +6,17 @@ import { useEffect } from "react"; interface AttachmentViewerDialogProps { filename?: string; text?: string; + // When set, the dialog shows the image at this URL instead of text. Used + // for image attachments (a local object URL pre-send, the serving route + // post-send). + imageSrc?: string; open: boolean; onOpenChange: (open: boolean) => void; } -// Shared viewer for inspecting an inline-text attachment's contents. Used for -// both staged (not-yet-sent) and sent attachments. -export const AttachmentViewerDialog = ({ filename, text, open, onOpenChange }: AttachmentViewerDialogProps) => { +// Shared viewer for inspecting an attachment's contents. Used for both staged +// (not-yet-sent) and sent attachments, and for both text and image kinds. +export const AttachmentViewerDialog = ({ filename, text, imageSrc, open, onOpenChange }: AttachmentViewerDialogProps) => { // The staged viewer is rendered inside the Slate `Editable` subtree, where // Radix's built-in Escape-to-close can get swallowed by the editor's // focus/key handling. A capture-phase listener guarantees Escape closes the @@ -45,9 +49,18 @@ export const AttachmentViewerDialog = ({ filename, text, open, onOpenChange }: A Preview of the attached file{filename ? ` ${filename}` : ''}. -
-                    {text}
-                
+ {imageSrc ? ( + // eslint-disable-next-line @next/next/no-img-element + {filename + ) : ( +
+                        {text}
+                    
+ )} ) diff --git a/packages/web/src/features/chat/components/chatBox/chatBox.tsx b/packages/web/src/features/chat/components/chatBox/chatBox.tsx index 1bd33235f..a7dd7fd31 100644 --- a/packages/web/src/features/chat/components/chatBox/chatBox.tsx +++ b/packages/web/src/features/chat/components/chatBox/chatBox.tsx @@ -5,7 +5,7 @@ import { Button } from "@/components/ui/button"; import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"; import { AttachmentData, CustomEditor, MentionElement, RenderElementPropsFor, SearchScope } from "@/features/chat/types"; import { insertMention, slateContentToString } from "@/features/chat/utils"; -import { createPastedTextAttachment, getSubmittedTextBytes, PendingAttachment, readFilesAsAttachments, shouldAutoConvertPaste, toAttachmentData } from "@/features/chat/attachmentUtils"; +import { createPastedTextAttachment, getSubmittedTextBytes, PendingAttachment, PendingImageAttachment, readFilesAsAttachments, shouldAutoConvertPaste, toAttachmentData, uploadImageAttachment } from "@/features/chat/attachmentUtils"; import { AttachmentButton } from "./attachmentButton"; import { AttachmentTray } from "./attachmentTray"; import { cn } from "@/lib/utils"; @@ -26,7 +26,7 @@ import { SearchContextQuery } from "@/lib/types"; import isEqual from "fast-deep-equal/react"; import { LoginDialog } from "./loginDialog"; import { usePathname } from "next/navigation"; -import { ATTACHMENT_MAX_TURN_TEXT_BYTES, PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY } from "@/features/chat/constants"; +import { ATTACHMENT_MAX_IMAGE_BYTES, ATTACHMENT_MAX_TURN_TEXT_BYTES, PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY } from "@/features/chat/constants"; import useCaptureEvent from "@/hooks/useCaptureEvent"; import { useHasEntitlement } from "@/features/entitlements/useHasEntitlement"; import { UpsellDialog } from "@/features/billing/upsellDialog"; @@ -35,6 +35,15 @@ export interface ChatBoxHandle { addFiles: (files: File[]) => void; } +// Only inline-text attachments survive the login/upgrade redirect: image blobs +// require an authenticated, entitled upload, so a redirected sender can't have +// one, and a stashed blob ref would only fail to commit on re-submit. +const getRedirectSafeAttachments = (attachments: PendingAttachment[]): AttachmentData[] => { + return attachments + .map(toAttachmentData) + .filter((attachment): attachment is AttachmentData => attachment?.kind === 'text'); +} + interface ChatBoxProps { onSubmit: (children: Descendant[], editor: CustomEditor, attachments: AttachmentData[]) => void; onStop?: () => void; @@ -48,6 +57,10 @@ interface ChatBoxProps { searchContexts: SearchContextQuery[]; isLoginWallEnabled: boolean; isAuthenticated: boolean; + // Authoritative per-image byte cap from the server + // (SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES), threaded down for early + // client-side rejection. Defaults to the constant when not provided. + maxImageBytes?: number; } const ChatBoxComponent = ({ @@ -63,6 +76,7 @@ const ChatBoxComponent = ({ isAuthenticated, selectedSearchScopes, searchContexts, + maxImageBytes = ATTACHMENT_MAX_IMAGE_BYTES, }: ChatBoxProps, ref: Ref) => { const suggestionsBoxRef = useRef(null); const [index, setIndex] = useState(0); @@ -97,6 +111,41 @@ const ChatBoxComponent = ({ const [submittedAttachments, setSubmittedAttachments] = useState([]); const pathname = usePathname(); + // Whether the selected model can accept image input (from #1372). Image + // attachments are gated on this; text attachments are always allowed. + const supportsImages = useMemo( + () => selectedLanguageModel?.inputModalities?.includes('image') ?? false, + [selectedLanguageModel], + ); + + // Uploads an image attachment's bytes and reflects the outcome back into the + // tray (status + server attachment id). + const uploadAndTrackImage = useCallback(async (item: PendingImageAttachment) => { + try { + const result = await uploadImageAttachment(item.file); + setAttachments((prev) => prev.map((attachment) => + attachment.id === item.id && attachment.kind === 'image' + ? { + ...attachment, + status: 'uploaded', + attachmentId: result.attachmentId, + mediaType: result.mediaType, + sizeBytes: result.sizeBytes, + } + : attachment)); + } catch (error) { + const message = error instanceof Error ? error.message : 'upload failed.'; + setAttachments((prev) => prev.map((attachment) => + attachment.id === item.id && attachment.kind === 'image' + ? { ...attachment, status: 'error', error: message } + : attachment)); + toast({ + description: `⚠️ ${item.filename}: ${message}`, + variant: "destructive", + }); + } + }, [toast]); + // Set when the user triggers a paste with the OS raw-paste chord // (⌘⇧V / Ctrl+Shift+V). The subsequent `paste` event reads (and clears) // this so the large-paste auto-conversion is skipped for that one paste. @@ -139,7 +188,14 @@ const ChatBoxComponent = ({ return; } - const { attachments: added, errors } = await readFilesAsAttachments(files); + const { attachments: added, errors } = await readFilesAsAttachments( + files, + { + allowImages: supportsImages, + existingImageCount: attachments.filter((attachment) => attachment.kind === 'image').length, + maxImageBytes, + }, + ); if (added.length > 0) { setAttachments((prev) => [...prev, ...added]); } @@ -153,12 +209,47 @@ const ChatBoxComponent = ({ }); } + // Upload image attachments immediately (upload-on-select); their refs + // are included at submit once the upload completes. + for (const item of added) { + if (item.kind === 'image') { + void uploadAndTrackImage(item); + } + } + // Return focus to the prompt input so the user can keep typing. ReactEditor.focus(editor); - }, [attachments, toast, editor, getOverBudgetWarning]); + }, [attachments, toast, editor, supportsImages, uploadAndTrackImage, getOverBudgetWarning, maxImageBytes]); const removeAttachment = useCallback((id: string) => { - setAttachments((prev) => prev.filter((attachment) => attachment.id !== id)); + setAttachments((prev) => { + const target = prev.find((attachment) => attachment.id === id); + if (target?.kind === 'image') { + URL.revokeObjectURL(target.previewUrl); + } + return prev.filter((attachment) => attachment.id !== id); + }); + }, []); + + // Track the set of live image preview object URLs (pending or + // just-submitted) so they can be revoked when the chat box unmounts, + // preventing leaks across SPA navigations. + const liveObjectUrlsRef = useRef>(new Set()); + useEffect(() => { + const urls = new Set(); + for (const attachment of [...attachments, ...submittedAttachments]) { + if (attachment.kind === 'image') { + urls.add(attachment.previewUrl); + } + } + liveObjectUrlsRef.current = urls; + }, [attachments, submittedAttachments]); + useEffect(() => { + return () => { + for (const url of liveObjectUrlsRef.current) { + URL.revokeObjectURL(url); + } + }; }, []); // Allow an ancestor pane-level drop zone to forward dropped files into this @@ -200,7 +291,7 @@ const ChatBoxComponent = ({ const { isSubmitDisabled, isSubmitDisabledReason } = useMemo((): { isSubmitDisabled: true, - isSubmitDisabledReason: "empty" | "too-large" | "redirecting" | "generating" | "no-language-model-selected" + isSubmitDisabledReason: "empty" | "too-large" | "redirecting" | "generating" | "no-language-model-selected" | "uploading" | "upload-error" } | { isSubmitDisabled: false, isSubmitDisabledReason: undefined, @@ -213,7 +304,8 @@ const ChatBoxComponent = ({ } } - // Single per-turn bound on the submitted text (prompt + attachments). + // Single per-turn bound on the submitted inline text (prompt + text + // attachments). Image bytes are uploaded as blobs and excluded here. if (getSubmittedTextBytes(text, attachments) > ATTACHMENT_MAX_TURN_TEXT_BYTES) { return { isSubmitDisabled: true, @@ -221,6 +313,27 @@ const ChatBoxComponent = ({ } } + // Block submission until in-flight image uploads finish so their refs + // are available when the message is built. + if (attachments.some((attachment) => attachment.kind === 'image' && attachment.status === 'uploading')) { + return { + isSubmitDisabled: true, + isSubmitDisabledReason: "uploading", + } + } + + // A failed or ref-less image is dropped from `attachmentData` at submit, + // so block (rather than silently sending without it) until it's removed. + if (attachments.some((attachment) => + attachment.kind === 'image' && + (attachment.status === 'error' || !attachment.attachmentId) + )) { + return { + isSubmitDisabled: true, + isSubmitDisabledReason: "upload-error", + } + } + if (isRedirecting) { return { isSubmitDisabled: true, @@ -276,13 +389,27 @@ const ChatBoxComponent = ({ }); } + if (isSubmitDisabledReason === "uploading") { + toast({ + description: "⚠️ Please wait for image uploads to finish", + variant: "destructive", + }); + } + + if (isSubmitDisabledReason === "upload-error") { + toast({ + description: "⚠️ Remove failed image uploads before sending", + variant: "destructive", + }); + } + return; } if (requiresLogin) { sessionStorage.setItem( PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY, - JSON.stringify({ pathname, children: editor.children, attachments: attachments.map(toAttachmentData) }), + JSON.stringify({ pathname, children: editor.children, attachments: getRedirectSafeAttachments(attachments) }), ); captureEvent('wa_askgh_login_wall_prompted', {}); setIsLoginDialogOpen(true); @@ -292,14 +419,31 @@ const ChatBoxComponent = ({ if (requiresUpgrade) { sessionStorage.setItem( PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY, - JSON.stringify({ pathname, children: editor.children, attachments: attachments.map(toAttachmentData) }), + JSON.stringify({ pathname, children: editor.children, attachments: getRedirectSafeAttachments(attachments) }), ); setIsUpsellDialogOpen(true); return; } - _onSubmit(editor.children, editor, attachments.map(toAttachmentData)); - setSubmittedAttachments(attachments); + const attachmentData = attachments + .map(toAttachmentData) + .filter((attachment): attachment is AttachmentData => attachment !== undefined); + + // The persisted message renders images from the serving route (the + // uploader can read their own bytes pre-commit). The preview object URLs + // are kept alive for the `submittedAttachments` redirect tray and revoked + // on unmount (see the cleanup effect above). + _onSubmit(editor.children, editor, attachmentData); + // Replace the prior submitted batch, revoking its preview URLs so they + // don't accumulate across repeated sends in a long-lived chat box. + setSubmittedAttachments((prev) => { + for (const attachment of prev) { + if (attachment.kind === 'image') { + URL.revokeObjectURL(attachment.previewUrl); + } + } + return attachments; + }); setAttachments([]); }, [ isSubmitDisabled, @@ -484,6 +628,11 @@ const ChatBoxComponent = ({ className="mb-1.5" /> )} + {attachments.some((attachment) => attachment.kind === 'image') && !supportsImages && ( +

+ Images won't be sent: the selected model doesn't support image input. +

+ )} {isRedirecting ? ( diff --git a/packages/web/src/features/chat/components/chatBox/chatPaneDropzone.tsx b/packages/web/src/features/chat/components/chatBox/chatPaneDropzone.tsx index f7b5d0f59..5251b6059 100644 --- a/packages/web/src/features/chat/components/chatBox/chatPaneDropzone.tsx +++ b/packages/web/src/features/chat/components/chatBox/chatPaneDropzone.tsx @@ -27,7 +27,10 @@ export const ChatPaneDropzone = ({ onFilesDropped, disabled, className, children const [dragFileCount, setDragFileCount] = useState(0); const { getRootProps, getInputProps, isDragActive, isDragReject } = useDropzone({ - accept: getAttachmentDropzoneAccept(), + // Accept images at the dropzone layer regardless of model capability; + // the chat box's add handler applies the authoritative image gate (and + // surfaces a precise message when the selected model is text-only). + accept: getAttachmentDropzoneAccept(true), multiple: true, noClick: true, noKeyboard: true, @@ -39,7 +42,7 @@ export const ChatPaneDropzone = ({ onFilesDropped, disabled, className, children } if (fileRejections.length > 0) { toast({ - description: `⚠️ Unsupported file type: ${fileRejections.map((rejection) => rejection.file.name).join(', ')}. Text files only.`, + description: `⚠️ Unsupported file type: ${fileRejections.map((rejection) => rejection.file.name).join(', ')}.`, variant: "destructive", }); } diff --git a/packages/web/src/features/chat/constants.ts b/packages/web/src/features/chat/constants.ts index 1306bb7a5..3a504b961 100644 --- a/packages/web/src/features/chat/constants.ts +++ b/packages/web/src/features/chat/constants.ts @@ -18,6 +18,23 @@ export const MCP_OAUTH_DRAFT_SESSION_STORAGE_KEY = 'mcpOAuthDraft'; // files or a large log while leaving room for retrieval, history, and output. export const ATTACHMENT_MAX_TURN_TEXT_BYTES = 256 * 1024; // 256KB per turn +// Fallback client-side image size cap for early rejection before upload. The +// authoritative cap is SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES, fetched via +// `useAttachmentLimits`; this default is only used while that loads or if it +// fails (and matches the server default). +export const ATTACHMENT_MAX_IMAGE_BYTES = 10 * 1024 * 1024; // 10MB per image + +// Upper bound on an image attachment's pixel dimensions (width and height), +// enforced server-side at upload time. Guards against decompression bombs: a +// small-on-disk file that decodes to an enormous raster would otherwise be +// loaded into memory and shipped to the vision model. +export const ATTACHMENT_MAX_IMAGE_DIMENSION = 12000; // px per side + +// Max image (blob) attachments per message. Enforced server-side in +// `commitMessageAttachments` (mirrored client-side for early feedback) to bound +// per-request memory/cost: each image is loaded and sent to the model. +export const ATTACHMENT_MAX_IMAGE_COUNT = 10; + // A plain-text paste at or above either of these thresholds is automatically // converted into a text attachment instead of being inserted inline export const ATTACHMENT_PASTE_AUTO_CONVERT_MIN_CHARS = 1500; @@ -36,6 +53,17 @@ export const ATTACHMENT_ALLOWED_TEXT_MIME_TYPES = [ 'application/toml', ]; +// Allowlist for binary image attachments. Validated server-side by decoding +// the image (never by client MIME/extension). `image/svg+xml` is intentionally +// excluded (XML/script surface). Used client-side only to build the file +// picker's `accept` filter and to gate the image-attach affordance. +export const ATTACHMENT_ALLOWED_IMAGE_MIME_TYPES = [ + 'image/png', + 'image/jpeg', + 'image/webp', + 'image/gif', +] as const; + export const ATTACHMENT_ALLOWED_TEXT_EXTENSIONS = [ 'txt', 'md', 'markdown', 'log', 'csv', 'tsv', 'json', 'jsonl', 'yaml', 'yml', 'toml', 'ini', 'cfg', 'conf', 'env', 'xml', 'html', 'css', 'scss', diff --git a/packages/web/src/features/chat/modelCapabilities.server.test.ts b/packages/web/src/features/chat/modelCapabilities.server.test.ts index 4cd4121bf..3c75cd011 100644 --- a/packages/web/src/features/chat/modelCapabilities.server.test.ts +++ b/packages/web/src/features/chat/modelCapabilities.server.test.ts @@ -104,29 +104,23 @@ describe('resolveModelCapabilities', () => { vi.unstubAllGlobals(); }); - test('fetches the catalog once in the background and resolves capabilities (incl. provider mapping)', async () => { + test('blocks on the first (cold) fetch and then serves capabilities from cache (incl. provider mapping)', async () => { const fetchMock = vi.fn(async () => ({ ok: true, json: async () => catalog, }) as unknown as Response); vi.stubGlobal('fetch', fetchMock); - // The request path never blocks on the fetch: the first lookup kicks off - // the background fetch and falls back to text-only while it's in flight. + // The genuinely-first resolution blocks on the cold fetch (bounded) so + // capabilities resolve correctly instead of silently degrading to + // text-only right after a process start. expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({ - inputModalities: ['text'], - supportedDocumentTypes: [], - }); - - // Once the background fetch settles, lookups resolve from the cached catalog. - await vi.waitFor(async () => { - expect(await resolveModelCapabilities(model('anthropic', 'claude-sonnet-4-5'))).toEqual({ - inputModalities: ['text', 'image'], - supportedDocumentTypes: ['pdf'], - }); + inputModalities: ['text', 'image'], + supportedDocumentTypes: ['pdf'], }); - // Subsequent lookups reuse the cached catalog rather than refetching. + // Subsequent lookups reuse the cached catalog (incl. provider mapping) + // rather than refetching or blocking again. expect(await resolveModelCapabilities(model('google-generative-ai', 'gemini-2.5-pro'))).toEqual({ inputModalities: ['text', 'image', 'audio', 'video'], supportedDocumentTypes: ['pdf'], diff --git a/packages/web/src/features/chat/modelCapabilities.server.ts b/packages/web/src/features/chat/modelCapabilities.server.ts index 87d2cb131..1dc9e19e7 100644 --- a/packages/web/src/features/chat/modelCapabilities.server.ts +++ b/packages/web/src/features/chat/modelCapabilities.server.ts @@ -59,6 +59,8 @@ export const lookupModelCapabilities = ( export const resolveModelCapabilities = async ( config: Pick, ): Promise => { - const catalog = await loadCatalog(); + // Block on the first (cold) fetch so capabilities resolve correctly instead + // of degrading to text-only right after start. Bounded/one-time (see loadCatalog). + const catalog = await loadCatalog({ awaitWhenEmpty: true }); return lookupModelCapabilities(catalog, config); }; diff --git a/packages/web/src/features/chat/modelsDevCatalog.server.ts b/packages/web/src/features/chat/modelsDevCatalog.server.ts index f2344b6f7..b025fb454 100644 --- a/packages/web/src/features/chat/modelsDevCatalog.server.ts +++ b/packages/web/src/features/chat/modelsDevCatalog.server.ts @@ -18,6 +18,10 @@ const CATALOG_TTL_MS = 6 * 60 * 60 * 1000; // refresh attempts to once per interval during a models.dev outage instead of // kicking one off on (nearly) every request. const NEGATIVE_CACHE_MS = 60 * 1000; +// Max time a single `awaitWhenEmpty` caller blocks on the first fetch (well +// under FETCH_TIMEOUT_MS); past it the caller falls back while the fetch +// continues warming the cache in the background. +const COLD_START_BLOCK_BUDGET_MS = 2500; // Sourcebot provider id -> models.dev top-level catalog key. Only providers // whose Sourcebot id differs from the models.dev id need an entry; everything @@ -81,14 +85,15 @@ const fetchCatalog = async (): Promise => { * catalog is returned immediately (even if stale), or null before the first * successful fetch lands, and any refresh settles in the background. * - * Consequences of never awaiting: - * - For the brief window after a cold start (before the first fetch resolves), - * capability resolution falls back to text-only; it self-heals on the next - * request once the background fetch populates the cache. - * - An unreachable catalog (e.g. an airgapped deployment) costs nothing on the - * request path instead of repeatedly paying the fetch timeout. + * By default the request path NEVER blocks on the network: a cold cache returns + * null (text-only fallback) and an unreachable catalog costs nothing. Callers + * needing a correct answer on a cold cache may pass `awaitWhenEmpty: true`, + * which blocks only on the first-ever fetch and only up to + * COLD_START_BLOCK_BUDGET_MS (see below). */ -export const loadCatalog = async (): Promise => { +export const loadCatalog = async ( + { awaitWhenEmpty = false }: { awaitWhenEmpty?: boolean } = {}, +): Promise => { const now = Date.now(); const isFresh = cachedCatalog !== null && now - catalogFetchedAt <= CATALOG_TTL_MS; const isBackingOff = now - lastFailedAt < NEGATIVE_CACHE_MS; @@ -111,6 +116,19 @@ export const loadCatalog = async (): Promise => { }); } + // Block on the first-ever fetch only (`!hasAttempted`), bounded by + // COLD_START_BLOCK_BUDGET_MS, so a cold cache resolves correctly instead of + // silently degrading. After any attempt we never block again (airgapped pays + // at most one short wait per process); the background refresh self-heals. + const hasAttempted = catalogFetchedAt > 0 || lastFailedAt > 0; + if (awaitWhenEmpty && cachedCatalog === null && inFlightFetch && !hasAttempted) { + return Promise.race([ + inFlightFetch, + new Promise((resolve) => + setTimeout(() => resolve(cachedCatalog), COLD_START_BLOCK_BUDGET_MS)), + ]); + } + // Serve whatever we currently have cached (possibly null on a cold start) // and let any in-flight refresh settle in the background. return cachedCatalog; diff --git a/packages/web/src/features/chat/types.ts b/packages/web/src/features/chat/types.ts index ada706a49..4f09fbc57 100644 --- a/packages/web/src/features/chat/types.ts +++ b/packages/web/src/features/chat/types.ts @@ -104,9 +104,10 @@ export type SBChatMessageToolTypes = { }; // A user-provided file attachment. The `text` variant carries the file's -// extracted text inline (used for text/code/structured files); binary -// attachments (images, PDFs) will later add a `blob` variant that references -// stored bytes by id instead of inlining them. +// extracted text inline (used for text/code/structured files). The `blob` +// variant references stored bytes by id (used for binary attachments like +// images that cannot be inlined as text); the bytes live in the StorageBackend +// and never travel in the `messages` JSON. export const textAttachmentSchema = z.object({ kind: z.literal('text'), // Stable, message-persisted handle for the attachment. Carried through from @@ -120,8 +121,18 @@ export const textAttachmentSchema = z.object({ }); export type TextAttachment = z.infer; +export const blobAttachmentSchema = z.object({ + kind: z.literal('blob'), + attachmentId: z.string(), + filename: z.string(), + mediaType: z.string(), + sizeBytes: z.number(), +}); +export type BlobAttachment = z.infer; + export const attachmentDataSchema = z.discriminatedUnion('kind', [ textAttachmentSchema, + blobAttachmentSchema, ]); export type AttachmentData = z.infer; diff --git a/packages/web/src/features/chat/utils.server.ts b/packages/web/src/features/chat/utils.server.ts index 90c83c859..704ed691e 100644 --- a/packages/web/src/features/chat/utils.server.ts +++ b/packages/web/src/features/chat/utils.server.ts @@ -1,12 +1,14 @@ import 'server-only'; import { getAnonymousId } from '@/lib/anonymousId'; -import { Chat, Prisma, PrismaClient, User } from '@sourcebot/db'; +import { AttachmentStatus, Chat, ChatVisibility, Prisma, PrismaClient, User } from '@sourcebot/db'; import { LanguageModel } from '@sourcebot/schemas/v3/languageModel.type'; -import { createLogger, env, loadConfig } from '@sourcebot/shared'; +import { createLogger, env, getStorageBackend, loadConfig } from '@sourcebot/shared'; import fs from 'fs'; import path from 'path'; -import { LanguageModelInfo, SBChatMessage } from './types'; +import { BlobAttachment, LanguageModelInfo, SBChatMessage } from './types'; +import { getUserMessageAttachments } from './utils'; +import { ATTACHMENT_MAX_IMAGE_COUNT } from './constants'; import { resolveModelCapabilities } from './modelCapabilities.server'; import { loadCatalog } from './modelsDevCatalog.server'; import { hasEntitlement } from '@/lib/entitlements'; @@ -16,6 +18,11 @@ import { StatusCodes } from 'http-status-codes'; const logger = createLogger('chat-utils'); +// Thrown inside the attachment-commit transaction when the atomic claim matches +// fewer rows than expected (a concurrent send claimed one first), so the whole +// transaction rolls back and we can surface a typed 400. +class AttachmentClaimConflictError extends Error {} + /** * Returns a FORBIDDEN ServiceError when the deployment lacks the `ask` * entitlement, or null when Ask is available. Gates the generative chat @@ -81,6 +88,207 @@ export const isChatSharedWithUser = async ({ return share !== null; }; +/** + * Resolves a (possibly anonymous) user's access to a chat. This is the single + * source of truth for the "can view this chat" rule, shared by `getChatInfo` + * and the attachment serving route so the two cannot drift: a PUBLIC chat is + * viewable by anyone in the org; a PRIVATE chat only by its owner or users it + * has been explicitly shared with. + */ +export const resolveChatAccess = async ({ + prisma, chat, user, +}: { + prisma: PrismaClient; + chat: Chat; + user: User | undefined; +}): Promise<{ isOwner: boolean; isSharedWithUser: boolean; canView: boolean }> => { + const isOwner = await isOwnerOfChat(chat, user); + const isSharedWithUser = await isChatSharedWithUser({ prisma, chatId: chat.id, userId: user?.id }); + const canView = chat.visibility !== ChatVisibility.PRIVATE || isOwner || isSharedWithUser; + return { isOwner, isSharedWithUser, canView }; +}; + +/** + * Verifies and commits the binary (blob) attachments referenced by the latest + * user message, then links them to the chat. Each referenced `attachmentId` + * must exist in this org, have been uploaded by this user, and still be + * `PENDING` (never trust client ids). Already-linked ids are treated as a + * no-op so re-sends / approval continuations are idempotent. On success the + * blobs are linked via `ChatAttachment` and flipped to `COMMITTED`. + * + * Returns a `ServiceError` to reject the request, or `null` when there is + * nothing to commit / the commit succeeded. + */ +export const commitMessageAttachments = async ({ + prisma, chatId, orgId, userId, message, +}: { + prisma: PrismaClient; + chatId: string; + orgId: number; + userId: string | undefined; + message: Pick | undefined; +}): Promise => { + if (!message) { + return null; + } + + const blobRefs = getUserMessageAttachments(message) + .filter((attachment): attachment is BlobAttachment => attachment.kind === 'blob'); + + if (blobRefs.length === 0) { + return null; + } + + // Authoritative per-message image cap (the client mirror can't be trusted). + if (blobRefs.length > ATTACHMENT_MAX_IMAGE_COUNT) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `You can attach at most ${ATTACHMENT_MAX_IMAGE_COUNT} images per message.`, + } satisfies ServiceError; + } + + // Anonymous users cannot upload binary attachments, so a blob ref from an + // unauthenticated request can only be a forged/replayed id. + if (!userId) { + return { + statusCode: StatusCodes.FORBIDDEN, + errorCode: ErrorCode.INSUFFICIENT_PERMISSIONS, + message: 'Anonymous users cannot attach files.', + } satisfies ServiceError; + } + + const ids = [...new Set(blobRefs.map((ref) => ref.attachmentId))]; + + const [attachments, existingLinks] = await Promise.all([ + prisma.attachment.findMany({ where: { id: { in: ids }, orgId } }), + prisma.chatAttachment.findMany({ where: { chatId, attachmentId: { in: ids } } }), + ]); + + const attachmentById = new Map(attachments.map((attachment) => [attachment.id, attachment])); + const alreadyLinkedIds = new Set(existingLinks.map((link) => link.attachmentId)); + + const idsToCommit: string[] = []; + for (const id of ids) { + // Already linked to this chat (idempotent re-send): nothing to do. + if (alreadyLinkedIds.has(id)) { + continue; + } + + const attachment = attachmentById.get(id); + if ( + !attachment || + attachment.uploadedById !== userId || + attachment.status !== AttachmentStatus.PENDING + ) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: 'Invalid or unauthorized attachment reference.', + } satisfies ServiceError; + } + idsToCommit.push(id); + } + + if (idsToCommit.length > 0) { + try { + await prisma.$transaction(async (tx) => { + // Atomically claim the uploads: only rows still PENDING and owned + // by this user/org flip to COMMITTED. The pre-checks above can go + // stale, so if fewer rows match than we intend to commit, another + // send already claimed one — abort the whole commit. + const claimed = await tx.attachment.updateMany({ + where: { + id: { in: idsToCommit }, + orgId, + uploadedById: userId, + status: AttachmentStatus.PENDING, + }, + data: { status: AttachmentStatus.COMMITTED }, + }); + if (claimed.count !== idsToCommit.length) { + throw new AttachmentClaimConflictError(); + } + await tx.chatAttachment.createMany({ + data: idsToCommit.map((attachmentId) => ({ chatId, attachmentId })), + skipDuplicates: true, + }); + }); + } catch (error) { + if (error instanceof AttachmentClaimConflictError) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: 'Invalid or unauthorized attachment reference.', + } satisfies ServiceError; + } + throw error; + } + } + + return null; +}; + +/** + * Reclaims any of the given attachments that no longer have a `ChatAttachment` + * link, along with their stored bytes. Bytes are never removed by DB cascade, + * so this is the refcount-aware byte sweep invoked after a chat (and its links) + * is deleted. + * + * Uses the tombstone protocol so a transient storage error can never orphan + * bytes: an orphan is first atomically flipped to `DELETING` (the claim doubles + * as the concurrency guard — a row re-linked by a parallel duplicate-chat keeps + * a link and is skipped), then its bytes are deleted inline for low latency, and + * only then is the row removed. Any row whose byte delete fails is left + * `DELETING` for the backend pruner's reclaim sweep to retry. + */ +export const deleteOrphanedAttachments = async ({ + prisma, attachmentIds, +}: { + prisma: PrismaClient; + attachmentIds: string[]; +}): Promise => { + if (attachmentIds.length === 0) { + return; + } + + // Atomically claim the orphans (committed blobs left with zero links) by + // flipping them to DELETING. A row re-linked concurrently still has a link, + // so `chats: { none: {} }` excludes it and it survives untouched. + const claimed = await prisma.attachment.updateMany({ + where: { + id: { in: attachmentIds }, + status: AttachmentStatus.COMMITTED, + chats: { none: {} }, + }, + data: { status: AttachmentStatus.DELETING }, + }); + + if (claimed.count === 0) { + return; + } + + const tombstoned = await prisma.attachment.findMany({ + where: { id: { in: attachmentIds }, status: AttachmentStatus.DELETING }, + select: { id: true, storageKey: true }, + }); + + // Best-effort inline byte delete for low latency. Rows whose bytes are + // confirmed gone are removed now; the rest stay DELETING for the pruner. + const storage = getStorageBackend(); + const settled = await Promise.allSettled( + tombstoned.map((attachment) => storage.delete(attachment.storageKey))); + const reclaimedIds = tombstoned + .filter((_, index) => settled[index].status === 'fulfilled') + .map((attachment) => attachment.id); + + if (reclaimedIds.length > 0) { + await prisma.attachment.deleteMany({ + where: { id: { in: reclaimedIds }, status: AttachmentStatus.DELETING }, + }); + } +}; + export const updateChatMessages = async ({ prisma, chatId, messages, }: { diff --git a/packages/web/src/features/chat/utils.ts b/packages/web/src/features/chat/utils.ts index 15a4907ff..8c1334e99 100644 --- a/packages/web/src/features/chat/utils.ts +++ b/packages/web/src/features/chat/utils.ts @@ -417,6 +417,18 @@ export const getUserMessageAttachments = (message: Pick) .map((part) => part.data); } +// UTF-8 byte size of a message's inlined text (prompt + text-attachment bodies; +// image blobs are referenced by id, not inlined). Server-side counterpart to +// the chat box's `getSubmittedTextBytes` for enforcing the per-turn text budget. +export const getMessageTextBytes = (message: Pick): number => { + const encoder = new TextEncoder(); + const promptBytes = encoder.encode(getUserMessageText(message)).length; + const attachmentBytes = getUserMessageAttachments(message) + .filter((attachment) => attachment.kind === 'text') + .reduce((sum, attachment) => sum + encoder.encode(attachment.text).length, 0); + return promptBytes + attachmentBytes; +} + // Neutralizes ``/`` sequences in a body so it can't // close its own wrapper early. Unrelated markup (e.g. `
`) is left intact. const escapeAttachmentBody = (text: string): string => { diff --git a/packages/web/src/lib/posthogEvents.ts b/packages/web/src/lib/posthogEvents.ts index b6b84f592..9c67d20fe 100644 --- a/packages/web/src/lib/posthogEvents.ts +++ b/packages/web/src/lib/posthogEvents.ts @@ -204,6 +204,18 @@ export type PosthogEventMap = { */ selectedRepos?: string[], }, + chat_attachment_uploaded: { + source: string, + mediaType: string, + sizeBytes: number, + }, + chat_attachment_degraded: { + chatId: string, + source: string, + droppedImageCount: number, + modelProvider: string, + model: string, + }, ask_mcp_turn_completed: { chatId: string, source?: SourcebotWebClientSource,