diff --git a/CHANGELOG.md b/CHANGELOG.md index 3df571d5e..32a565fc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added language model input-modality and document capability resolution, automatically resolved from the models.dev catalog (falls back to text-only for uncatalogued/self-hosted models). [#1372](https://github.com/sourcebot-dev/sourcebot/pull/1372) - [EE] Added DPoP sender-constrained OAuth tokens for MCP clients. [#1395](https://github.com/sourcebot-dev/sourcebot/pull/1395) - [EE] Added text file attachments to Ask Sourcebot, letting users attach text/code/config files to a chat message via the paperclip button, drag-and-drop, or paste, with large pastes auto-converted to attachments. [#1374](https://github.com/sourcebot-dev/sourcebot/pull/1374) +- [EE] Added image attachments to Ask Sourcebot, letting users attach images to a chat message when the selected model supports image input. [#1375](https://github.com/sourcebot-dev/sourcebot/pull/1375) ### Fixed - Send anonymous server-side PostHog events as personless so unauthenticated requests don't inflate person counts. [#1367](https://github.com/sourcebot-dev/sourcebot/pull/1367) diff --git a/docs/docs/configuration/environment-variables.mdx b/docs/docs/configuration/environment-variables.mdx index 2d4c57acb..68e94c19b 100644 --- a/docs/docs/configuration/environment-variables.mdx +++ b/docs/docs/configuration/environment-variables.mdx @@ -40,6 +40,8 @@ The following environment variables allow you to configure your Sourcebot deploy | `SOURCEBOT_TELEMETRY_DISABLED` | `false` |
Enables/disables telemetry collection in Sourcebot. See [this doc](/docs/misc/telemetry) for more info.
| | `DEFAULT_MAX_MATCH_COUNT` | `10000` |The default maximum number of search results to return when using search in the web app.
| | `ALWAYS_INDEX_FILE_PATTERNS` | - |A comma separated list of glob patterns matching file paths that should always be indexed, regardless of size or number of trigrams.
| +| `SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES` | `10485760` (10 MiB) |Maximum size in bytes of a single image attachment uploaded to Ask Sourcebot. Enforced server-side at upload time.
| +| `SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS` | `24` |How long, in hours, after upload an orphaned attachment (uploaded but never sent, or no longer linked to any chat) is kept before the orphan sweep deletes it. Set to `0` to disable the sweep.
| | `NODE_USE_ENV_PROXY` | `0` |Enables Node.js to automatically use `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` environment variables for network requests. Set to `1` to enable or `0` to disable. See [this doc](https://nodejs.org/en/learn/http/enterprise-network-configuration) for more info.
| | `HTTP_PROXY` | - |HTTP proxy URL for routing non-SSL requests through a proxy server (e.g., `http://proxy.company.com:8080`). Requires `NODE_USE_ENV_PROXY=1`.
| | `HTTPS_PROXY` | - |HTTPS proxy URL for routing SSL requests through a proxy server (e.g., `http://proxy.company.com:8080`). Requires `NODE_USE_ENV_PROXY=1`.
| diff --git a/packages/backend/src/attachmentPruner.ts b/packages/backend/src/attachmentPruner.ts new file mode 100644 index 000000000..ffeb534c3 --- /dev/null +++ b/packages/backend/src/attachmentPruner.ts @@ -0,0 +1,156 @@ +import { AttachmentStatus, PrismaClient } from "@sourcebot/db"; +import { createLogger, env, getStorageBackend } from "@sourcebot/shared"; +import { setIntervalAsync } from "./utils.js"; + +const BATCH_SIZE = 1_000; +const ONE_HOUR_MS = 60 * 60 * 1000; + +const logger = createLogger('attachment-pruner'); + +/** + * Periodically reclaims orphaned attachment blobs older than the configured TTL, + * along with their stored bytes, using the `DELETING` tombstone protocol: an + * orphan is first atomically flipped to `DELETING`, then its bytes are deleted, + * and only then is the row removed. Because the row (the only durable handle to + * the bytes) outlives the byte delete, a failed byte delete is always retryable. + * + * Each tick condemns two classes of orphan to `DELETING`, then reclaims all + * tombstones: + * + * 1. PENDING (uploaded-but-never-linked): produced when a user selects a file + * in the chat box but never sends the message. + * 2. COMMITTED with zero links: normally a committed blob is reclaimed inline + * by the chat-delete sweep in the web app, but if that sweep is interrupted + * (process crash / DB error / failed byte delete) the blob is left tombstoned + * or unlinked. This is the backstop for that case. + * + * @note Byte deletion goes through the shared `StorageBackend`, so the web app + * and this worker share one on-disk layout. 
+ */ +export class AttachmentPruner { + private interval?: NodeJS.Timeout; + private readonly storage = getStorageBackend(); + + constructor(private db: PrismaClient) {} + + startScheduler() { + const ttlHours = env.SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS; + if (ttlHours <= 0) { + logger.info('SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS is 0, attachment orphan pruning is disabled.'); + return; + } + + logger.debug(`Attachment pruner started. Reclaiming orphaned attachments older than ${ttlHours} hours.`); + + // Run immediately on startup, then every hour. The startup call isn't + // awaited, so log any failure here: this worker exits on + // unhandledRejection, and the recurring schedule will retry. + this.pruneOrphanedAttachments().catch((error) => { + logger.warn(`Initial attachment prune failed: ${error}`); + }); + this.interval = setIntervalAsync(() => this.pruneOrphanedAttachments(), ONE_HOUR_MS); + } + + async dispose() { + if (this.interval) { + clearInterval(this.interval); + this.interval = undefined; + } + } + + private async pruneOrphanedAttachments() { + const cutoff = new Date(Date.now() - env.SOURCEBOT_CHAT_ATTACHMENT_ORPHAN_TTL_HOURS * ONE_HOUR_MS); + + // Condemn orphans by flipping them to the DELETING tombstone. Each claim + // is atomic, so a PENDING blob committed by a concurrent send (its commit + // matches only PENDING rows) or a zero-link blob re-linked by a concurrent + // duplicate-chat loses the claim and is left intact. + // + // PENDING orphans: uploaded but the message was never sent. + const pendingClaimed = await this.db.attachment.updateMany({ + where: { + status: AttachmentStatus.PENDING, + createdAt: { lt: cutoff }, + }, + data: { status: AttachmentStatus.DELETING }, + }); + + // COMMITTED orphans: blobs left with zero links by an interrupted + // chat-delete sweep in the web app. 
+ const committedClaimed = await this.db.attachment.updateMany({ + where: { + status: AttachmentStatus.COMMITTED, + createdAt: { lt: cutoff }, + chats: { none: {} }, + }, + data: { status: AttachmentStatus.DELETING }, + }); + + // Reclaim every tombstone: delete bytes, then the row. This also picks up + // tombstones left behind by the web app's inline reclaim (or a crashed + // earlier tick) whose byte delete failed. + const reclaimed = await this.reclaimTombstonedAttachments(); + + if (pendingClaimed.count > 0 || committedClaimed.count > 0 || reclaimed > 0) { + logger.debug( + `Attachment prune: condemned ${pendingClaimed.count} PENDING + ` + + `${committedClaimed.count} COMMITTED orphan(s), reclaimed ${reclaimed} tombstone(s).`, + ); + } + } + + /** + * Deletes the bytes for every `DELETING` tombstone, then removes the row. + * The row (the only durable handle to the bytes) is removed only after its + * bytes are confirmed gone, so a failed byte delete leaves the tombstone in + * place to be retried on the next tick — bytes can never be orphaned by a + * transient storage error. Rows whose byte delete fails this run are + * excluded from subsequent batches so a persistent failure can't spin the + * loop. + * + * @returns the number of tombstones fully reclaimed (bytes + row). + */ + private async reclaimTombstonedAttachments(): Promise
- {text}
-
+ {imageSrc ? (
+ // eslint-disable-next-line @next/next/no-img-element
+
+ {text}
+
+ )}
)
diff --git a/packages/web/src/features/chat/components/chatBox/chatBox.tsx b/packages/web/src/features/chat/components/chatBox/chatBox.tsx
index 1bd33235f..a7dd7fd31 100644
--- a/packages/web/src/features/chat/components/chatBox/chatBox.tsx
+++ b/packages/web/src/features/chat/components/chatBox/chatBox.tsx
@@ -5,7 +5,7 @@ import { Button } from "@/components/ui/button";
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip";
import { AttachmentData, CustomEditor, MentionElement, RenderElementPropsFor, SearchScope } from "@/features/chat/types";
import { insertMention, slateContentToString } from "@/features/chat/utils";
-import { createPastedTextAttachment, getSubmittedTextBytes, PendingAttachment, readFilesAsAttachments, shouldAutoConvertPaste, toAttachmentData } from "@/features/chat/attachmentUtils";
+import { createPastedTextAttachment, getSubmittedTextBytes, PendingAttachment, PendingImageAttachment, readFilesAsAttachments, shouldAutoConvertPaste, toAttachmentData, uploadImageAttachment } from "@/features/chat/attachmentUtils";
import { AttachmentButton } from "./attachmentButton";
import { AttachmentTray } from "./attachmentTray";
import { cn } from "@/lib/utils";
@@ -26,7 +26,7 @@ import { SearchContextQuery } from "@/lib/types";
import isEqual from "fast-deep-equal/react";
import { LoginDialog } from "./loginDialog";
import { usePathname } from "next/navigation";
-import { ATTACHMENT_MAX_TURN_TEXT_BYTES, PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY } from "@/features/chat/constants";
+import { ATTACHMENT_MAX_IMAGE_BYTES, ATTACHMENT_MAX_TURN_TEXT_BYTES, PENDING_CHAT_SUBMISSION_SESSION_STORAGE_KEY } from "@/features/chat/constants";
import useCaptureEvent from "@/hooks/useCaptureEvent";
import { useHasEntitlement } from "@/features/entitlements/useHasEntitlement";
import { UpsellDialog } from "@/features/billing/upsellDialog";
@@ -35,6 +35,15 @@ export interface ChatBoxHandle {
addFiles: (files: File[]) => void;
}
+// Converts pending attachments and returns only the inline-text (`kind === 'text'`) ones,
+// the only kind that survives the login/upgrade redirect: image blobs require an authenticated,
+// entitled upload, so a redirected sender can't have one, and a stashed blob ref would only fail to commit on re-submit.
+const getRedirectSafeAttachments = (attachments: PendingAttachment[]): AttachmentData[] => {
+ return attachments
+ .map(toAttachmentData)
+ .filter((attachment): attachment is AttachmentData => attachment?.kind === 'text');
+}
+
interface ChatBoxProps {
onSubmit: (children: Descendant[], editor: CustomEditor, attachments: AttachmentData[]) => void;
onStop?: () => void;
@@ -48,6 +57,10 @@ interface ChatBoxProps {
searchContexts: SearchContextQuery[];
isLoginWallEnabled: boolean;
isAuthenticated: boolean;
+ // Authoritative per-image byte cap from the server
+ // (SOURCEBOT_CHAT_ATTACHMENT_MAX_IMAGE_BYTES), threaded down for early
+ // client-side rejection. Defaults to the constant when not provided.
+ maxImageBytes?: number;
}
const ChatBoxComponent = ({
@@ -63,6 +76,7 @@ const ChatBoxComponent = ({
isAuthenticated,
selectedSearchScopes,
searchContexts,
+ maxImageBytes = ATTACHMENT_MAX_IMAGE_BYTES,
}: ChatBoxProps, ref: Ref+ Images won't be sent: the selected model doesn't support image input. +
+ )}