Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 29 additions & 8 deletions base/audit_events.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,16 @@ const (
AuditIDISGRAllRead AuditID = 54421

// Documents events
AuditIDDocumentCreate AuditID = 55000
AuditIDDocumentRead AuditID = 55001
AuditIDDocumentUpdate AuditID = 55002
AuditIDDocumentDelete AuditID = 55003
AuditIDDocumentMetadataRead AuditID = 55004
AuditIDDocumentImport AuditID = 55005
AuditIDDocumentResync AuditID = 55006
AuditIDDocumentRevoke AuditID = 55007
AuditIDDocumentCreate AuditID = 55000
AuditIDDocumentRead AuditID = 55001
AuditIDDocumentUpdate AuditID = 55002
AuditIDDocumentDelete AuditID = 55003
AuditIDDocumentMetadataRead AuditID = 55004
AuditIDDocumentImport AuditID = 55005
AuditIDDocumentResync AuditID = 55006
AuditIDDocumentRevoke AuditID = 55007
AuditIDDocumentChannelHistoryCompact AuditID = 55008

// Document attachments events
AuditIDAttachmentCreate AuditID = 55010
AuditIDAttachmentRead AuditID = 55011
Expand Down Expand Up @@ -1157,6 +1159,25 @@ var AuditEvents = events{
FilteringPermitted: true,
EventType: eventTypeData,
},
AuditIDDocumentChannelHistoryCompact: {
Name: "Compact document channel history",
Description: "Document channel history was compacted",
MandatoryFields: AuditFields{
AuditFieldDocID: "document id",
AuditFieldChannels: []string{"list", "of", "channels"},
AuditFieldSequence: "sequence",
},
mandatoryFieldGroups: []fieldGroup{
fieldGroupAuthenticated,
fieldGroupKeyspace,
},
optionalFieldGroups: []fieldGroup{
fieldGroupRequest,
},
EnabledByDefault: false,
FilteringPermitted: true,
EventType: eventTypeData,
},
AuditIDAttachmentCreate: {
Name: "Create attachment",
Description: "A new attachment was created",
Expand Down
1 change: 1 addition & 0 deletions base/audit_events_fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,5 @@ const (
AuditFieldDocIDs = "doc_ids"
AuditFieldFeedType = "feed_type"
AuditFieldIncludeDocs = "include_docs"
AuditFieldSequence = "seq"
)
156 changes: 156 additions & 0 deletions db/crud.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"fmt"
"math"
"net/http"
"slices"
"strings"
"time"

Expand Down Expand Up @@ -196,6 +197,161 @@ func (c *DatabaseCollection) GetDocSyncData(ctx context.Context, docid string) (

}

// ChannelHistory is a de-duplicating index from channel name to the set of
// sequences recorded against that channel.
type ChannelHistory map[string]map[uint64]struct{}

// addChannelHistoryEntry records seq against the named channel, allocating the
// per-channel set the first time the channel is seen. Recording the same
// (name, seq) pair more than once has no further effect.
func (ch ChannelHistory) addChannelHistoryEntry(name string, seq uint64) {
	seqs, found := ch[name]
	if !found {
		seqs = make(map[uint64]struct{})
		ch[name] = seqs
	}
	seqs[seq] = struct{}{}
}

// getChannelHistoryAsMap flattens the history into a map from channel name to
// that channel's sequences, ordered from highest to lowest. Every channel
// present in the history gets a non-nil slice, even when its set is empty.
func (ch ChannelHistory) getChannelHistoryAsMap() map[string][]uint64 {
	result := make(map[string][]uint64)
	for name, seqSet := range ch {
		ordered := make([]uint64, 0, len(seqSet))
		for seq := range seqSet {
			ordered = append(ordered, seq)
		}
		// Map iteration order is random; sort ascending then flip to get a
		// deterministic descending order.
		slices.Sort(ordered)
		slices.Reverse(ordered)
		result[name] = ordered
	}
	return result
}

// GetDocChannelHistory reports the channel revocation history of the given document: a map
// from channel name to the sequences at which the document was removed from that channel,
// highest sequence first.
//
// Sequences are gathered from three parts of the document's sync data: the Channels map
// (non-nil entries only), the ChannelSet, and the ChannelSetHistory overflow. A zero sequence
// is treated as "never removed" and skipped, so channels with no recorded removal are absent
// from the result. A channel the document currently belongs to can still be listed if it was
// removed earlier and re-added later.
//
// Any error from loading the sync data is returned unchanged with a nil map.
func (c *DatabaseCollection) GetDocChannelHistory(ctx context.Context, docid string) (map[string][]uint64, error) {
	syncData, err := c.GetDocSyncData(ctx, docid)
	if err != nil {
		return nil, err
	}

	history := make(ChannelHistory)

	// record stores a removal sequence for a channel, ignoring the zero value.
	record := func(name string, removedAt uint64) {
		if removedAt == 0 {
			return
		}
		history.addChannelHistoryEntry(name, removedAt)
	}

	for name, removal := range syncData.Channels {
		if removal == nil {
			continue
		}
		record(name, removal.Seq)
	}
	for _, entry := range syncData.ChannelSet {
		record(entry.Name, entry.End)
	}
	for _, entry := range syncData.ChannelSetHistory {
		record(entry.Name, entry.End)
	}

	return history.getChannelHistoryAsMap(), nil
}

// CompactDocChannelHistory removes channel history entries that ended at or before the given sequence number.
// This is used to prune stale channel assignment history to reduce storage overhead.
func (c *DatabaseCollection) CompactDocChannelHistory(ctx context.Context, docid string, seq uint64) ([]string, error) {
key := realDocID(docid)
if key == "" {
return nil, base.HTTPErrorf(400, "Invalid doc ID")
}

xattrKeys := []string{base.SyncXattrName, base.MouXattrName}
rawDoc, xattrs, cas, err := c.dataStore.GetWithXattrs(ctx, key, xattrKeys)
if err != nil {
return nil, err
}

doc, err := c.unmarshalDocumentWithXattrs(ctx, key, nil, xattrs, cas, DocUnmarshalSync)
if err != nil {
return nil, err
}

isSgWrite, crc32Match, _ := doc.IsSGWrite(ctx, rawDoc)
if crc32Match {
c.dbStats().Database().Crc32MatchCount.Add(1)
}

if !isSgWrite {
var importErr error

doc, importErr = c.OnDemandImportForGet(ctx, docid, doc, rawDoc, xattrs, cas)
if importErr != nil {
return nil, importErr
}
if doc == nil {
return nil, fmt.Errorf("skipping compaction of document %s, %v ", base.UD(docid), base.ErrNotFound)
}
Comment on lines +287 to +289
cas = doc.Cas
}

compactedChannels := make(base.Set)

doc.SyncData.ChannelSetHistory = slices.DeleteFunc(doc.SyncData.ChannelSetHistory, func(channel ChannelSetEntry) bool {
del := channel.End <= seq
if del {
compactedChannels.Add(channel.Name)
}
return del
})

doc.SyncData.ChannelSet = slices.DeleteFunc(doc.SyncData.ChannelSet, func(channel ChannelSetEntry) bool {
del := channel.End != 0 && channel.End <= seq
if del {
compactedChannels.Add(channel.Name)
}
return del
})

for chanName, chanEntry := range doc.SyncData.Channels {
if chanEntry != nil && chanEntry.Seq <= seq {
compactedChannels.Add(chanName)
delete(doc.SyncData.Channels, chanName)
}
}

// Exit early if no compaction occurred
if len(compactedChannels) == 0 {
return []string{}, nil
}

rawSyncXattr, err := base.JSONMarshal(doc.SyncData)
if err != nil {
return nil, base.RedactErrorf("failed to marshal sync data when trying to compact channel history for doc:%s. Error: %v", base.UD(docid), err)
}

metadataOnlyUpdate := computeMetadataOnlyUpdate(doc.Cas, doc.metadataOnlyUpdate)

rawMouXattr, err := base.JSONMarshal(metadataOnlyUpdate)
if err != nil {
return nil, base.RedactErrorf("failed to marshal _mou when attempting to compact channel history for doc: %s. Error: %v", base.UD(docid), err)
}

// build macro expansion for sync data. This will avoid the update to xattrs causing an extra import event (i.e. sync cas will be == to doc cas)
opts := &sgbucket.MutateInOptions{}
// Only update _sync.cas and _mou.cas if the pre-compaction doc had already been imported by SGW
opts.MacroExpansion = []sgbucket.MacroExpansionSpec{
Comment on lines +335 to +338
sgbucket.NewMacroExpansionSpec(xattrCasPath(base.MouXattrName), sgbucket.MacroCas),
sgbucket.NewMacroExpansionSpec(xattrCasPath(base.SyncXattrName), sgbucket.MacroCas),
}
opts.PreserveExpiry = true // if doc has expiry, we should preserve this

updatedXattr := map[string][]byte{
base.SyncXattrName: rawSyncXattr,
base.MouXattrName: rawMouXattr,
}
_, err = c.dataStore.UpdateXattrs(ctx, key, 0, cas, updatedXattr, opts)
compactedChannelArray := compactedChannels.ToArray()
slices.Sort(compactedChannelArray)
return compactedChannelArray, err
}

// GetDocSyncDataNoImport returns unmarshalled value of the _sync xattr.
// This gets *just* the Sync Metadata (_sync field) rather than the entire doc, for efficiency
// reasons. Unlike GetDocSyncData it does not check for on-demand import; this means it does not
// need to read the doc body from the bucket.
Expand Down
4 changes: 4 additions & 0 deletions docs/api/admin.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ paths:
$ref: './paths/admin/db-_index_init.yaml'
'/{keyspace}/_purge':
$ref: './paths/admin/keyspace-_purge.yaml'
'/{keyspace}/_channel_history/{docid}':
$ref: './paths/admin/keyspace-_channel_history.yaml'
'/{keyspace}/_channel_history/{docid}/compact':
$ref: './paths/admin/keyspace-_channel_history-compact.yaml'
'/{db}/_flush':
$ref: './paths/admin/db-_flush.yaml'
'/{db}/_online':
Expand Down
6 changes: 6 additions & 0 deletions docs/api/components/responses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ Unauthorized:
application/json:
schema:
$ref: ./schemas.yaml#/HTTP-Error
Unauthorized-database:
description: User does not have access to the database resource, or database resource does not exist
content:
application/json:
schema:
$ref: ./schemas.yaml#/HTTP-Error
Conflict:
description: Resource already exists under that name
content:
Expand Down
74 changes: 74 additions & 0 deletions docs/api/paths/admin/keyspace-_channel_history-compact.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright 2026-Present Couchbase, Inc.
#
# Use of this software is governed by the Business Source License included
# in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
# in that file, in accordance with the Business Source License, use of this
# software will be governed by the Apache License, Version 2.0, included in
# the file licenses/APL2.txt.
parameters:
- $ref: ../../components/parameters.yaml#/keyspace
- $ref: ../../components/parameters.yaml#/docid
post:
summary: Compact Channel History of Document
description: |-
Compacts channel history for a specified document. Channel history entries that ended at or before the specified sequence will be removed.

This endpoint removes every channel history entry recording that the document left a channel at or before the specified sequence number,
effectively cleaning up historical channel membership information while preserving active channels and recent changes.
This can be useful for reducing metadata size for documents that frequently gain and lose access to channels.
Comment on lines +14 to +18

Required Sync Gateway RBAC roles:

* Sync Gateway Application
requestBody:
content:
application/json:
schema:
type: object
required:
- seq
properties:
seq:
description: |-
Channel history entries with an end sequence at or before this sequence will be removed from the specified document's metadata.
type: integer
Comment on lines +32 to +34
format: int64
minimum: 1
example: 12345
responses:
'200':
description: |-
Successfully compacted channel history from the specified document.
Returns a list of channels that were compacted.

If the response has an empty array, it means no channels were compacted.

content:
application/json:
schema:
type: object
additionalProperties:
x-additionalPropertiesName: doc_id
type: array
items:
type: string
description: |-
Array of channel names that were compacted.
description: |-
An array of all the compacted channels
example:
compacted_channels:
- channel1
- channel2
Comment on lines +48 to +62
'400':
description: 'Bad request. This could be due to invalid request parameters such as invalid seq value.'
content:
application/json:
schema:
$ref: ../../components/schemas.yaml#/HTTP-Error
'403':
$ref: ../../components/responses.yaml#/Unauthorized-database

tags:
- Document
operationId: post_keyspace-_history-compact
Comment on lines +72 to +74
62 changes: 62 additions & 0 deletions docs/api/paths/admin/keyspace-_channel_history.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright 2026-Present Couchbase, Inc.
#
# Use of this software is governed by the Business Source License included
# in the file licenses/BSL-Couchbase.txt. As of the Change Date specified
# in that file, in accordance with the Business Source License, use of this
# software will be governed by the Apache License, Version 2.0, included in
# the file licenses/APL2.txt.
parameters:
- $ref: ../../components/parameters.yaml#/keyspace
- $ref: ../../components/parameters.yaml#/docid
get:
summary: Get Channel History of Document
description: |-
Returns the channel revocation history for the specified document as a map of channel
names to the array of sequences at which the document was removed from each channel. Only channels
that have been revoked at least once are included; channels the document is currently
assigned to may be included if they were previously revoked.

Multiple sequences for a given channel indicate that the document has lost access to that channel
more than once — each sequence represents a point in time at which the document was removed from
the channel.

Required Sync Gateway RBAC roles:

* Sync Gateway Application
* Sync Gateway Application Read Only
responses:
'200':
description: |-
Successfully retrieved the channel revocation history for the specified document.
Returns a JSON object mapping each channel name to an array of sequences at which
the document was removed from that channel.
content:
application/json:
schema:
type: object
additionalProperties:
type: array
items:
type: integer
Comment on lines +36 to +40
format: int64
minimum: 1
description: Sequences at which the document was removed from this channel.
description: Map of channel names to their revocation sequences.
example:
channel1:
- 3
- 7
channel2:
- 5
'404':
description: Document not found.
content:
application/json:
schema:
$ref: ../../components/schemas.yaml#/HTTP-Error
'403':
$ref: ../../components/responses.yaml#/Unauthorized-database

tags:
- Document
operationId: get_keyspace-_channel_history
Loading
Loading