From 1961bb5cc93157b2a79ade90a733acd6fa6c5284 Mon Sep 17 00:00:00 2001 From: Mick Letofsky Date: Fri, 27 Feb 2026 18:07:49 +0100 Subject: [PATCH] Craft density modeling for Seeded vaults (#7102) --- .../CreateCollectionsStepTests.cs | 214 ++++++++++++++++++ .../DensityModel/CreateGroupsStepTests.cs | 138 +++++++++++ util/Seeder/CLAUDE.md | 19 ++ .../Distributions/PermissionDistributions.cs | 118 ++++++++++ .../Seeder/Data/Enums/CipherCollectionSkew.cs | 6 + .../Data/Enums/CollectionFanOutShape.cs | 6 + .../Data/Enums/MembershipDistributionShape.cs | 6 + util/Seeder/Data/Enums/PermissionWeight.cs | 6 + util/Seeder/Models/SeedPreset.cs | 1 + util/Seeder/Models/SeedPresetDensity.cs | 65 ++++++ util/Seeder/Options/DensityProfile.cs | 62 +++++ .../Options/OrganizationVaultOptions.cs | 6 + util/Seeder/Pipeline/PresetLoader.cs | 63 +++++- .../Pipeline/RecipeBuilderExtensions.cs | 14 +- util/Seeder/Pipeline/RecipeOrchestrator.cs | 6 +- .../fixtures/presets/validation/README.md | 153 +++++++++++++ .../density-modeling-empty-groups-test.json | 21 ++ .../density-modeling-mega-group-test.json | 27 +++ .../density-modeling-no-density-test.json | 12 + .../density-modeling-power-law-test.json | 29 +++ util/Seeder/Seeds/schemas/preset.schema.json | 112 ++++++++- util/Seeder/Steps/CreateCollectionsStep.cs | 178 +++++++++++++-- util/Seeder/Steps/CreateGroupsStep.cs | 118 +++++++++- util/Seeder/Steps/GenerateCiphersStep.cs | 76 +++++-- 24 files changed, 1390 insertions(+), 66 deletions(-) create mode 100644 test/SeederApi.IntegrationTest/DensityModel/CreateCollectionsStepTests.cs create mode 100644 test/SeederApi.IntegrationTest/DensityModel/CreateGroupsStepTests.cs create mode 100644 util/Seeder/Data/Distributions/PermissionDistributions.cs create mode 100644 util/Seeder/Data/Enums/CipherCollectionSkew.cs create mode 100644 util/Seeder/Data/Enums/CollectionFanOutShape.cs create mode 100644 util/Seeder/Data/Enums/MembershipDistributionShape.cs create mode 100644 
util/Seeder/Data/Enums/PermissionWeight.cs create mode 100644 util/Seeder/Models/SeedPresetDensity.cs create mode 100644 util/Seeder/Options/DensityProfile.cs create mode 100644 util/Seeder/Seeds/fixtures/presets/validation/README.md create mode 100644 util/Seeder/Seeds/fixtures/presets/validation/density-modeling-empty-groups-test.json create mode 100644 util/Seeder/Seeds/fixtures/presets/validation/density-modeling-mega-group-test.json create mode 100644 util/Seeder/Seeds/fixtures/presets/validation/density-modeling-no-density-test.json create mode 100644 util/Seeder/Seeds/fixtures/presets/validation/density-modeling-power-law-test.json diff --git a/test/SeederApi.IntegrationTest/DensityModel/CreateCollectionsStepTests.cs b/test/SeederApi.IntegrationTest/DensityModel/CreateCollectionsStepTests.cs new file mode 100644 index 0000000000..630d7f56a9 --- /dev/null +++ b/test/SeederApi.IntegrationTest/DensityModel/CreateCollectionsStepTests.cs @@ -0,0 +1,214 @@ +using Bit.Core.Entities; +using Bit.Seeder.Data.Distributions; +using Bit.Seeder.Data.Enums; +using Bit.Seeder.Options; +using Bit.Seeder.Steps; +using Xunit; + +namespace Bit.SeederApi.IntegrationTest.DensityModel; + +public class CreateCollectionsStepTests +{ + private static readonly List _collectionIds = + [.. Enumerable.Range(1, 10).Select(i => new Guid($"00000000-0000-0000-0000-{i:D12}"))]; + + private static readonly List _groupIds = + [.. Enumerable.Range(1, 5).Select(i => new Guid($"11111111-0000-0000-0000-{i:D12}"))]; + + private static readonly List _userIds = + [.. 
Enumerable.Range(1, 20).Select(i => new Guid($"22222222-0000-0000-0000-{i:D12}"))]; + + private static readonly Distribution EvenPermissions = new( + (PermissionWeight.ReadOnly, 0.25), + (PermissionWeight.ReadWrite, 0.25), + (PermissionWeight.Manage, 0.25), + (PermissionWeight.HidePasswords, 0.25)); + + [Fact] + public void ApplyGroupPermissions_EvenSplit_DistributesAllFourTypes() + { + var assignments = Enumerable.Range(0, 100) + .Select(_ => new CollectionGroup { CollectionId = Guid.NewGuid(), GroupId = Guid.NewGuid() }) + .ToList(); + + CreateCollectionsStep.ApplyGroupPermissions(assignments, EvenPermissions); + + Assert.Equal(25, assignments.Count(a => a.ReadOnly)); + Assert.Equal(25, assignments.Count(a => a.Manage)); + Assert.Equal(25, assignments.Count(a => a.HidePasswords)); + Assert.Equal(25, assignments.Count(a => !a.ReadOnly && !a.Manage && !a.HidePasswords)); + } + + [Fact] + public void ApplyGroupPermissions_MutuallyExclusiveFlags() + { + var assignments = Enumerable.Range(0, 100) + .Select(_ => new CollectionGroup { CollectionId = Guid.NewGuid(), GroupId = Guid.NewGuid() }) + .ToList(); + + CreateCollectionsStep.ApplyGroupPermissions(assignments, EvenPermissions); + + Assert.All(assignments, a => + { + var flagCount = (a.ReadOnly ? 1 : 0) + (a.HidePasswords ? 1 : 0) + (a.Manage ? 
1 : 0); + Assert.True(flagCount <= 1, "At most one permission flag should be true"); + }); + } + + [Fact] + public void ApplyGroupPermissions_ReadOnlyHeavy_MajorityAreReadOnly() + { + var assignments = Enumerable.Range(0, 100) + .Select(_ => new CollectionGroup { CollectionId = Guid.NewGuid(), GroupId = Guid.NewGuid() }) + .ToList(); + + CreateCollectionsStep.ApplyGroupPermissions(assignments, PermissionDistributions.Enterprise); + + var readOnlyCount = assignments.Count(a => a.ReadOnly); + Assert.True(readOnlyCount >= 80, $"Expected >= 80 ReadOnly, got {readOnlyCount}"); + } + + [Fact] + public void ApplyUserPermissions_EvenSplit_DistributesAllFourTypes() + { + var assignments = Enumerable.Range(0, 100) + .Select(_ => new CollectionUser { CollectionId = Guid.NewGuid(), OrganizationUserId = Guid.NewGuid() }) + .ToList(); + + CreateCollectionsStep.ApplyUserPermissions(assignments, EvenPermissions); + + Assert.Equal(25, assignments.Count(a => a.ReadOnly)); + Assert.Equal(25, assignments.Count(a => a.Manage)); + Assert.Equal(25, assignments.Count(a => a.HidePasswords)); + Assert.Equal(25, assignments.Count(a => !a.ReadOnly && !a.Manage && !a.HidePasswords)); + } + + [Fact] + public void BuildCollectionGroups_ClampsToAvailableGroups() + { + var twoGroups = _groupIds.Take(2).ToList(); + var step = CreateStep(CollectionFanOutShape.Uniform, min: 5, max: 5); + + var result = step.BuildCollectionGroups(_collectionIds, twoGroups); + + Assert.All(result, cg => Assert.Contains(cg.GroupId, twoGroups)); + Assert.Equal(20, result.Count); + } + + [Fact] + public void BuildCollectionGroups_NoDuplicateGroupPerCollection() + { + var step = CreateStep(CollectionFanOutShape.Uniform, min: 3, max: 3); + + var result = step.BuildCollectionGroups(_collectionIds, _groupIds); + + foreach (var collectionId in _collectionIds) + { + var groupsForCollection = result.Where(cg => cg.CollectionId == collectionId) + .Select(cg => cg.GroupId).ToList(); + Assert.Equal(groupsForCollection.Count, 
groupsForCollection.Distinct().Count()); + } + } + + [Fact] + public void BuildCollectionGroups_Uniform_AssignsGroupsToEveryCollection() + { + var step = CreateStep(CollectionFanOutShape.Uniform, min: 2, max: 2); + + var result = step.BuildCollectionGroups(_collectionIds, _groupIds); + + Assert.Equal(20, result.Count); + Assert.All(result, cg => Assert.Contains(cg.GroupId, _groupIds)); + } + + [Fact] + public void BuildCollectionUsers_AllCollectionIdsAreValid() + { + var result = CreateCollectionsStep.BuildCollectionUsers(_collectionIds, _userIds, 10); + + Assert.All(result, cu => Assert.Contains(cu.CollectionId, _collectionIds)); + } + + [Fact] + public void BuildCollectionUsers_AssignsOneToThreeCollectionsPerUser() + { + var result = CreateCollectionsStep.BuildCollectionUsers(_collectionIds, _userIds, 10); + + var perUser = result.GroupBy(cu => cu.OrganizationUserId).ToList(); + Assert.All(perUser, group => Assert.InRange(group.Count(), 1, 3)); + } + + [Fact] + public void BuildCollectionUsers_RespectsDirectUserCount() + { + var result = CreateCollectionsStep.BuildCollectionUsers(_collectionIds, _userIds, 5); + + var distinctUsers = result.Select(cu => cu.OrganizationUserId).Distinct().ToList(); + Assert.Equal(5, distinctUsers.Count); + } + + [Fact] + public void ComputeFanOut_FrontLoaded_FirstTenPercentGetMax() + { + var step = CreateStep(CollectionFanOutShape.FrontLoaded, min: 1, max: 5); + + Assert.Equal(5, step.ComputeFanOut(0, 100, 1, 5)); + Assert.Equal(5, step.ComputeFanOut(9, 100, 1, 5)); + Assert.Equal(1, step.ComputeFanOut(10, 100, 1, 5)); + Assert.Equal(1, step.ComputeFanOut(99, 100, 1, 5)); + } + + [Fact] + public void ComputeFanOut_MinEqualsMax_AlwaysReturnsMin() + { + var step = CreateStep(CollectionFanOutShape.Uniform, min: 3, max: 3); + + Assert.Equal(3, step.ComputeFanOut(0, 10, 3, 3)); + Assert.Equal(3, step.ComputeFanOut(5, 10, 3, 3)); + Assert.Equal(3, step.ComputeFanOut(9, 10, 3, 3)); + } + + [Fact] + public void 
ComputeFanOut_PowerLaw_FirstCollectionGetsMax() + { + var step = CreateStep(CollectionFanOutShape.PowerLaw, min: 1, max: 5); + + Assert.Equal(5, step.ComputeFanOut(0, 100, 1, 5)); + } + + [Fact] + public void ComputeFanOut_PowerLaw_LaterCollectionsDecay() + { + var step = CreateStep(CollectionFanOutShape.PowerLaw, min: 1, max: 5); + + var first = step.ComputeFanOut(0, 100, 1, 5); + var middle = step.ComputeFanOut(50, 100, 1, 5); + var last = step.ComputeFanOut(99, 100, 1, 5); + + Assert.True(first > middle, "First collection should have more fan-out than middle"); + Assert.True(middle >= last, "Middle should have >= fan-out than last"); + Assert.True(last >= 1, "Last collection should have at least min fan-out"); + } + + [Fact] + public void ComputeFanOut_Uniform_CyclesThroughRange() + { + var step = CreateStep(CollectionFanOutShape.Uniform, min: 1, max: 3); + + Assert.Equal(1, step.ComputeFanOut(0, 10, 1, 3)); + Assert.Equal(2, step.ComputeFanOut(1, 10, 1, 3)); + Assert.Equal(3, step.ComputeFanOut(2, 10, 1, 3)); + Assert.Equal(1, step.ComputeFanOut(3, 10, 1, 3)); + } + + private static CreateCollectionsStep CreateStep(CollectionFanOutShape shape, int min, int max) + { + var density = new DensityProfile + { + FanOutShape = shape, + CollectionFanOutMin = min, + CollectionFanOutMax = max + }; + return CreateCollectionsStep.FromCount(0, density); + } +} diff --git a/test/SeederApi.IntegrationTest/DensityModel/CreateGroupsStepTests.cs b/test/SeederApi.IntegrationTest/DensityModel/CreateGroupsStepTests.cs new file mode 100644 index 0000000000..81e12f4cce --- /dev/null +++ b/test/SeederApi.IntegrationTest/DensityModel/CreateGroupsStepTests.cs @@ -0,0 +1,138 @@ +using Bit.Seeder.Data.Enums; +using Bit.Seeder.Options; +using Bit.Seeder.Steps; +using Xunit; + +namespace Bit.SeederApi.IntegrationTest.DensityModel; + +public class CreateGroupsStepTests +{ + [Fact] + public void ComputeUsersPerGroup_MegaGroup_GroupZeroDoesNotParticipateInRemainder() + { + var step = 
CreateStep(MembershipDistributionShape.MegaGroup, skew: 0.5); + + var allocations = step.ComputeUsersPerGroup(5, 100); + + var megaFraction = 0.5 + 0.5 * 0.45; // 0.725 + var expectedMega = (int)(100 * megaFraction); // 72 + Assert.Equal(expectedMega, allocations[0]); + } + + [Fact] + public void ComputeUsersPerGroup_MegaGroup_RemainderGoesToNonMegaGroups() + { + var step = CreateStep(MembershipDistributionShape.MegaGroup, skew: 0.5); + + var allocations = step.ComputeUsersPerGroup(5, 100); + + var nonMegaTotal = allocations[1] + allocations[2] + allocations[3] + allocations[4]; + Assert.Equal(100 - allocations[0], nonMegaTotal); + Assert.True(allocations[1] > 0, "Non-mega groups should have members"); + } + + [Fact] + public void ComputeUsersPerGroup_MegaGroup_SingleGroup_AllUsersAssigned() + { + var step = CreateStep(MembershipDistributionShape.MegaGroup, skew: 0.9); + + var allocations = step.ComputeUsersPerGroup(1, 100); + + Assert.Single(allocations); + Assert.Equal(100, allocations[0]); + } + + [Fact] + public void ComputeUsersPerGroup_MegaGroup_SingleUser_SingleGroup() + { + var step = CreateStep(MembershipDistributionShape.MegaGroup, skew: 1.0); + + var allocations = step.ComputeUsersPerGroup(1, 1); + + Assert.Equal(1, allocations[0]); + } + + [Fact] + public void ComputeUsersPerGroup_MegaGroup_SumsToUserCount() + { + var step = CreateStep(MembershipDistributionShape.MegaGroup, skew: 0.8); + + var allocations = step.ComputeUsersPerGroup(10, 100); + + Assert.Equal(100, allocations.Sum()); + } + + [Fact] + public void ComputeUsersPerGroup_PowerLaw_FirstGroupIsLargest() + { + var step = CreateStep(MembershipDistributionShape.PowerLaw, skew: 0.8); + + var allocations = step.ComputeUsersPerGroup(10, 100); + + Assert.Equal(allocations.Max(), allocations[0]); + } + + [Fact] + public void ComputeUsersPerGroup_PowerLaw_HighSkewMoreConcentrated() + { + var gentle = CreateStep(MembershipDistributionShape.PowerLaw, skew: 0.0); + var steep = 
CreateStep(MembershipDistributionShape.PowerLaw, skew: 1.0); + + var gentleAllocations = gentle.ComputeUsersPerGroup(10, 100); + var steepAllocations = steep.ComputeUsersPerGroup(10, 100); + + Assert.True(steepAllocations[0] > gentleAllocations[0], + $"Steep skew group 0 ({steepAllocations[0]}) should be larger than gentle ({gentleAllocations[0]})"); + } + + [Fact] + public void ComputeUsersPerGroup_PowerLaw_MoreGroupsThanUsers_NoNegativeAllocations() + { + var step = CreateStep(MembershipDistributionShape.PowerLaw, skew: 1.0); + + var allocations = step.ComputeUsersPerGroup(20, 5); + + Assert.All(allocations, a => Assert.True(a >= 0, $"Allocation should be >= 0, got {a}")); + Assert.Equal(5, allocations.Sum()); + } + + [Fact] + public void ComputeUsersPerGroup_PowerLaw_SumsToUserCount() + { + var step = CreateStep(MembershipDistributionShape.PowerLaw, skew: 0.5); + + var allocations = step.ComputeUsersPerGroup(10, 100); + + Assert.Equal(100, allocations.Sum()); + } + + [Fact] + public void ComputeUsersPerGroup_Uniform_EvenDistribution() + { + var step = CreateStep(MembershipDistributionShape.Uniform); + + var allocations = step.ComputeUsersPerGroup(5, 100); + + Assert.All(allocations, a => Assert.Equal(20, a)); + } + + [Fact] + public void ComputeUsersPerGroup_Uniform_SumsToUserCount() + { + var step = CreateStep(MembershipDistributionShape.Uniform); + + var allocations = step.ComputeUsersPerGroup(7, 100); + + Assert.Equal(100, allocations.Sum()); + } + + private static CreateGroupsStep CreateStep(MembershipDistributionShape shape, double skew = 0.0) + { + var density = new DensityProfile + { + MembershipShape = shape, + MembershipSkew = skew + }; + return new CreateGroupsStep(0, density); + } +} diff --git a/util/Seeder/CLAUDE.md b/util/Seeder/CLAUDE.md index 5ea8211f81..f8006eaca6 100644 --- a/util/Seeder/CLAUDE.md +++ b/util/Seeder/CLAUDE.md @@ -49,6 +49,25 @@ Need to create test data? See `Pipeline/` folder for implementation. 
+## Density Profiles + +Steps accept an optional `DensityProfile` that controls relationship patterns between users, groups, collections, and ciphers. When null, steps use the original round-robin behavior. When present, steps branch into density-aware algorithms. + +**Key files**: + +- `Options/DensityProfile.cs` — strongly-typed options (public class) +- `Models/SeedPresetDensity.cs` — JSON preset deserialization targets (internal records) +- `Data/Enums/MembershipDistributionShape.cs` — Uniform, PowerLaw, MegaGroup +- `Data/Enums/CollectionFanOutShape.cs` — Uniform, PowerLaw, FrontLoaded +- `Data/Enums/CipherCollectionSkew.cs` — Uniform, HeavyRight +- `Data/Distributions/PermissionDistributions.cs` — 11 named distributions by org tier + +**Backward compatibility contract**: `DensityProfile? == null` MUST produce identical output to the original code. Every step guards this with `if (_density == null) { /* original path */ }`. + +**Preset JSON**: Add an optional `"density": { ... }` block. See `Seeds/schemas/preset.schema.json` for the full schema. + +**Validation presets**: `Seeds/fixtures/presets/validation/` contains presets that verify density algorithms produce correct distributions. See the README in that folder for queries and expected results. + ## The Recipe Contract Recipes follow strict rules: diff --git a/util/Seeder/Data/Distributions/PermissionDistributions.cs b/util/Seeder/Data/Distributions/PermissionDistributions.cs new file mode 100644 index 0000000000..3b06c4b8d0 --- /dev/null +++ b/util/Seeder/Data/Distributions/PermissionDistributions.cs @@ -0,0 +1,118 @@ +using Bit.Seeder.Data.Enums; + +namespace Bit.Seeder.Data.Distributions; + +/// +/// Pre-configured permission type distributions organized by org size and pendulum position. +/// Every distribution guarantees at least 5% Manage and 5% ReadWrite. +/// +public static class PermissionDistributions +{ + /// + /// Enterprise, read-heavy. Our production baseline. 
Pendulum swings hard toward ReadOnly. + /// + public static Distribution Enterprise { get; } = new( + (PermissionWeight.ReadOnly, 0.82), + (PermissionWeight.ReadWrite, 0.09), + (PermissionWeight.Manage, 0.05), + (PermissionWeight.HidePasswords, 0.04) + ); + + /// + /// Enterprise, write-heavy. Engineering-driven orgs where most users need to edit shared credentials. + /// + public static Distribution EnterpriseWriteHeavy { get; } = new( + (PermissionWeight.ReadWrite, 0.55), + (PermissionWeight.ReadOnly, 0.25), + (PermissionWeight.Manage, 0.10), + (PermissionWeight.HidePasswords, 0.10) + ); + + /// + /// Enterprise, manage-heavy. Decentralized admin model with many collection managers. + /// + public static Distribution EnterpriseManageHeavy { get; } = new( + (PermissionWeight.Manage, 0.30), + (PermissionWeight.ReadWrite, 0.30), + (PermissionWeight.ReadOnly, 0.30), + (PermissionWeight.HidePasswords, 0.10) + ); + + /// + /// Mid-market, read-heavy. Structured org where most users consume, leads manage. + /// + public static Distribution MidMarket { get; } = new( + (PermissionWeight.ReadOnly, 0.55), + (PermissionWeight.ReadWrite, 0.20), + (PermissionWeight.Manage, 0.15), + (PermissionWeight.HidePasswords, 0.10) + ); + + /// + /// Mid-market, write-heavy. Collaborative teams where most users create and edit. + /// + public static Distribution MidMarketWriteHeavy { get; } = new( + (PermissionWeight.ReadWrite, 0.50), + (PermissionWeight.Manage, 0.20), + (PermissionWeight.ReadOnly, 0.20), + (PermissionWeight.HidePasswords, 0.10) + ); + + /// + /// Mid-market, manage-heavy. Flat org where many people own their collections. + /// + public static Distribution MidMarketManageHeavy { get; } = new( + (PermissionWeight.Manage, 0.40), + (PermissionWeight.ReadWrite, 0.30), + (PermissionWeight.ReadOnly, 0.20), + (PermissionWeight.HidePasswords, 0.10) + ); + + /// + /// Small business, read-heavy. Tighter controls despite small size — onboarding, contractors. 
+ /// + public static Distribution SmallBusiness { get; } = new( + (PermissionWeight.ReadOnly, 0.40), + (PermissionWeight.ReadWrite, 0.30), + (PermissionWeight.Manage, 0.25), + (PermissionWeight.HidePasswords, 0.05) + ); + + /// + /// Small business, write-heavy. High-trust team where most people edit freely. + /// + public static Distribution SmallBusinessWriteHeavy { get; } = new( + (PermissionWeight.ReadWrite, 0.45), + (PermissionWeight.Manage, 0.35), + (PermissionWeight.ReadOnly, 0.15), + (PermissionWeight.HidePasswords, 0.05) + ); + + /// + /// Small business, manage-heavy. Founders and senior staff own most collections. + /// + public static Distribution SmallBusinessManageHeavy { get; } = new( + (PermissionWeight.Manage, 0.50), + (PermissionWeight.ReadWrite, 0.30), + (PermissionWeight.ReadOnly, 0.15), + (PermissionWeight.HidePasswords, 0.05) + ); + + /// + /// Teams Starter. Tiny high-trust team — heavy Manage, everyone contributes. + /// + public static Distribution TeamsStarter { get; } = new( + (PermissionWeight.Manage, 0.50), + (PermissionWeight.ReadWrite, 0.40), + (PermissionWeight.ReadOnly, 0.10) + ); + + /// + /// Families plan. Shared household — nearly everyone manages everything. + /// + public static Distribution Family { get; } = new( + (PermissionWeight.Manage, 0.70), + (PermissionWeight.ReadWrite, 0.20), + (PermissionWeight.ReadOnly, 0.10) + ); +} diff --git a/util/Seeder/Data/Enums/CipherCollectionSkew.cs b/util/Seeder/Data/Enums/CipherCollectionSkew.cs new file mode 100644 index 0000000000..aa1645d274 --- /dev/null +++ b/util/Seeder/Data/Enums/CipherCollectionSkew.cs @@ -0,0 +1,6 @@ +namespace Bit.Seeder.Data.Enums; + +/// +/// Skew shape for cipher-to-collection assignment distribution. 
+/// +public enum CipherCollectionSkew { Uniform, HeavyRight } diff --git a/util/Seeder/Data/Enums/CollectionFanOutShape.cs b/util/Seeder/Data/Enums/CollectionFanOutShape.cs new file mode 100644 index 0000000000..4d472ffbb5 --- /dev/null +++ b/util/Seeder/Data/Enums/CollectionFanOutShape.cs @@ -0,0 +1,6 @@ +namespace Bit.Seeder.Data.Enums; + +/// +/// Distribution shape for group-to-collection fan-out. +/// +public enum CollectionFanOutShape { Uniform, PowerLaw, FrontLoaded } diff --git a/util/Seeder/Data/Enums/MembershipDistributionShape.cs b/util/Seeder/Data/Enums/MembershipDistributionShape.cs new file mode 100644 index 0000000000..268a679138 --- /dev/null +++ b/util/Seeder/Data/Enums/MembershipDistributionShape.cs @@ -0,0 +1,6 @@ +namespace Bit.Seeder.Data.Enums; + +/// +/// Distribution shape for user-to-group membership assignment. +/// +public enum MembershipDistributionShape { Uniform, PowerLaw, MegaGroup } diff --git a/util/Seeder/Data/Enums/PermissionWeight.cs b/util/Seeder/Data/Enums/PermissionWeight.cs new file mode 100644 index 0000000000..7f8e35dbcc --- /dev/null +++ b/util/Seeder/Data/Enums/PermissionWeight.cs @@ -0,0 +1,6 @@ +namespace Bit.Seeder.Data.Enums; + +/// +/// Collection access permission types for distribution weighting. +/// +public enum PermissionWeight { ReadWrite, ReadOnly, HidePasswords, Manage } diff --git a/util/Seeder/Models/SeedPreset.cs b/util/Seeder/Models/SeedPreset.cs index 64afa26f71..d9c22d9408 100644 --- a/util/Seeder/Models/SeedPreset.cs +++ b/util/Seeder/Models/SeedPreset.cs @@ -10,6 +10,7 @@ internal record SeedPreset public bool? Folders { get; init; } public SeedPresetCiphers? Ciphers { get; init; } public SeedPresetPersonalCiphers? PersonalCiphers { get; init; } + public SeedPresetDensity? 
Density { get; init; } } internal record SeedPresetOrganization diff --git a/util/Seeder/Models/SeedPresetDensity.cs b/util/Seeder/Models/SeedPresetDensity.cs new file mode 100644 index 0000000000..22adeb9b59 --- /dev/null +++ b/util/Seeder/Models/SeedPresetDensity.cs @@ -0,0 +1,65 @@ +namespace Bit.Seeder.Models; + +/// +/// Top-level density block in a preset JSON. Controls relationship patterns between entities. +/// +internal record SeedPresetDensity +{ + public SeedPresetMembership? Membership { get; init; } + + public SeedPresetCollectionFanOut? CollectionFanOut { get; init; } + + public double? DirectAccessRatio { get; init; } + + public SeedPresetPermissions? Permissions { get; init; } + + public SeedPresetCipherAssignment? CipherAssignment { get; init; } +} + +/// +/// How users are distributed across groups (uniform, powerLaw, megaGroup) and skew intensity. +/// +internal record SeedPresetMembership +{ + public string? Shape { get; init; } + + public double? Skew { get; init; } +} + +/// +/// How collections are assigned to groups: range, distribution shape, and empty group rate. +/// +internal record SeedPresetCollectionFanOut +{ + public int? Min { get; init; } + + public int? Max { get; init; } + + public string? Shape { get; init; } + + public double? EmptyGroupRate { get; init; } +} + +/// +/// Permission type weights for collection access assignments. Must sum to 1.0. +/// +internal record SeedPresetPermissions +{ + public double? Manage { get; init; } + + public double? ReadOnly { get; init; } + + public double? HidePasswords { get; init; } + + public double? ReadWrite { get; init; } +} + +/// +/// How ciphers are distributed across collections: skew shape and orphan rate. +/// +internal record SeedPresetCipherAssignment +{ + public string? Skew { get; init; } + + public double? 
OrphanRate { get; init; } +} diff --git a/util/Seeder/Options/DensityProfile.cs b/util/Seeder/Options/DensityProfile.cs new file mode 100644 index 0000000000..0b6317bef4 --- /dev/null +++ b/util/Seeder/Options/DensityProfile.cs @@ -0,0 +1,62 @@ +using Bit.Seeder.Data.Distributions; +using Bit.Seeder.Data.Enums; + +namespace Bit.Seeder.Options; + +/// +/// Controls relationship density between users, groups, collections, and ciphers within a seeded organization. +/// When null on , steps use default round-robin behavior. +/// +public class DensityProfile +{ + /// + /// User-to-group membership distribution shape. Defaults to Uniform (round-robin). + /// + public MembershipDistributionShape MembershipShape { get; init; } = MembershipDistributionShape.Uniform; + + /// + /// Skew intensity for PowerLaw and MegaGroup shapes (0.0-1.0). Ignored for Uniform. + /// + public double MembershipSkew { get; init; } + + /// + /// Minimum collections assigned per non-empty group. + /// + public int CollectionFanOutMin { get; init; } = 1; + + /// + /// Maximum collections assigned per non-empty group. + /// + public int CollectionFanOutMax { get; init; } = 3; + + /// + /// Distribution shape for group-to-collection fan-out. + /// + public CollectionFanOutShape FanOutShape { get; init; } = CollectionFanOutShape.Uniform; + + /// + /// Fraction of groups with zero members (0.0-1.0). + /// + public double EmptyGroupRate { get; init; } + + /// + /// Fraction of access paths that are direct CollectionUser assignments (0.0-1.0). + /// 1.0 = all direct (current default), 0.0 = all group-mediated. + /// + public double DirectAccessRatio { get; init; } = 1.0; + + /// + /// Permission type weighting for collection access assignments. + /// + public Distribution PermissionDistribution { get; init; } = PermissionDistributions.Enterprise; + + /// + /// Cipher-to-collection assignment skew shape. 
+ /// + public CipherCollectionSkew CipherSkew { get; init; } = CipherCollectionSkew.Uniform; + + /// + /// Fraction of org ciphers with no collection assignment (0.0-1.0). + /// + public double OrphanCipherRate { get; init; } +} diff --git a/util/Seeder/Options/OrganizationVaultOptions.cs b/util/Seeder/Options/OrganizationVaultOptions.cs index 0f6cfb9c11..18ce00e6f0 100644 --- a/util/Seeder/Options/OrganizationVaultOptions.cs +++ b/util/Seeder/Options/OrganizationVaultOptions.cs @@ -78,6 +78,12 @@ public class OrganizationVaultOptions /// public Distribution CipherTypeDistribution { get; init; } = CipherTypeDistributions.Realistic; + /// + /// Density profile controlling entity relationship patterns. + /// When null, steps use default round-robin behavior. + /// + public DensityProfile? Density { get; init; } + /// /// Seed for deterministic data generation. When null, derived from Domain hash. /// diff --git a/util/Seeder/Pipeline/PresetLoader.cs b/util/Seeder/Pipeline/PresetLoader.cs index e882dd31db..b2ce21f493 100644 --- a/util/Seeder/Pipeline/PresetLoader.cs +++ b/util/Seeder/Pipeline/PresetLoader.cs @@ -1,5 +1,8 @@ -using Bit.Seeder.Factories; +using Bit.Seeder.Data.Distributions; +using Bit.Seeder.Data.Enums; +using Bit.Seeder.Factories; using Bit.Seeder.Models; +using Bit.Seeder.Options; using Bit.Seeder.Services; using Microsoft.Extensions.DependencyInjection; @@ -83,14 +86,16 @@ internal static class PresetLoader builder.AddUsers(preset.Users.Count, preset.Users.RealisticStatusMix); } + var density = ParseDensity(preset.Density); + if (preset.Groups is not null) { - builder.AddGroups(preset.Groups.Count); + builder.AddGroups(preset.Groups.Count, density); } if (preset.Collections is not null) { - builder.AddCollections(preset.Collections.Count); + builder.AddCollections(preset.Collections.Count, density); } if (preset.Folders == true) @@ -104,7 +109,7 @@ internal static class PresetLoader } else if (preset.Ciphers is not null && preset.Ciphers.Count > 0) 
{ - builder.AddCiphers(preset.Ciphers.Count, assignFolders: preset.Ciphers.AssignFolders); + builder.AddCiphers(preset.Ciphers.Count, assignFolders: preset.Ciphers.AssignFolders, density: density); } if (preset.PersonalCiphers is not null && preset.PersonalCiphers.CountPerUser > 0) @@ -114,4 +119,54 @@ internal static class PresetLoader builder.Validate(); } + + private static DensityProfile? ParseDensity(SeedPresetDensity? preset) + { + if (preset is null) + { + return null; + } + + return new DensityProfile + { + MembershipShape = ParseEnum(preset.Membership?.Shape, MembershipDistributionShape.Uniform), + MembershipSkew = preset.Membership?.Skew ?? 0, + CollectionFanOutMin = preset.CollectionFanOut?.Min ?? 1, + CollectionFanOutMax = preset.CollectionFanOut?.Max ?? 3, + FanOutShape = ParseEnum(preset.CollectionFanOut?.Shape, CollectionFanOutShape.Uniform), + EmptyGroupRate = preset.CollectionFanOut?.EmptyGroupRate ?? 0, + DirectAccessRatio = preset.DirectAccessRatio ?? 1.0, + PermissionDistribution = ParsePermissions(preset.Permissions), + CipherSkew = ParseEnum(preset.CipherAssignment?.Skew, CipherCollectionSkew.Uniform), + OrphanCipherRate = preset.CipherAssignment?.OrphanRate ?? 0, + }; + } + + private static Distribution ParsePermissions(SeedPresetPermissions? permissions) + { + if (permissions is null) + { + return PermissionDistributions.Enterprise; + } + + var readOnly = permissions.ReadOnly ?? 0; + var readWrite = permissions.ReadWrite ?? 0; + var manage = permissions.Manage ?? 0; + var hidePasswords = permissions.HidePasswords ?? 
0; + + // Empty permissions block (all nulls → zeros) — fall back to Enterprise defaults + if (readOnly + readWrite + manage + hidePasswords < 0.001) + { + return PermissionDistributions.Enterprise; + } + + return new Distribution( + (PermissionWeight.ReadOnly, readOnly), + (PermissionWeight.ReadWrite, readWrite), + (PermissionWeight.Manage, manage), + (PermissionWeight.HidePasswords, hidePasswords)); + } + + private static T ParseEnum(string? value, T defaultValue) where T : struct, Enum => + value is not null && Enum.TryParse(value, ignoreCase: true, out var result) ? result : defaultValue; } diff --git a/util/Seeder/Pipeline/RecipeBuilderExtensions.cs b/util/Seeder/Pipeline/RecipeBuilderExtensions.cs index c465d9d713..58a93fd4f6 100644 --- a/util/Seeder/Pipeline/RecipeBuilderExtensions.cs +++ b/util/Seeder/Pipeline/RecipeBuilderExtensions.cs @@ -3,6 +3,7 @@ using Bit.Core.Vault.Enums; using Bit.Seeder.Data.Distributions; using Bit.Seeder.Data.Enums; using Bit.Seeder.Models; +using Bit.Seeder.Options; using Bit.Seeder.Services; using Bit.Seeder.Steps; @@ -127,7 +128,7 @@ public static class RecipeBuilderExtensions /// Number of groups to generate /// The builder for fluent chaining /// Thrown when no users exist - public static RecipeBuilder AddGroups(this RecipeBuilder builder, int count) + public static RecipeBuilder AddGroups(this RecipeBuilder builder, int count, DensityProfile? density = null) { if (!builder.HasRosterUsers && !builder.HasGeneratedUsers) { @@ -135,7 +136,7 @@ public static class RecipeBuilderExtensions "Groups require users. 
Call UseRoster() or AddUsers() first."); } - builder.AddStep(_ => new CreateGroupsStep(count)); + builder.AddStep(_ => new CreateGroupsStep(count, density)); return builder; } @@ -146,7 +147,7 @@ public static class RecipeBuilderExtensions /// Number of collections to generate /// The builder for fluent chaining /// Thrown when no users exist - public static RecipeBuilder AddCollections(this RecipeBuilder builder, int count) + public static RecipeBuilder AddCollections(this RecipeBuilder builder, int count, DensityProfile? density = null) { if (!builder.HasRosterUsers && !builder.HasGeneratedUsers) { @@ -154,7 +155,7 @@ public static class RecipeBuilderExtensions "Collections require users. Call UseRoster() or AddUsers() first."); } - builder.AddStep(_ => CreateCollectionsStep.FromCount(count)); + builder.AddStep(_ => CreateCollectionsStep.FromCount(count, density)); return builder; } @@ -228,7 +229,8 @@ public static class RecipeBuilderExtensions int count, Distribution? typeDist = null, Distribution? pwDist = null, - bool assignFolders = false) + bool assignFolders = false, + DensityProfile? 
density = null) { if (builder.HasFixtureCiphers) { @@ -241,7 +243,7 @@ public static class RecipeBuilderExtensions { builder.HasCipherFolderAssignment = true; } - builder.AddStep(_ => new GenerateCiphersStep(count, typeDist, pwDist, assignFolders)); + builder.AddStep(_ => new GenerateCiphersStep(count, typeDist, pwDist, assignFolders, density)); return builder; } diff --git a/util/Seeder/Pipeline/RecipeOrchestrator.cs b/util/Seeder/Pipeline/RecipeOrchestrator.cs index c1bfbb5349..1cebfe210f 100644 --- a/util/Seeder/Pipeline/RecipeOrchestrator.cs +++ b/util/Seeder/Pipeline/RecipeOrchestrator.cs @@ -68,7 +68,7 @@ internal sealed class RecipeOrchestrator(DatabaseContext db, IMapper mapper) if (options.Groups > 0) { - builder.AddGroups(options.Groups); + builder.AddGroups(options.Groups, options.Density); } if (options.StructureModel.HasValue) @@ -77,13 +77,13 @@ internal sealed class RecipeOrchestrator(DatabaseContext db, IMapper mapper) } else if (options.Ciphers > 0) { - builder.AddCollections(1); + builder.AddCollections(1, options.Density); } if (options.Ciphers > 0) { builder.AddFolders(); - builder.AddCiphers(options.Ciphers, options.CipherTypeDistribution, options.PasswordDistribution); + builder.AddCiphers(options.Ciphers, options.CipherTypeDistribution, options.PasswordDistribution, density: options.Density); } builder.Validate(); diff --git a/util/Seeder/Seeds/fixtures/presets/validation/README.md b/util/Seeder/Seeds/fixtures/presets/validation/README.md new file mode 100644 index 0000000000..b4693febcc --- /dev/null +++ b/util/Seeder/Seeds/fixtures/presets/validation/README.md @@ -0,0 +1,153 @@ +# Density Modeling Validation Presets + +These presets validate that the Seeder's density distribution algorithms produce correct relationship patterns. Run them, query the DB, and compare against the expected results below. + +Always use the `--mangle` flag to avoid collisions with existing data. 
+ +## Verification Queries + +Run the first query to get the Organization ID, then paste it into the remaining queries. + +### Find the Organization ID + +```sql +SELECT Id, [Name] +FROM [dbo].[Organization] WITH (NOLOCK) +WHERE [Name] = 'PASTE_ORG_NAME_HERE'; +``` + +### Group Membership Distribution + +```sql +DECLARE @OrgId UNIQUEIDENTIFIER = 'PASTE_ORG_ID_HERE'; + +SELECT + G.[Name], + COUNT(GU.OrganizationUserId) AS Members +FROM [dbo].[Group] G WITH (NOLOCK) +LEFT JOIN [dbo].[GroupUser] GU WITH (NOLOCK) ON G.Id = GU.GroupId +WHERE G.OrganizationId = @OrgId +GROUP BY G.[Name] +ORDER BY Members DESC; +``` + +### CollectionGroup Count + +```sql +DECLARE @OrgId UNIQUEIDENTIFIER = 'PASTE_ORG_ID_HERE'; + +SELECT COUNT(*) AS CollectionGroupCount +FROM [dbo].[CollectionGroup] CG WITH (NOLOCK) +INNER JOIN [dbo].[Collection] C WITH (NOLOCK) ON CG.CollectionId = C.Id +WHERE C.OrganizationId = @OrgId; +``` + +### Permission Distribution + +```sql +DECLARE @OrgId UNIQUEIDENTIFIER = 'PASTE_ORG_ID_HERE'; + +SELECT + 'CollectionUser' AS [Source], + COUNT(*) AS Total, + SUM(CASE WHEN CU.ReadOnly = 1 THEN 1 ELSE 0 END) AS ReadOnly, + SUM(CASE WHEN CU.Manage = 1 THEN 1 ELSE 0 END) AS Manage, + SUM(CASE WHEN CU.HidePasswords = 1 THEN 1 ELSE 0 END) AS HidePasswords, + SUM(CASE WHEN CU.ReadOnly = 0 AND CU.Manage = 0 AND CU.HidePasswords = 0 THEN 1 ELSE 0 END) AS ReadWrite +FROM [dbo].[CollectionUser] CU WITH (NOLOCK) +INNER JOIN [dbo].[OrganizationUser] OU WITH (NOLOCK) ON CU.OrganizationUserId = OU.Id +WHERE OU.OrganizationId = @OrgId +UNION ALL +SELECT + 'CollectionGroup', + COUNT(*), + SUM(CASE WHEN CG.ReadOnly = 1 THEN 1 ELSE 0 END), + SUM(CASE WHEN CG.Manage = 1 THEN 1 ELSE 0 END), + SUM(CASE WHEN CG.HidePasswords = 1 THEN 1 ELSE 0 END), + SUM(CASE WHEN CG.ReadOnly = 0 AND CG.Manage = 0 AND CG.HidePasswords = 0 THEN 1 ELSE 0 END) +FROM [dbo].[CollectionGroup] CG WITH (NOLOCK) +INNER JOIN [dbo].[Collection] C WITH (NOLOCK) ON CG.CollectionId = C.Id +WHERE C.OrganizationId = 
@OrgId; +``` + +### Orphan Ciphers + +```sql +DECLARE @OrgId UNIQUEIDENTIFIER = 'PASTE_ORG_ID_HERE'; + +SELECT + COUNT(*) AS TotalCiphers, + SUM(CASE WHEN CC.CipherId IS NULL THEN 1 ELSE 0 END) AS Orphans +FROM [dbo].[Cipher] CI WITH (NOLOCK) +LEFT JOIN (SELECT DISTINCT CipherId FROM [dbo].[CollectionCipher] WITH (NOLOCK)) CC + ON CI.Id = CC.CipherId +WHERE CI.OrganizationId = @OrgId; +``` + +--- + +## Presets + +### 1. Power-Law Distribution + +Tests skewed group membership, CollectionGroup generation, permission distribution, and cipher orphans. + +```bash +cd util/SeederUtility +dotnet run -- seed --preset validation.density-modeling-power-law-test --mangle +``` + +| Check | Expected | +| ----------------- | -------------------------------------------------------------------------------------- | +| Groups | 10 groups. First has ~50 members, decays to 1. Last 2 have 0 members (20% empty rate). | +| CollectionGroups | > 0 records. First collections have more groups assigned (PowerLaw fan-out). | +| Permissions | ~50% ReadOnly, ~30% ReadWrite, ~15% Manage, ~5% HidePasswords. | +| Orphan ciphers | ~50 of 500 (10% orphan rate). | +| DirectAccessRatio | 0.6 — roughly 60% of users get direct CollectionUser records. | + +### 2. MegaGroup Distribution + +Tests one dominant group with all-group access (no direct CollectionUser). + +```bash +cd util/SeederUtility +dotnet run -- seed --preset validation.density-modeling-mega-group-test --mangle +``` + +| Check | Expected | +| ---------------- | ------------------------------------------------------------------------ | +| Groups | 5 groups. Group 1 has ~90 members (90.5%). Groups 2-5 split ~10 members. | +| CollectionUsers | 0 records. DirectAccessRatio is 0.0 — all access via groups. | +| CollectionGroups | > 0. First 10 collections get 3 groups (FrontLoaded), rest get 1. | +| Permissions | 25% each for ReadOnly, ReadWrite, Manage, HidePasswords (even split). | + +### 3.
Empty Groups + +Tests that EmptyGroupRate produces memberless groups excluded from CollectionGroup assignment. + +```bash +cd util/SeederUtility +dotnet run -- seed --preset validation.density-modeling-empty-groups-test --mangle +``` + +| Check | Expected | +| ----------------- | ---------------------------------------------------------------------------------- | +| Groups | 10 groups total. 5 with ~10 members each, 5 with 0 members (50% empty). | +| CollectionGroups | Only reference the 5 non-empty groups. Run `SELECT DISTINCT CG.GroupId` to verify. | +| DirectAccessRatio | 0.5 — roughly half of users get direct CollectionUser records. | + +### 4. No Density (Baseline) + +Confirms backward compatibility. No `density` block = original round-robin behavior. + +```bash +cd util/SeederUtility +dotnet run -- seed --preset validation.density-modeling-no-density-test --mangle +``` + +| Check | Expected | +| ---------------- | ---------------------------------------------------------------------------------------- | +| Groups | 5 groups with ~10 members each (uniform round-robin). | +| CollectionGroups | 0 records. No density = no CollectionGroup generation. | +| Permissions | First assignment per user is Manage, subsequent are ReadOnly (original cycling pattern). | +| Orphan ciphers | 0. Every cipher assigned to at least one collection. 
| diff --git a/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-empty-groups-test.json b/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-empty-groups-test.json new file mode 100644 index 0000000000..f7076c759c --- /dev/null +++ b/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-empty-groups-test.json @@ -0,0 +1,21 @@ +{ + "$schema": "../../../schemas/preset.schema.json", + "organization": { + "name": "Density Empty Groups Test", + "domain": "density-empty-groups.example", + "planType": "enterprise-annually" + }, + "users": { "count": 50 }, + "groups": { "count": 10 }, + "collections": { "count": 20 }, + "density": { + "membership": { "shape": "uniform" }, + "collectionFanOut": { + "min": 2, + "max": 4, + "emptyGroupRate": 0.5 + }, + "directAccessRatio": 0.5 + }, + "ciphers": { "count": 200 } +} diff --git a/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-mega-group-test.json b/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-mega-group-test.json new file mode 100644 index 0000000000..b1be44b9a7 --- /dev/null +++ b/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-mega-group-test.json @@ -0,0 +1,27 @@ +{ + "$schema": "../../../schemas/preset.schema.json", + "organization": { + "name": "Density MegaGroup Test", + "domain": "density-megagroup.example", + "planType": "enterprise-annually" + }, + "users": { "count": 100 }, + "groups": { "count": 5 }, + "collections": { "count": 100 }, + "density": { + "membership": { "shape": "megaGroup", "skew": 0.9 }, + "collectionFanOut": { + "min": 1, + "max": 3, + "shape": "frontLoaded" + }, + "directAccessRatio": 0.0, + "permissions": { + "readOnly": 0.25, + "readWrite": 0.25, + "manage": 0.25, + "hidePasswords": 0.25 + } + }, + "ciphers": { "count": 1000 } +} diff --git a/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-no-density-test.json b/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-no-density-test.json new 
file mode 100644 index 0000000000..76b8a5695b --- /dev/null +++ b/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-no-density-test.json @@ -0,0 +1,12 @@ +{ + "$schema": "../../../schemas/preset.schema.json", + "organization": { + "name": "No Density Baseline Test", + "domain": "no-density-baseline.example", + "planType": "enterprise-annually" + }, + "users": { "count": 50 }, + "groups": { "count": 5 }, + "collections": { "count": 20 }, + "ciphers": { "count": 200 } +} diff --git a/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-power-law-test.json b/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-power-law-test.json new file mode 100644 index 0000000000..baa0ed9927 --- /dev/null +++ b/util/Seeder/Seeds/fixtures/presets/validation/density-modeling-power-law-test.json @@ -0,0 +1,29 @@ +{ + "$schema": "../../../schemas/preset.schema.json", + "organization": { + "name": "Density Test Org", + "domain": "density-test.example", + "planType": "enterprise-annually" + }, + "users": { "count": 100, "realisticStatusMix": true }, + "groups": { "count": 10 }, + "collections": { "count": 50 }, + "density": { + "membership": { "shape": "powerLaw", "skew": 0.8 }, + "collectionFanOut": { + "min": 1, + "max": 5, + "shape": "powerLaw", + "emptyGroupRate": 0.2 + }, + "directAccessRatio": 0.6, + "permissions": { + "readOnly": 0.5, + "readWrite": 0.3, + "manage": 0.15, + "hidePasswords": 0.05 + }, + "cipherAssignment": { "skew": "heavyRight", "orphanRate": 0.1 } + }, + "ciphers": { "count": 500 } +} diff --git a/util/Seeder/Seeds/schemas/preset.schema.json b/util/Seeder/Seeds/schemas/preset.schema.json index 4371d5c6b2..f8a6cdced3 100644 --- a/util/Seeder/Seeds/schemas/preset.schema.json +++ b/util/Seeder/Seeds/schemas/preset.schema.json @@ -71,7 +71,7 @@ }, "groups": { "type": "object", - "description": "Generate random groups with round-robin user assignment.", + "description": "Generate random groups with user assignment. 
Distribution shape is configurable via the density block.", "additionalProperties": false, "properties": { "count": { @@ -84,7 +84,7 @@ }, "collections": { "type": "object", - "description": "Generate random collections with user assignments.", + "description": "Generate random collections with user and group assignments. Access patterns are configurable via the density block.", "additionalProperties": false, "properties": { "count": { @@ -176,6 +176,114 @@ "description": "Identity provider protocol." } } + }, + "density": { + "type": "object", + "description": "Density profile controlling how users, groups, collections, and ciphers relate within the seeded organization.", + "additionalProperties": false, + "properties": { + "membership": { + "type": "object", + "description": "How users are distributed across groups.", + "additionalProperties": false, + "properties": { + "shape": { + "type": "string", + "enum": ["uniform", "powerLaw", "megaGroup"], + "description": "How users spread across groups. 'uniform' = equal sizes, 'powerLaw' = few large groups and many small, 'megaGroup' = one dominant group gets 50-95% of users depending on skew." + }, + "skew": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Skew intensity for powerLaw and megaGroup shapes. Ignored for uniform." + } + } + }, + "collectionFanOut": { + "type": "object", + "description": "How collections are assigned to groups.", + "additionalProperties": false, + "properties": { + "min": { + "type": "integer", + "minimum": 0, + "description": "Minimum number of groups assigned to each collection." + }, + "max": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of groups assigned to each collection (capped at the number of non-empty groups)." + }, + "shape": { + "type": "string", + "enum": ["uniform", "powerLaw", "frontLoaded"], + "description": "How group assignments spread across collections. 'uniform' = fan-out cycles evenly through min..max, 'powerLaw' = the first collections get the most groups and fan-out decays toward min, 'frontLoaded' = first 10% of collections get max fan-out, rest get min."
+ }, + "emptyGroupRate": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Fraction of groups that have zero users assigned to them (0.0-1.0)." + } + } + }, + "directAccessRatio": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Fraction of users that receive direct CollectionUser assignments. 1.0 = every user gets direct access and no CollectionGroup records are generated, 0.0 = no direct assignments (all access is group-mediated)." + }, + "permissions": { + "type": "object", + "description": "How Manage, ReadOnly, HidePasswords, and ReadWrite permissions are distributed across collection access assignments.", + "additionalProperties": false, + "properties": { + "manage": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for Manage permission (0.0-1.0). All four weights must sum to 1.0." + }, + "readOnly": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for ReadOnly permission (0.0-1.0). All four weights must sum to 1.0." + }, + "hidePasswords": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for HidePasswords permission (0.0-1.0). All four weights must sum to 1.0." + }, + "readWrite": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Weight for ReadWrite permission (0.0-1.0). All four weights must sum to 1.0." + } + } + }, + "cipherAssignment": { + "type": "object", + "description": "How ciphers are distributed across collections.", + "additionalProperties": false, + "properties": { + "skew": { + "type": "string", + "enum": ["uniform", "heavyRight"], + "description": "How ciphers spread across collections. 'uniform' = equal counts (round-robin), 'heavyRight' = cipher counts ramp up toward later collections, so the earliest collections are sparse and the latest hold the most." + }, + "orphanRate": { + "type": "number", + "minimum": 0.0, + "maximum": 1.0, + "description": "Fraction of org ciphers with no collection assignment."
+ } + } + } + } } } } diff --git a/util/Seeder/Steps/CreateCollectionsStep.cs b/util/Seeder/Steps/CreateCollectionsStep.cs index a6dd859c6e..9747e2eb79 100644 --- a/util/Seeder/Steps/CreateCollectionsStep.cs +++ b/util/Seeder/Steps/CreateCollectionsStep.cs @@ -1,7 +1,9 @@ using Bit.Core.Entities; +using Bit.Seeder.Data.Distributions; using Bit.Seeder.Data.Enums; using Bit.Seeder.Data.Static; using Bit.Seeder.Factories; +using Bit.Seeder.Options; using Bit.Seeder.Pipeline; namespace Bit.Seeder.Steps; @@ -10,14 +12,16 @@ internal sealed class CreateCollectionsStep : IStep { private readonly int _count; private readonly OrgStructureModel? _structure; + private readonly DensityProfile? _density; - private CreateCollectionsStep(int count, OrgStructureModel? structure) + private CreateCollectionsStep(int count, OrgStructureModel? structure, DensityProfile? density = null) { _count = count; _structure = structure; + _density = density; } - internal static CreateCollectionsStep FromCount(int count) => new(count, null); + internal static CreateCollectionsStep FromCount(int count, DensityProfile? 
density = null) => new(count, null, density); internal static CreateCollectionsStep FromStructure(OrgStructureModel structure) => new(0, structure); @@ -44,27 +48,159 @@ internal sealed class CreateCollectionsStep : IStep } var collectionIds = collections.Select(c => c.Id).ToList(); - var collectionUsers = new List(); - - // User assignment: cycling 1-3 collections per user - if (collections.Count > 0 && hardenedOrgUserIds.Count > 0) - { - foreach (var (orgUserId, userIndex) in hardenedOrgUserIds.Select((id, i) => (id, i))) - { - var maxAssignments = Math.Min((userIndex % 3) + 1, collections.Count); - for (var j = 0; j < maxAssignments; j++) - { - collectionUsers.Add(CollectionUserSeeder.Create( - collections[(userIndex + j) % collections.Count].Id, - orgUserId, - readOnly: j > 0, - manage: j == 0)); - } - } - } context.Collections.AddRange(collections); context.Registry.CollectionIds.AddRange(collectionIds); - context.CollectionUsers.AddRange(collectionUsers); + + if (collections.Count == 0) + { + return; + } + + if (_density == null) + { + var collectionUsers = new List(); + if (hardenedOrgUserIds.Count > 0) + { + foreach (var (orgUserId, userIndex) in hardenedOrgUserIds.Select((id, i) => (id, i))) + { + var maxAssignments = Math.Min((userIndex % 3) + 1, collections.Count); + for (var j = 0; j < maxAssignments; j++) + { + collectionUsers.Add(CollectionUserSeeder.Create( + collections[(userIndex + j) % collections.Count].Id, + orgUserId, + readOnly: j > 0, + manage: j == 0)); + } + } + } + context.CollectionUsers.AddRange(collectionUsers); + return; + } + + var groupIds = context.Registry.GroupIds; + + if (_density.DirectAccessRatio < 1.0 && groupIds.Count > 0) + { + var collectionGroups = BuildCollectionGroups(collectionIds, groupIds); + ApplyGroupPermissions(collectionGroups, _density.PermissionDistribution); + context.CollectionGroups.AddRange(collectionGroups); + } + + var directUserCount = (int)(hardenedOrgUserIds.Count * _density.DirectAccessRatio); + if 
(directUserCount > 0) + { + var directUsers = BuildCollectionUsers(collectionIds, hardenedOrgUserIds, directUserCount); + ApplyUserPermissions(directUsers, _density.PermissionDistribution); + context.CollectionUsers.AddRange(directUsers); + } + } + + internal List BuildCollectionGroups(List collectionIds, List groupIds) + { + var min = _density!.CollectionFanOutMin; + var max = _density.CollectionFanOutMax; + var result = new List(collectionIds.Count * (min + max + 1) / 2); + + for (var c = 0; c < collectionIds.Count; c++) + { + var fanOut = ComputeFanOut(c, collectionIds.Count, min, max); + fanOut = Math.Min(fanOut, groupIds.Count); + + for (var g = 0; g < fanOut; g++) + { + result.Add(CollectionGroupSeeder.Create( + collectionIds[c], + groupIds[(c + g) % groupIds.Count])); + } + } + + return result; + } + + internal int ComputeFanOut(int collectionIndex, int collectionCount, int min, int max) + { + var range = max - min + 1; + if (range <= 1) + { + return min; + } + + switch (_density!.FanOutShape) + { + case CollectionFanOutShape.PowerLaw: + // Zipf weight normalized against index 0 (where weight = 1.0), scaled to [min, max] + var weight = 1.0 / Math.Pow(collectionIndex + 1, 0.8); + return min + (int)(weight * (range - 1) + 0.5); + + case CollectionFanOutShape.FrontLoaded: + // First 10% of collections get max fan-out, rest get min + var topCount = Math.Max(1, collectionCount / 10); + return collectionIndex < topCount ? 
max : min; + + case CollectionFanOutShape.Uniform: + return min + (collectionIndex % range); + + default: + throw new InvalidOperationException( + $"Unhandled CollectionFanOutShape: {_density.FanOutShape}"); + } + } + + internal static List BuildCollectionUsers( + List collectionIds, List userIds, int directUserCount) + { + var result = new List(directUserCount * 2); + for (var i = 0; i < directUserCount; i++) + { + var maxAssignments = Math.Min((i % 3) + 1, collectionIds.Count); + for (var j = 0; j < maxAssignments; j++) + { + result.Add(CollectionUserSeeder.Create( + collectionIds[(i + j) % collectionIds.Count], + userIds[i])); + } + } + return result; + } + + private static (bool ReadOnly, bool HidePasswords, bool Manage) ResolvePermission( + Distribution distribution, int index, int total) + { + var weight = distribution.Select(index, total); + return weight switch + { + PermissionWeight.ReadOnly => (true, false, false), + PermissionWeight.HidePasswords => (false, true, false), + PermissionWeight.Manage => (false, false, true), + PermissionWeight.ReadWrite => (false, false, false), + _ => throw new InvalidOperationException( + $"Unhandled PermissionWeight: {weight}") + }; + } + + internal static void ApplyGroupPermissions( + List assignments, Distribution distribution) + { + for (var i = 0; i < assignments.Count; i++) + { + var (readOnly, hidePasswords, manage) = ResolvePermission(distribution, i, assignments.Count); + assignments[i].ReadOnly = readOnly; + assignments[i].HidePasswords = hidePasswords; + assignments[i].Manage = manage; + } + } + + internal static void ApplyUserPermissions( + List assignments, Distribution distribution) + { + for (var i = 0; i < assignments.Count; i++) + { + var (readOnly, hidePasswords, manage) = ResolvePermission(distribution, i, assignments.Count); + assignments[i].ReadOnly = readOnly; + assignments[i].HidePasswords = hidePasswords; + assignments[i].Manage = manage; + } } } diff --git a/util/Seeder/Steps/CreateGroupsStep.cs 
b/util/Seeder/Steps/CreateGroupsStep.cs index 7daa621017..892169901d 100644 --- a/util/Seeder/Steps/CreateGroupsStep.cs +++ b/util/Seeder/Steps/CreateGroupsStep.cs @@ -1,11 +1,14 @@ using Bit.Core.AdminConsole.Entities; using Bit.Seeder.Factories; +using Bit.Seeder.Options; using Bit.Seeder.Pipeline; namespace Bit.Seeder.Steps; -internal sealed class CreateGroupsStep(int count) : IStep +internal sealed class CreateGroupsStep(int count, DensityProfile? density = null) : IStep { + private readonly DensityProfile? _density = density; + public void Execute(SeederContext context) { var orgId = context.RequireOrgId(); @@ -13,7 +16,7 @@ internal sealed class CreateGroupsStep(int count) : IStep var groups = new List(count); var groupIds = new List(count); - var groupUsers = new List(); + var groupUsers = new List(hardenedOrgUserIds.Count); for (var i = 0; i < count; i++) { @@ -22,18 +25,115 @@ internal sealed class CreateGroupsStep(int count) : IStep groupIds.Add(group.Id); } - // Round-robin user assignment - if (groups.Count > 0 && hardenedOrgUserIds.Count > 0) + context.Groups.AddRange(groups); + + if (_density == null) { - for (var i = 0; i < hardenedOrgUserIds.Count; i++) + if (groups.Count > 0 && hardenedOrgUserIds.Count > 0) { - var groupId = groupIds[i % groups.Count]; - groupUsers.Add(GroupUserSeeder.Create(groupId, hardenedOrgUserIds[i])); + for (var i = 0; i < hardenedOrgUserIds.Count; i++) + { + var groupId = groupIds[i % groups.Count]; + groupUsers.Add(GroupUserSeeder.Create(groupId, hardenedOrgUserIds[i])); + } + } + + context.Registry.GroupIds.AddRange(groupIds); + } + else + { + var emptyCount = (int)(groups.Count * _density.EmptyGroupRate); + var activeGroupIds = groupIds.Take(groups.Count - emptyCount).ToList(); + + context.Registry.GroupIds.AddRange(activeGroupIds); + + if (activeGroupIds.Count > 0 && hardenedOrgUserIds.Count > 0) + { + var allocations = ComputeUsersPerGroup(activeGroupIds.Count, hardenedOrgUserIds.Count); + var userIndex = 0; + for (var 
g = 0; g < activeGroupIds.Count; g++) + { + for (var u = 0; u < allocations[g]; u++) + { + groupUsers.Add(GroupUserSeeder.Create(activeGroupIds[g], hardenedOrgUserIds[userIndex++])); + } + } } } - context.Groups.AddRange(groups); - context.Registry.GroupIds.AddRange(groupIds); context.GroupUsers.AddRange(groupUsers); } + + internal int[] ComputeUsersPerGroup(int groupCount, int userCount) + { + var allocations = new int[groupCount]; + + switch (_density!.MembershipShape) + { + case Data.Enums.MembershipDistributionShape.Uniform: + for (var i = 0; i < userCount; i++) + { + allocations[i % groupCount]++; + } + break; + + case Data.Enums.MembershipDistributionShape.PowerLaw: + // Maps MembershipSkew [0,1] to Zipf exponent [0.5, 2.0] + var exponent = 0.5 + _density.MembershipSkew * 1.5; + var fractional = new double[groupCount]; + var totalWeight = 0.0; + for (var i = 0; i < groupCount; i++) + { + fractional[i] = 1.0 / Math.Pow(i + 1, exponent); + totalWeight += fractional[i]; + } + + var assigned = 0; + for (var i = 0; i < groupCount; i++) + { + fractional[i] = fractional[i] / totalWeight * userCount; + allocations[i] = (int)fractional[i]; + assigned += allocations[i]; + } + + // Largest-remainder: give +1 to groups that lost the most from truncation + var remainder = userCount - assigned; + if (remainder > 0) + { + var indices = Enumerable.Range(0, groupCount) + .OrderByDescending(i => fractional[i] - allocations[i]) + .Take(remainder); + foreach (var i in indices) + { + allocations[i]++; + } + } + break; + + case Data.Enums.MembershipDistributionShape.MegaGroup: + // Maps MembershipSkew [0,1] to mega group share [50%, 95%] + var megaFraction = 0.5 + _density.MembershipSkew * 0.45; + var megaCount = (int)(userCount * megaFraction); + allocations[0] = megaCount; + var remaining = userCount - megaCount; + if (groupCount > 1) + { + for (var i = 0; i < remaining; i++) + { + allocations[1 + (i % (groupCount - 1))]++; + } + } + else + { + allocations[0] += remaining; + } + 
break; + + default: + throw new InvalidOperationException( + $"Unhandled MembershipDistributionShape: {_density.MembershipShape}"); + } + + return allocations; + } } diff --git a/util/Seeder/Steps/GenerateCiphersStep.cs b/util/Seeder/Steps/GenerateCiphersStep.cs index d5c0bea99c..517750db65 100644 --- a/util/Seeder/Steps/GenerateCiphersStep.cs +++ b/util/Seeder/Steps/GenerateCiphersStep.cs @@ -6,6 +6,7 @@ using Bit.Seeder.Data.Distributions; using Bit.Seeder.Data.Enums; using Bit.Seeder.Data.Static; using Bit.Seeder.Factories; +using Bit.Seeder.Options; using Bit.Seeder.Pipeline; namespace Bit.Seeder.Steps; @@ -16,7 +17,7 @@ namespace Bit.Seeder.Steps; /// /// Requires to have run first. Picks cipher types (login, card, /// identity, secureNote, sshKey) from a configurable distribution, delegates to the existing -/// cipher factories, and assigns each cipher to collections round-robin. Designed for load +/// cipher factories, and assigns ciphers to collections (configurable via density profile). Designed for load /// testing scenarios where you need thousands of realistic vault items. /// /// @@ -25,8 +26,11 @@ internal sealed class GenerateCiphersStep( int count, Distribution? typeDist = null, Distribution? pwDist = null, - bool assignFolders = false) : IStep + bool assignFolders = false, + DensityProfile? density = null) : IStep { + private readonly DensityProfile? 
_density = density; + public void Execute(SeederContext context) { if (count == 0) @@ -48,7 +52,7 @@ internal sealed class GenerateCiphersStep( var ciphers = new List(count); var cipherIds = new List(count); - var collectionCiphers = new List(); + var collectionCiphers = new List(count + count / 3); for (var i = 0; i < count; i++) { @@ -63,27 +67,57 @@ internal sealed class GenerateCiphersStep( ciphers.Add(cipher); cipherIds.Add(cipher.Id); + } - // Collection assignment - if (collectionIds.Count == 0) + if (collectionIds.Count > 0) + { + if (_density == null) { - continue; - } - - collectionCiphers.Add(new CollectionCipher - { - CipherId = cipher.Id, - CollectionId = collectionIds[i % collectionIds.Count] - }); - - // Every 3rd cipher gets assigned to an additional collection - if (i % 3 == 0 && collectionIds.Count > 1) - { - collectionCiphers.Add(new CollectionCipher + for (var i = 0; i < ciphers.Count; i++) { - CipherId = cipher.Id, - CollectionId = collectionIds[(i + 1) % collectionIds.Count] - }); + collectionCiphers.Add(new CollectionCipher + { + CipherId = ciphers[i].Id, + CollectionId = collectionIds[i % collectionIds.Count] + }); + + if (i % 3 == 0 && collectionIds.Count > 1) + { + collectionCiphers.Add(new CollectionCipher + { + CipherId = ciphers[i].Id, + CollectionId = collectionIds[(i + 1) % collectionIds.Count] + }); + } + } + } + else + { + var orphanCount = (int)(count * _density.OrphanCipherRate); + var nonOrphanCount = count - orphanCount; + + for (var i = 0; i < nonOrphanCount; i++) + { + int collectionIndex; + if (_density.CipherSkew == CipherCollectionSkew.HeavyRight) + { + // Sqrt curve: later collections accumulate more ciphers (right-heavy skew) + var normalized = Math.Pow((double)i / nonOrphanCount, 0.5); + collectionIndex = Math.Min((int)(normalized * collectionIds.Count), collectionIds.Count - 1); + } + else + { + collectionIndex = i % collectionIds.Count; + } + + var collectionId = collectionIds[collectionIndex]; + + 
collectionCiphers.Add(new CollectionCipher + { + CipherId = ciphers[i].Id, + CollectionId = collectionId + }); + } } }