fix: tag clean up query and add tests (#22633)

* fix delete empty tags query

* rewrite as a single statement

* create tag service medium test

* single tag exists, connected to one asset, and is not deleted

* do not delete parent tag if children have an asset

* hierarchical tag tests

* fix query to match 3 test

* remove transaction and format:fix

* remove transaction and format:fix

* simplify query, handle nested empty tag

* unused helper

---------

Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com>
This commit is contained in:
Jorge Montejo 2025-10-16 00:51:57 +02:00 committed by GitHub
parent 74a9be4a0e
commit 9d639607c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 171 additions and 17 deletions

View file

@ -163,22 +163,22 @@ export class TagRepository {
}
async deleteEmptyTags() {
// TODO rewrite as a single statement
await this.db.transaction().execute(async (tx) => {
const result = await tx
.selectFrom('asset')
.innerJoin('tag_asset', 'tag_asset.assetsId', 'asset.id')
.innerJoin('tag_closure', 'tag_closure.id_descendant', 'tag_asset.tagsId')
.innerJoin('tag', 'tag.id', 'tag_closure.id_descendant')
.select((eb) => ['tag.id', eb.fn.count<number>('asset.id').as('count')])
.groupBy('tag.id')
.execute();
const result = await this.db
.deleteFrom('tag')
.where(({ not, exists, selectFrom }) =>
not(
exists(
selectFrom('tag_closure')
.whereRef('tag.id', '=', 'tag_closure.id_ancestor')
.innerJoin('tag_asset', 'tag_closure.id_descendant', 'tag_asset.tagsId'),
),
),
)
.executeTakeFirst();
const ids = result.filter(({ count }) => count === 0).map(({ id }) => id);
if (ids.length > 0) {
await this.db.deleteFrom('tag').where('id', 'in', ids).execute();
this.logger.log(`Deleted ${ids.length} empty tags`);
}
});
const deletedRows = Number(result.numDeletedRows);
if (deletedRows > 0) {
this.logger.log(`Deleted ${deletedRows} empty tags`);
}
}
}

View file

@ -39,6 +39,7 @@ import { StorageRepository } from 'src/repositories/storage.repository';
import { SyncCheckpointRepository } from 'src/repositories/sync-checkpoint.repository';
import { SyncRepository } from 'src/repositories/sync.repository';
import { SystemMetadataRepository } from 'src/repositories/system-metadata.repository';
import { TagRepository } from 'src/repositories/tag.repository';
import { TelemetryRepository } from 'src/repositories/telemetry.repository';
import { UserRepository } from 'src/repositories/user.repository';
import { VersionHistoryRepository } from 'src/repositories/version-history.repository';
@ -52,6 +53,8 @@ import { MemoryTable } from 'src/schema/tables/memory.table';
import { PersonTable } from 'src/schema/tables/person.table';
import { SessionTable } from 'src/schema/tables/session.table';
import { StackTable } from 'src/schema/tables/stack.table';
import { TagAssetTable } from 'src/schema/tables/tag-asset.table';
import { TagTable } from 'src/schema/tables/tag.table';
import { UserTable } from 'src/schema/tables/user.table';
import { BASE_SERVICE_DEPENDENCIES, BaseService } from 'src/services/base.service';
import { SyncService } from 'src/services/sync.service';
@ -240,6 +243,18 @@ export class MediumTestContext<S extends BaseService = BaseService> {
user,
};
}
async newTagAsset(tagBulkAssets: { tagIds: string[]; assetIds: string[] }) {
const tagsAssets: Insertable<TagAssetTable>[] = [];
for (const tagsId of tagBulkAssets.tagIds) {
for (const assetsId of tagBulkAssets.assetIds) {
tagsAssets.push({ tagsId, assetsId });
}
}
const result = await this.get(TagRepository).upsertAssetIds(tagsAssets);
return { tagsAssets, result };
}
}
export class SyncTestContext extends MediumTestContext<SyncService> {
@ -318,6 +333,10 @@ const newRealRepository = <T>(key: ClassConstructor<T>, db: Kysely<DB>): T => {
return new key(LoggingRepository.create());
}
case TagRepository: {
return new key(db, LoggingRepository.create());
}
case LoggingRepository as unknown as ClassConstructor<LoggingRepository>: {
return new key() as unknown as T;
}
@ -345,7 +364,8 @@ const newMockRepository = <T>(key: ClassConstructor<T>) => {
case SyncCheckpointRepository:
case SystemMetadataRepository:
case UserRepository:
case VersionHistoryRepository: {
case VersionHistoryRepository:
case TagRepository: {
return automock(key);
}
@ -567,6 +587,23 @@ const memoryInsert = (memory: Partial<Insertable<MemoryTable>> = {}) => {
return { ...defaults, ...memory, id };
};
const tagInsert = (tag: Partial<Insertable<TagTable>>) => {
const id = tag.id || newUuid();
const defaults: Insertable<TagTable> = {
id,
userId: '',
value: '',
createdAt: newDate(),
updatedAt: newDate(),
color: '',
parentId: null,
updateId: newUuid(),
};
return { ...defaults, ...tag, id };
};
class CustomWritable extends Writable {
private data = '';
@ -619,4 +656,5 @@ export const mediumFactory = {
memoryInsert,
loginDetails,
loginResponse,
tagInsert,
};

View file

@ -0,0 +1,116 @@
import { Kysely } from 'kysely';
import { JobStatus } from 'src/enum';
import { AccessRepository } from 'src/repositories/access.repository';
import { LoggingRepository } from 'src/repositories/logging.repository';
import { TagRepository } from 'src/repositories/tag.repository';
import { DB } from 'src/schema';
import { TagService } from 'src/services/tag.service';
import { upsertTags } from 'src/utils/tag';
import { newMediumService } from 'test/medium.factory';
import { getKyselyDB } from 'test/utils';
let defaultDatabase: Kysely<DB>;
const setup = (db?: Kysely<DB>) => {
return newMediumService(TagService, {
database: db || defaultDatabase,
real: [TagRepository, AccessRepository],
mock: [LoggingRepository],
});
};
beforeAll(async () => {
defaultDatabase = await getKyselyDB();
});
describe(TagService.name, () => {
describe('deleteEmptyTags', () => {
it('single tag exists, not connected to any assets, and is deleted', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const tagRepo = ctx.get(TagRepository);
const [tag] = await upsertTags(tagRepo, { userId: user.id, tags: ['tag-1'] });
await expect(tagRepo.getByValue(user.id, 'tag-1')).resolves.toEqual(expect.objectContaining({ id: tag.id }));
await expect(sut.handleTagCleanup()).resolves.toBe(JobStatus.Success);
await expect(tagRepo.getByValue(user.id, 'tag-1')).resolves.toBeUndefined();
});
it('single tag exists, connected to one asset, and is not deleted', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const { asset } = await ctx.newAsset({ ownerId: user.id });
const tagRepo = ctx.get(TagRepository);
const [tag] = await upsertTags(tagRepo, { userId: user.id, tags: ['tag-1'] });
await ctx.newTagAsset({ tagIds: [tag.id], assetIds: [asset.id] });
await expect(tagRepo.getByValue(user.id, 'tag-1')).resolves.toEqual(expect.objectContaining({ id: tag.id }));
await expect(sut.handleTagCleanup()).resolves.toBe(JobStatus.Success);
await expect(tagRepo.getByValue(user.id, 'tag-1')).resolves.toEqual(expect.objectContaining({ id: tag.id }));
});
it('hierarchical tag exists, and the parent is connected to an asset, and the child is deleted', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const { asset } = await ctx.newAsset({ ownerId: user.id });
const tagRepo = ctx.get(TagRepository);
const [parentTag, childTag] = await upsertTags(tagRepo, { userId: user.id, tags: ['parent', 'parent/child'] });
await ctx.newTagAsset({ tagIds: [parentTag.id], assetIds: [asset.id] });
await expect(tagRepo.getByValue(user.id, 'parent')).resolves.toEqual(
expect.objectContaining({ id: parentTag.id }),
);
await expect(tagRepo.getByValue(user.id, 'parent/child')).resolves.toEqual(
expect.objectContaining({ id: childTag.id }),
);
await expect(sut.handleTagCleanup()).resolves.toBe(JobStatus.Success);
await expect(tagRepo.getByValue(user.id, 'parent')).resolves.toEqual(
expect.objectContaining({ id: parentTag.id }),
);
await expect(tagRepo.getByValue(user.id, 'parent/child')).resolves.toBeUndefined();
});
it('hierarchical tag exists, and only the child is connected to an asset, and nothing is deleted', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const { asset } = await ctx.newAsset({ ownerId: user.id });
const tagRepo = ctx.get(TagRepository);
const [parentTag, childTag] = await upsertTags(tagRepo, { userId: user.id, tags: ['parent', 'parent/child'] });
await ctx.newTagAsset({ tagIds: [childTag.id], assetIds: [asset.id] });
await expect(tagRepo.getByValue(user.id, 'parent')).resolves.toEqual(
expect.objectContaining({ id: parentTag.id }),
);
await expect(tagRepo.getByValue(user.id, 'parent/child')).resolves.toEqual(
expect.objectContaining({ id: childTag.id }),
);
await expect(sut.handleTagCleanup()).resolves.toBe(JobStatus.Success);
await expect(tagRepo.getByValue(user.id, 'parent')).resolves.toEqual(
expect.objectContaining({ id: parentTag.id }),
);
await expect(tagRepo.getByValue(user.id, 'parent/child')).resolves.toEqual(
expect.objectContaining({ id: childTag.id }),
);
});
it('hierarchical tag exists, and neither parent nor child is connected to an asset, and both are deleted', async () => {
const { sut, ctx } = setup();
const { user } = await ctx.newUser();
const tagRepo = ctx.get(TagRepository);
const [parentTag, childTag] = await upsertTags(tagRepo, { userId: user.id, tags: ['parent', 'parent/child'] });
await expect(tagRepo.getByValue(user.id, 'parent')).resolves.toEqual(
expect.objectContaining({ id: parentTag.id }),
);
await expect(tagRepo.getByValue(user.id, 'parent/child')).resolves.toEqual(
expect.objectContaining({ id: childTag.id }),
);
await expect(sut.handleTagCleanup()).resolves.toBe(JobStatus.Success);
await expect(tagRepo.getByValue(user.id, 'parent/child')).resolves.toBeUndefined();
await expect(tagRepo.getByValue(user.id, 'parent')).resolves.toBeUndefined();
});
});
});