diff --git a/mobile/openapi/README.md b/mobile/openapi/README.md index 7442f0c750..02bb0bc0c0 100644 --- a/mobile/openapi/README.md +++ b/mobile/openapi/README.md @@ -104,6 +104,7 @@ Class | Method | HTTP request | Description *AssetsApi* | [**getAssetInfo**](doc//AssetsApi.md#getassetinfo) | **GET** /assets/{id} | *AssetsApi* | [**getAssetMetadata**](doc//AssetsApi.md#getassetmetadata) | **GET** /assets/{id}/metadata | *AssetsApi* | [**getAssetMetadataByKey**](doc//AssetsApi.md#getassetmetadatabykey) | **GET** /assets/{id}/metadata/{key} | +*AssetsApi* | [**getAssetOcr**](doc//AssetsApi.md#getassetocr) | **GET** /assets/{id}/ocr | *AssetsApi* | [**getAssetStatistics**](doc//AssetsApi.md#getassetstatistics) | **GET** /assets/statistics | *AssetsApi* | [**getRandom**](doc//AssetsApi.md#getrandom) | **GET** /assets/random | *AssetsApi* | [**playAssetVideo**](doc//AssetsApi.md#playassetvideo) | **GET** /assets/{id}/video/playback | @@ -340,6 +341,7 @@ Class | Method | HTTP request | Description - [AssetMetadataResponseDto](doc//AssetMetadataResponseDto.md) - [AssetMetadataUpsertDto](doc//AssetMetadataUpsertDto.md) - [AssetMetadataUpsertItemDto](doc//AssetMetadataUpsertItemDto.md) + - [AssetOcrResponseDto](doc//AssetOcrResponseDto.md) - [AssetOrder](doc//AssetOrder.md) - [AssetResponseDto](doc//AssetResponseDto.md) - [AssetStackResponseDto](doc//AssetStackResponseDto.md) diff --git a/mobile/openapi/lib/api.dart b/mobile/openapi/lib/api.dart index e6d52b6426..503a71ecb8 100644 --- a/mobile/openapi/lib/api.dart +++ b/mobile/openapi/lib/api.dart @@ -110,6 +110,7 @@ part 'model/asset_metadata_key.dart'; part 'model/asset_metadata_response_dto.dart'; part 'model/asset_metadata_upsert_dto.dart'; part 'model/asset_metadata_upsert_item_dto.dart'; +part 'model/asset_ocr_response_dto.dart'; part 'model/asset_order.dart'; part 'model/asset_response_dto.dart'; part 'model/asset_stack_response_dto.dart'; diff --git a/mobile/openapi/lib/api/assets_api.dart b/mobile/openapi/lib/api/assets_api.dart index 063f9ea43b..384fe0d72a 100644 --- a/mobile/openapi/lib/api/assets_api.dart +++ b/mobile/openapi/lib/api/assets_api.dart @@ -532,6 +532,62 @@ class AssetsApi { return null; } + /// This endpoint requires the `asset.read` permission. + /// + /// Note: This method returns the HTTP [Response]. + /// + /// Parameters: + /// + /// * [String] id (required): + Future getAssetOcrWithHttpInfo(String id,) async { + // ignore: prefer_const_declarations + final apiPath = r'/assets/{id}/ocr' + .replaceAll('{id}', id); + + // ignore: prefer_final_locals + Object? postBody; + + final queryParams = []; + final headerParams = {}; + final formParams = {}; + + const contentTypes = []; + + + return apiClient.invokeAPI( + apiPath, + 'GET', + queryParams, + postBody, + headerParams, + formParams, + contentTypes.isEmpty ? null : contentTypes.first, + ); + } + + /// This endpoint requires the `asset.read` permission. + /// + /// Parameters: + /// + /// * [String] id (required): + Future?> getAssetOcr(String id,) async { + final response = await getAssetOcrWithHttpInfo(id,); + if (response.statusCode >= HttpStatus.badRequest) { + throw ApiException(response.statusCode, await _decodeBodyBytes(response)); + } + // When a remote server returns no body with a status of 204, we shall not decode it. + // At the time of writing this, `dart:convert` will throw an "Unexpected end of input" + // FormatException when trying to decode an empty string. + if (response.body.isNotEmpty && response.statusCode != HttpStatus.noContent) { + final responseBody = await _decodeBodyBytes(response); + return (await apiClient.deserializeAsync(responseBody, 'List') as List) + .cast() + .toList(growable: false); + + } + return null; + } + /// This endpoint requires the `asset.statistics` permission. /// /// Note: This method returns the HTTP [Response]. diff --git a/mobile/openapi/lib/api_client.dart b/mobile/openapi/lib/api_client.dart index 43057f25ad..b20c04a2bf 100644 --- a/mobile/openapi/lib/api_client.dart +++ b/mobile/openapi/lib/api_client.dart @@ -274,6 +274,8 @@ class ApiClient { return AssetMetadataUpsertDto.fromJson(value); case 'AssetMetadataUpsertItemDto': return AssetMetadataUpsertItemDto.fromJson(value); + case 'AssetOcrResponseDto': + return AssetOcrResponseDto.fromJson(value); case 'AssetOrder': return AssetOrderTypeTransformer().decode(value); case 'AssetResponseDto': diff --git a/mobile/openapi/lib/model/asset_ocr_response_dto.dart b/mobile/openapi/lib/model/asset_ocr_response_dto.dart new file mode 100644 index 0000000000..c7937c6eb2 --- /dev/null +++ b/mobile/openapi/lib/model/asset_ocr_response_dto.dart @@ -0,0 +1,206 @@ +// +// AUTO-GENERATED FILE, DO NOT MODIFY! +// +// @dart=2.18 + +// ignore_for_file: unused_element, unused_import +// ignore_for_file: always_put_required_named_parameters_first +// ignore_for_file: constant_identifier_names +// ignore_for_file: lines_longer_than_80_chars + +part of openapi.api; + +class AssetOcrResponseDto { + /// Returns a new [AssetOcrResponseDto] instance. + AssetOcrResponseDto({ + required this.assetId, + required this.boxScore, + required this.id, + required this.text, + required this.textScore, + required this.x1, + required this.x2, + required this.x3, + required this.x4, + required this.y1, + required this.y2, + required this.y3, + required this.y4, + }); + + String assetId; + + /// Confidence score for text detection box + double boxScore; + + String id; + + /// Recognized text + String text; + + /// Confidence score for text recognition + double textScore; + + /// Normalized x coordinate of box corner 1 (0-1) + double x1; + + /// Normalized x coordinate of box corner 2 (0-1) + double x2; + + /// Normalized x coordinate of box corner 3 (0-1) + double x3; + + /// Normalized x coordinate of box corner 4 (0-1) + double x4; + + /// Normalized y coordinate of box corner 1 (0-1) + double y1; + + /// Normalized y coordinate of box corner 2 (0-1) + double y2; + + /// Normalized y coordinate of box corner 3 (0-1) + double y3; + + /// Normalized y coordinate of box corner 4 (0-1) + double y4; + + @override + bool operator ==(Object other) => identical(this, other) || other is AssetOcrResponseDto && + other.assetId == assetId && + other.boxScore == boxScore && + other.id == id && + other.text == text && + other.textScore == textScore && + other.x1 == x1 && + other.x2 == x2 && + other.x3 == x3 && + other.x4 == x4 && + other.y1 == y1 && + other.y2 == y2 && + other.y3 == y3 && + other.y4 == y4; + + @override + int get hashCode => + // ignore: unnecessary_parenthesis + (assetId.hashCode) + + (boxScore.hashCode) + + (id.hashCode) + + (text.hashCode) + + (textScore.hashCode) + + (x1.hashCode) + + (x2.hashCode) + + (x3.hashCode) + + (x4.hashCode) + + (y1.hashCode) + + (y2.hashCode) + + (y3.hashCode) + + (y4.hashCode); + + @override + String toString() => 'AssetOcrResponseDto[assetId=$assetId, boxScore=$boxScore, id=$id, text=$text, textScore=$textScore, x1=$x1, x2=$x2, x3=$x3, x4=$x4, y1=$y1, y2=$y2, y3=$y3, y4=$y4]'; + + Map toJson() { + final json = {}; + json[r'assetId'] = this.assetId; + json[r'boxScore'] = this.boxScore; + json[r'id'] = this.id; + json[r'text'] = this.text; + json[r'textScore'] = this.textScore; + json[r'x1'] = this.x1; + json[r'x2'] = this.x2; + json[r'x3'] = this.x3; + json[r'x4'] = this.x4; + json[r'y1'] = this.y1; + json[r'y2'] = this.y2; + json[r'y3'] = this.y3; + json[r'y4'] = this.y4; + return json; + } + + /// Returns a new [AssetOcrResponseDto] instance and imports its values from + /// [value] if it's a [Map], null otherwise. + // ignore: prefer_constructors_over_static_methods + static AssetOcrResponseDto? fromJson(dynamic value) { + upgradeDto(value, "AssetOcrResponseDto"); + if (value is Map) { + final json = value.cast(); + + return AssetOcrResponseDto( + assetId: mapValueOfType(json, r'assetId')!, + boxScore: (mapValueOfType(json, r'boxScore')!).toDouble(), + id: mapValueOfType(json, r'id')!, + text: mapValueOfType(json, r'text')!, + textScore: (mapValueOfType(json, r'textScore')!).toDouble(), + x1: (mapValueOfType(json, r'x1')!).toDouble(), + x2: (mapValueOfType(json, r'x2')!).toDouble(), + x3: (mapValueOfType(json, r'x3')!).toDouble(), + x4: (mapValueOfType(json, r'x4')!).toDouble(), + y1: (mapValueOfType(json, r'y1')!).toDouble(), + y2: (mapValueOfType(json, r'y2')!).toDouble(), + y3: (mapValueOfType(json, r'y3')!).toDouble(), + y4: (mapValueOfType(json, r'y4')!).toDouble(), + ); + } + return null; + } + + static List listFromJson(dynamic json, {bool growable = false,}) { + final result = []; + if (json is List && json.isNotEmpty) { + for (final row in json) { + final value = AssetOcrResponseDto.fromJson(row); + if (value != null) { + result.add(value); + } + } + } + return result.toList(growable: growable); + } + + static Map mapFromJson(dynamic json) { + final map = {}; + if (json is Map && json.isNotEmpty) { + json = json.cast(); // ignore: parameter_assignments + for (final entry in json.entries) { + final value = AssetOcrResponseDto.fromJson(entry.value); + if (value != null) { + map[entry.key] = value; + } + } + } + return map; + } + + // maps a json object with a list of AssetOcrResponseDto-objects as value to a dart map + static Map> mapListFromJson(dynamic json, {bool growable = false,}) { + final map = >{}; + if (json is Map && json.isNotEmpty) { + // ignore: parameter_assignments + json = json.cast(); + for (final entry in json.entries) { + map[entry.key] = AssetOcrResponseDto.listFromJson(entry.value, growable: growable,); + } + } + return map; + } + + /// The list of required keys that must be present in a JSON. + static const requiredKeys = { + 'assetId', + 'boxScore', + 'id', + 'text', + 'textScore', + 'x1', + 'x2', + 'x3', + 'x4', + 'y1', + 'y2', + 'y3', + 'y4', + }; +} + diff --git a/open-api/immich-openapi-specs.json b/open-api/immich-openapi-specs.json index bc81fad2a3..29503b1ef0 100644 --- a/open-api/immich-openapi-specs.json +++ b/open-api/immich-openapi-specs.json @@ -2491,6 +2491,53 @@ "description": "This endpoint requires the `asset.read` permission." } }, + "/assets/{id}/ocr": { + "get": { + "operationId": "getAssetOcr", + "parameters": [ + { + "name": "id", + "required": true, + "in": "path", + "schema": { + "format": "uuid", + "type": "string" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "items": { + "$ref": "#/components/schemas/AssetOcrResponseDto" + }, + "type": "array" + } + } + }, + "description": "" + } + }, + "security": [ + { + "bearer": [] + }, + { + "cookie": [] + }, + { + "api_key": [] + } + ], + "tags": [ + "Assets" + ], + "x-immich-permission": "asset.read", + "description": "This endpoint requires the `asset.read` permission." + } + }, "/assets/{id}/original": { "get": { "operationId": "downloadAsset", @@ -11117,6 +11164,88 @@ ], "type": "object" }, + "AssetOcrResponseDto": { + "properties": { + "assetId": { + "format": "uuid", + "type": "string" + }, + "boxScore": { + "description": "Confidence score for text detection box", + "format": "double", + "type": "number" + }, + "id": { + "format": "uuid", + "type": "string" + }, + "text": { + "description": "Recognized text", + "type": "string" + }, + "textScore": { + "description": "Confidence score for text recognition", + "format": "double", + "type": "number" + }, + "x1": { + "description": "Normalized x coordinate of box corner 1 (0-1)", + "format": "double", + "type": "number" + }, + "x2": { + "description": "Normalized x coordinate of box corner 2 (0-1)", + "format": "double", + "type": "number" + }, + "x3": { + "description": "Normalized x coordinate of box corner 3 (0-1)", + "format": "double", + "type": "number" + }, + "x4": { + "description": "Normalized x coordinate of box corner 4 (0-1)", + "format": "double", + "type": "number" + }, + "y1": { + "description": "Normalized y coordinate of box corner 1 (0-1)", + "format": "double", + "type": "number" + }, + "y2": { + "description": "Normalized y coordinate of box corner 2 (0-1)", + "format": "double", + "type": "number" + }, + "y3": { + "description": "Normalized y coordinate of box corner 3 (0-1)", + "format": "double", + "type": "number" + }, + "y4": { + "description": "Normalized y coordinate of box corner 4 (0-1)", + "format": "double", + "type": "number" + } + }, + "required": [ + "assetId", + "boxScore", + "id", + "text", + "textScore", + "x1", + "x2", + "x3", + "x4", + "y1", + "y2", + "y3", + "y4" + ], + "type": "object" + }, "AssetOrder": { "enum": [ "asc", diff --git a/open-api/typescript-sdk/src/fetch-client.ts b/open-api/typescript-sdk/src/fetch-client.ts index a20fa9925b..f4801a1922 100644 --- a/open-api/typescript-sdk/src/fetch-client.ts +++ b/open-api/typescript-sdk/src/fetch-client.ts @@ -546,6 +546,32 @@ export type AssetMetadataResponseDto = { export type AssetMetadataUpsertDto = { items: AssetMetadataUpsertItemDto[]; }; +export type AssetOcrResponseDto = { + assetId: string; + /** Confidence score for text detection box */ + boxScore: number; + id: string; + /** Recognized text */ + text: string; + /** Confidence score for text recognition */ + textScore: number; + /** Normalized x coordinate of box corner 1 (0-1) */ + x1: number; + /** Normalized x coordinate of box corner 2 (0-1) */ + x2: number; + /** Normalized x coordinate of box corner 3 (0-1) */ + x3: number; + /** Normalized x coordinate of box corner 4 (0-1) */ + x4: number; + /** Normalized y coordinate of box corner 1 (0-1) */ + y1: number; + /** Normalized y coordinate of box corner 2 (0-1) */ + y2: number; + /** Normalized y coordinate of box corner 3 (0-1) */ + y3: number; + /** Normalized y coordinate of box corner 4 (0-1) */ + y4: number; +}; export type AssetMediaReplaceDto = { assetData: Blob; deviceAssetId: string; @@ -2390,6 +2416,19 @@ export function getAssetMetadataByKey({ id, key }: { ...opts })); } +/** + * This endpoint requires the `asset.read` permission. + */ +export function getAssetOcr({ id }: { + id: string; +}, opts?: Oazapfts.RequestOpts) { + return oazapfts.ok(oazapfts.fetchJson<{ + status: 200; + data: AssetOcrResponseDto[]; + }>(`/assets/${encodeURIComponent(id)}/ocr`, { + ...opts + })); +} /** * This endpoint requires the `asset.download` permission. */ diff --git a/server/src/controllers/asset.controller.ts b/server/src/controllers/asset.controller.ts index 1f320f6595..c57dc4ed28 100644 --- a/server/src/controllers/asset.controller.ts +++ b/server/src/controllers/asset.controller.ts @@ -16,6 +16,7 @@ import { UpdateAssetDto, } from 'src/dtos/asset.dto'; import { AuthDto } from 'src/dtos/auth.dto'; +import { AssetOcrResponseDto } from 'src/dtos/ocr.dto'; import { Permission, RouteKey } from 'src/enum'; import { Auth, Authenticated } from 'src/middleware/auth.guard'; import { AssetService } from 'src/services/asset.service'; @@ -95,6 +96,12 @@ export class AssetController { return this.service.getMetadata(auth, id); } + @Get(':id/ocr') + @Authenticated({ permission: Permission.AssetRead }) + getAssetOcr(@Auth() auth: AuthDto, @Param() { id }: UUIDParamDto): Promise { + return this.service.getOcr(auth, id); + } + @Put(':id/metadata') @Authenticated({ permission: Permission.AssetUpdate }) updateAssetMetadata( diff --git a/server/src/dtos/ocr.dto.ts b/server/src/dtos/ocr.dto.ts new file mode 100644 index 0000000000..1e838d0ec0 --- /dev/null +++ b/server/src/dtos/ocr.dto.ts @@ -0,0 +1,42 @@ +import { ApiProperty } from '@nestjs/swagger'; + +export class AssetOcrResponseDto { + @ApiProperty({ type: 'string', format: 'uuid' }) + id!: string; + + @ApiProperty({ type: 'string', format: 'uuid' }) + assetId!: string; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 1 (0-1)' }) + x1!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 1 (0-1)' }) + y1!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 2 (0-1)' }) + x2!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 2 (0-1)' }) + y2!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 3 (0-1)' }) + x3!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 3 (0-1)' }) + y3!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized x coordinate of box corner 4 (0-1)' }) + x4!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Normalized y coordinate of box corner 4 (0-1)' }) + y4!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text detection box' }) + boxScore!: number; + + @ApiProperty({ type: 'number', format: 'double', description: 'Confidence score for text recognition' }) + textScore!: number; + + @ApiProperty({ type: 'string', description: 'Recognized text' }) + text!: string; +} diff --git a/server/src/services/asset.service.spec.ts b/server/src/services/asset.service.spec.ts index 93861149c3..4b0086c957 100755 --- a/server/src/services/asset.service.spec.ts +++ b/server/src/services/asset.service.spec.ts @@ -700,6 +700,42 @@ describe(AssetService.name, () => { }); }); + describe('getOcr', () => { + it('should require asset read permission', async () => { + mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set()); + + await expect(sut.getOcr(authStub.admin, 'asset-1')).rejects.toBeInstanceOf(BadRequestException); + + expect(mocks.ocr.getByAssetId).not.toHaveBeenCalled(); + }); + + it('should return OCR data for an asset', async () => { + const ocr1 = factory.assetOcr({ text: 'Hello World' }); + const ocr2 = factory.assetOcr({ text: 'Test Image' }); + + mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1'])); + mocks.ocr.getByAssetId.mockResolvedValue([ocr1, ocr2]); + + await expect(sut.getOcr(authStub.admin, 'asset-1')).resolves.toEqual([ocr1, ocr2]); + + expect(mocks.access.asset.checkOwnerAccess).toHaveBeenCalledWith( + authStub.admin.user.id, + new Set(['asset-1']), + undefined, + ); + expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith('asset-1'); + }); + + it('should return empty array when no OCR data exists', async () => { + mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1'])); + mocks.ocr.getByAssetId.mockResolvedValue([]); + + await expect(sut.getOcr(authStub.admin, 'asset-1')).resolves.toEqual([]); + + expect(mocks.ocr.getByAssetId).toHaveBeenCalledWith('asset-1'); + }); + }); + describe('run', () => { it('should run the refresh faces job', async () => { mocks.access.asset.checkOwnerAccess.mockResolvedValue(new Set(['asset-1'])); diff --git a/server/src/services/asset.service.ts b/server/src/services/asset.service.ts index 6cb0219745..eb66c326e1 100644 --- a/server/src/services/asset.service.ts +++ b/server/src/services/asset.service.ts @@ -16,6 +16,7 @@ import { mapStats, } from 'src/dtos/asset.dto'; import { AuthDto } from 'src/dtos/auth.dto'; +import { AssetOcrResponseDto } from 'src/dtos/ocr.dto'; import { AssetMetadataKey, AssetStatus, AssetVisibility, JobName, JobStatus, Permission, QueueName } from 'src/enum'; import { BaseService } from 'src/services/base.service'; import { ISidecarWriteJob, JobItem, JobOf } from 'src/types'; @@ -289,6 +290,11 @@ export class AssetService extends BaseService { return this.assetRepository.getMetadata(id); } + async getOcr(auth: AuthDto, id: string): Promise { + await this.requireAccess({ auth, permission: Permission.AssetRead, ids: [id] }); + return this.ocrRepository.getByAssetId(id); + } + async upsertMetadata(auth: AuthDto, id: string, dto: AssetMetadataUpsertDto): Promise { await this.requireAccess({ auth, permission: Permission.AssetUpdate, ids: [id] }); return this.assetRepository.upsertMetadata(id, dto.items); diff --git a/server/test/small.factory.ts b/server/test/small.factory.ts index 09e7988f8f..ea0df585ea 100644 --- a/server/test/small.factory.ts +++ b/server/test/small.factory.ts @@ -309,10 +309,44 @@ const assetSidecarWriteFactory = (asset: Partial = {}) => ({ ...asset, }); +const assetOcrFactory = ( + ocr: { + id?: string; + assetId?: string; + x1?: number; + y1?: number; + x2?: number; + y2?: number; + x3?: number; + y3?: number; + x4?: number; + y4?: number; + boxScore?: number; + textScore?: number; + text?: string; + } = {}, +) => ({ + id: newUuid(), + assetId: newUuid(), + x1: 0.1, + y1: 0.2, + x2: 0.3, + y2: 0.2, + x3: 0.3, + y3: 0.4, + x4: 0.1, + y4: 0.4, + boxScore: 0.95, + textScore: 0.92, + text: 'Sample Text', + ...ocr, +}); + export const factory = { activity: activityFactory, apiKey: apiKeyFactory, asset: assetFactory, + assetOcr: assetOcrFactory, auth: authFactory, authApiKey: authApiKeyFactory, authUser: authUserFactory,