mirror of
https://github.com/immich-app/immich
synced 2025-11-14 17:36:12 +00:00
feat: add OCR functionality and related configurations
This commit is contained in:
parent
23fb2e0fae
commit
0e8ca1c159
64 changed files with 3998 additions and 1669 deletions
108
server/src/services/ocr.service.ts
Normal file
108
server/src/services/ocr.service.ts
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
import { Injectable } from '@nestjs/common';
|
||||
import { JOBS_ASSET_PAGINATION_SIZE } from 'src/constants';
|
||||
import { OnJob } from 'src/decorators';
|
||||
import {
|
||||
JobName,
|
||||
JobStatus,
|
||||
QueueName,
|
||||
} from 'src/enum';
|
||||
import { BaseService } from 'src/services/base.service';
|
||||
import { JobItem, JobOf } from 'src/types';
|
||||
import { getAssetFiles } from 'src/utils/asset.util';
|
||||
import { isOcrEnabled } from 'src/utils/misc';
|
||||
|
||||
@Injectable()
|
||||
export class OcrService extends BaseService {
|
||||
@OnJob({ name: JobName.OCR_CLEANUP, queue: QueueName.BACKGROUND_TASK })
|
||||
async handleOcrCleanup(): Promise<JobStatus> {
|
||||
const ocr = await this.ocrRepository.getOcrWithoutText();
|
||||
await this.ocrRepository.delete(ocr);
|
||||
return JobStatus.SUCCESS;
|
||||
}
|
||||
|
||||
@OnJob({ name: JobName.QUEUE_OCR, queue: QueueName.OCR })
|
||||
async handleQueueOcr({ force, nightly }: JobOf<JobName.QUEUE_OCR>): Promise<JobStatus> {
|
||||
const { machineLearning } = await this.getConfig({ withCache: false });
|
||||
if (!isOcrEnabled(machineLearning)) {
|
||||
return JobStatus.SKIPPED;
|
||||
}
|
||||
|
||||
if (force) {
|
||||
await this.ocrRepository.deleteAllOcr();
|
||||
}
|
||||
|
||||
let jobs: JobItem[] = [];
|
||||
const assets = this.assetJobRepository.streamForOcrJob(force);
|
||||
|
||||
for await (const asset of assets) {
|
||||
jobs.push({ name: JobName.OCR, data: { id: asset.id } });
|
||||
|
||||
if (jobs.length >= JOBS_ASSET_PAGINATION_SIZE) {
|
||||
await this.jobRepository.queueAll(jobs);
|
||||
jobs = [];
|
||||
}
|
||||
}
|
||||
|
||||
await this.jobRepository.queueAll(jobs);
|
||||
|
||||
if (force === undefined) {
|
||||
await this.jobRepository.queue({ name: JobName.OCR_CLEANUP });
|
||||
}
|
||||
|
||||
return JobStatus.SUCCESS;
|
||||
}
|
||||
|
||||
@OnJob({ name: JobName.OCR, queue: QueueName.OCR })
|
||||
async handleOcr({ id }: JobOf<JobName.OCR>): Promise<JobStatus> {
|
||||
const { machineLearning } = await this.getConfig({ withCache: true });
|
||||
if (!isOcrEnabled(machineLearning)) {
|
||||
return JobStatus.SKIPPED;
|
||||
}
|
||||
|
||||
const relations = { files: true };
|
||||
const asset = await this.assetRepository.getById(id, relations);
|
||||
if (!asset) {
|
||||
return JobStatus.FAILED;
|
||||
}
|
||||
if (!asset.files) {
|
||||
return JobStatus.FAILED;
|
||||
}
|
||||
const { previewFile } = getAssetFiles(asset.files);
|
||||
if (!previewFile) {
|
||||
return JobStatus.FAILED;
|
||||
}
|
||||
const ocrResults = await this.machineLearningRepository.ocr(
|
||||
machineLearning.urls,
|
||||
previewFile.path,
|
||||
machineLearning.ocr
|
||||
);
|
||||
|
||||
if (!ocrResults || ocrResults.text.length === 0) {
|
||||
this.logger.warn(`No OCR results for document ${id}`);
|
||||
await this.assetRepository.upsertJobStatus({
|
||||
assetId: asset.id,
|
||||
ocrAt: new Date(),
|
||||
});
|
||||
return JobStatus.SUCCESS;
|
||||
}
|
||||
|
||||
this.logger.debug(`OCR ${id} has OCR results`);
|
||||
|
||||
const ocr = await this.ocrRepository.getOcrById(id);
|
||||
if (ocr) {
|
||||
this.logger.debug(`Updating OCR for ${id}`);
|
||||
await this.ocrRepository.updateOcrData(id, ocrResults.text);
|
||||
} else {
|
||||
this.logger.debug(`Inserting OCR for ${id}`);
|
||||
await this.ocrRepository.insertOcrData(id, ocrResults.text);
|
||||
}
|
||||
|
||||
await this.assetRepository.upsertJobStatus({
|
||||
assetId: asset.id,
|
||||
ocrAt: new Date(),
|
||||
});
|
||||
this.logger.debug(`Processed OCR for ${id}`);
|
||||
return JobStatus.SUCCESS;
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue