| import datetime |
| import hashlib |
| import uuid |
| from typing import Any, Literal, Union |
|
|
| from flask_login import current_user |
| from werkzeug.exceptions import NotFound |
|
|
| from configs import dify_config |
| from constants import ( |
| AUDIO_EXTENSIONS, |
| DOCUMENT_EXTENSIONS, |
| IMAGE_EXTENSIONS, |
| VIDEO_EXTENSIONS, |
| ) |
| from core.file import helpers as file_helpers |
| from core.rag.extractor.extract_processor import ExtractProcessor |
| from extensions.ext_database import db |
| from extensions.ext_storage import storage |
| from models.account import Account |
| from models.enums import CreatedByRole |
| from models.model import EndUser, UploadFile |
|
|
| from .errors.file import FileTooLargeError, UnsupportedFileTypeError |
|
|
# Upper bound on the preview text returned by FileService.get_file_preview.
# NOTE: despite the name, the limit is applied by slicing the extracted text,
# so it counts characters rather than words.
PREVIEW_WORDS_LIMIT = 3000
|
|
|
|
class FileService:
    """Static helpers for persisting uploaded files and serving them back.

    Uploads are written to ``storage`` under an ``upload_files/<tenant>/<uuid>``
    key and recorded as ``UploadFile`` rows; the read-side methods look a row
    up by id and load its content from ``storage``.
    """

    @staticmethod
    def upload_file(
        *,
        filename: str,
        content: bytes,
        mimetype: str,
        user: Union[Account, EndUser, Any],
        source: Literal["datasets"] | None = None,
        source_url: str = "",
    ) -> UploadFile:
        """Store ``content`` and record it as a new ``UploadFile``.

        Args:
            filename: Original file name; names longer than 200 characters are
                shortened (see below).
            content: Raw file bytes.
            mimetype: MIME type recorded on the row.
            user: Uploader. An ``Account`` supplies ``current_tenant_id``; any
                other object must expose ``tenant_id``.
            source: When ``"datasets"``, only ``DOCUMENT_EXTENSIONS`` are allowed.
            source_url: Recorded on the row as-is.

        Returns:
            The committed ``UploadFile`` row.

        Raises:
            UnsupportedFileTypeError: ``source`` is ``"datasets"`` and the
                extension is not a document extension.
            FileTooLargeError: ``content`` exceeds the limit for its extension.
        """
        # Split on the LAST dot: the stem keeps any inner dots and a name
        # without a dot has no extension (rather than being its own extension).
        stem, extension = FileService._split_filename(filename)
        if len(filename) > 200:
            # Only the stem is cut; the (lower-cased) extension is re-attached.
            filename = stem[:200] + ("." + extension if extension else "")

        if source == "datasets" and extension not in DOCUMENT_EXTENSIONS:
            raise UnsupportedFileTypeError()

        file_size = len(content)
        if not FileService.is_file_size_within_limit(extension=extension, file_size=file_size):
            raise FileTooLargeError()

        uploaded_by_account = isinstance(user, Account)
        if uploaded_by_account:
            current_tenant_id = user.current_tenant_id
        else:
            # Non-account uploaders (e.g. EndUser) carry the tenant directly.
            current_tenant_id = user.tenant_id

        file_key = FileService._build_storage_key(current_tenant_id, extension)

        # Write the payload first, then persist the row that points at it.
        storage.save(file_key, content)

        upload_file = UploadFile(
            tenant_id=current_tenant_id,
            storage_type=dify_config.STORAGE_TYPE,
            key=file_key,
            name=filename,
            size=file_size,
            extension=extension,
            mime_type=mimetype,
            created_by_role=(CreatedByRole.ACCOUNT if uploaded_by_account else CreatedByRole.END_USER),
            created_by=user.id,
            created_at=FileService._utc_now_naive(),
            used=False,
            hash=hashlib.sha3_256(content).hexdigest(),
            source_url=source_url,
        )

        db.session.add(upload_file)
        db.session.commit()

        return upload_file

    @staticmethod
    def is_file_size_within_limit(*, extension: str, file_size: int) -> bool:
        """Return whether ``file_size`` (bytes) fits the limit for ``extension``.

        Image, video and audio extensions each use their own configured limit;
        every other extension uses ``UPLOAD_FILE_SIZE_LIMIT``. The configured
        values are multiplied by 1024 * 1024 before comparison, and a size
        exactly equal to the limit is accepted.
        """
        if extension in IMAGE_EXTENSIONS:
            limit_units = dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT
        elif extension in VIDEO_EXTENSIONS:
            limit_units = dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT
        elif extension in AUDIO_EXTENSIONS:
            limit_units = dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT
        else:
            limit_units = dify_config.UPLOAD_FILE_SIZE_LIMIT

        return file_size <= limit_units * 1024 * 1024

    @staticmethod
    def upload_text(text: str, text_name: str) -> UploadFile:
        """Store ``text`` as a UTF-8 ``.txt`` file owned by ``current_user``.

        The row is created already marked as used by the current user.

        Args:
            text: Text to store; it is encoded as UTF-8.
            text_name: Display name, cut to at most 200 characters.

        Returns:
            The committed ``UploadFile`` row.
        """
        text_name = text_name[:200]

        # Encode once so the stored payload and the recorded size agree:
        # ``size`` is the byte length, matching ``upload_file``.
        payload = text.encode("utf-8")

        tenant_id = current_user.current_tenant_id
        file_key = FileService._build_storage_key(tenant_id, "txt")

        storage.save(file_key, payload)

        # One clock read so ``created_at`` and ``used_at`` are identical.
        now = FileService._utc_now_naive()
        upload_file = UploadFile(
            tenant_id=tenant_id,
            storage_type=dify_config.STORAGE_TYPE,
            key=file_key,
            name=text_name,
            size=len(payload),
            extension="txt",
            mime_type="text/plain",
            created_by=current_user.id,
            created_by_role=CreatedByRole.ACCOUNT,
            created_at=now,
            used=True,
            used_by=current_user.id,
            used_at=now,
        )

        db.session.add(upload_file)
        db.session.commit()

        return upload_file

    @staticmethod
    def get_file_preview(file_id: str):
        """Return the leading ``PREVIEW_WORDS_LIMIT`` characters of a document.

        Raises:
            NotFound: No ``UploadFile`` has this id.
            UnsupportedFileTypeError: The file's extension is not a document
                extension.
        """
        upload_file = FileService._find_upload_file(file_id)
        if not upload_file:
            raise NotFound("File not found")

        if upload_file.extension.lower() not in DOCUMENT_EXTENSIONS:
            raise UnsupportedFileTypeError()

        text = ExtractProcessor.load_from_upload_file(upload_file, return_text=True)
        # A falsy extraction result is normalised to the empty string.
        return text[0:PREVIEW_WORDS_LIMIT] if text else ""

    @staticmethod
    def get_image_preview(file_id: str, timestamp: str, nonce: str, sign: str):
        """Return ``(content stream, mime type)`` for a signed image request.

        The signature is checked with ``file_helpers.verify_image_signature``
        before the database is queried.

        Raises:
            NotFound: The signature check fails or no such file exists (the
                same message is used for both).
            UnsupportedFileTypeError: The file's extension is not an image
                extension.
        """
        signature_ok = file_helpers.verify_image_signature(
            upload_file_id=file_id, timestamp=timestamp, nonce=nonce, sign=sign
        )
        if not signature_ok:
            raise NotFound("File not found or signature is invalid")

        upload_file = FileService._find_upload_file(file_id)
        if not upload_file:
            raise NotFound("File not found or signature is invalid")

        if upload_file.extension.lower() not in IMAGE_EXTENSIONS:
            raise UnsupportedFileTypeError()

        generator = storage.load(upload_file.key, stream=True)

        return generator, upload_file.mime_type

    @staticmethod
    def get_file_generator_by_file_id(file_id: str, timestamp: str, nonce: str, sign: str):
        """Return ``(content stream, UploadFile)`` for a signed file request.

        The signature is checked with ``file_helpers.verify_file_signature``;
        no extension filtering is applied.

        Raises:
            NotFound: The signature check fails or no such file exists (the
                same message is used for both).
        """
        signature_ok = file_helpers.verify_file_signature(
            upload_file_id=file_id, timestamp=timestamp, nonce=nonce, sign=sign
        )
        if not signature_ok:
            raise NotFound("File not found or signature is invalid")

        upload_file = FileService._find_upload_file(file_id)
        if not upload_file:
            raise NotFound("File not found or signature is invalid")

        generator = storage.load(upload_file.key, stream=True)

        return generator, upload_file

    @staticmethod
    def get_public_image_preview(file_id: str):
        """Return ``(content, mime type)`` for an image without a signature check.

        Raises:
            NotFound: No ``UploadFile`` has this id.
            UnsupportedFileTypeError: The file's extension is not an image
                extension.
        """
        upload_file = FileService._find_upload_file(file_id)
        if not upload_file:
            raise NotFound("File not found or signature is invalid")

        if upload_file.extension.lower() not in IMAGE_EXTENSIONS:
            raise UnsupportedFileTypeError()

        # NOTE(review): unlike get_image_preview this is loaded without
        # stream=True, so the first element is presumably the whole content
        # rather than a generator - confirm against the storage API.
        generator = storage.load(upload_file.key)

        return generator, upload_file.mime_type

    @staticmethod
    def _split_filename(filename: str) -> tuple[str, str]:
        """Split ``filename`` on its last dot into ``(stem, lower-cased extension)``.

        A name without any dot yields ``(filename, "")``.
        """
        stem, dot, suffix = filename.rpartition(".")
        if not dot:
            return filename, ""
        return stem, suffix.lower()

    @staticmethod
    def _build_storage_key(tenant_id: str, extension: str) -> str:
        """Create a fresh ``upload_files/<tenant>/<uuid>[.<extension>]`` key."""
        file_uuid = str(uuid.uuid4())
        key = "upload_files/" + tenant_id + "/" + file_uuid
        # Skip the separator entirely when there is no extension to append.
        return key + "." + extension if extension else key

    @staticmethod
    def _utc_now_naive() -> datetime.datetime:
        """Return the current UTC time with ``tzinfo`` stripped."""
        return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _find_upload_file(file_id: str) -> Union[UploadFile, None]:
        """Return the ``UploadFile`` with this id, or ``None`` when absent."""
        return db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
|
|