| 'use client' |
| import { useTranslation } from 'react-i18next' |
| import { formatFileSize, formatNumber, formatTime } from '@/utils/format' |
| import type { DocType } from '@/models/datasets' |
| import useTimestamp from '@/hooks/use-timestamp' |
|
|
/**
 * Values a sub-field may declare as its `inputType`.
 * Presumably selects the form control used to edit the field; confirm against the consuming component.
 */
export type inputType =
  | 'input'
  | 'select'
  | 'textarea'

/**
 * Keys of the metadata map: every `DocType`, plus the two extra groups
 * `originInfo` and `technicalParameters`.
 */
export type metadataType =
  | DocType
  | 'originInfo'
  | 'technicalParameters'
|
|
/** Configuration of a single entry inside a group's `subFieldsMap`. */
type MetadataSubField = {
  /** Display label for the field. */
  label: string
  /** Optional input kind declared for the field. */
  inputType?: inputType
  field?: string
  /** Optional formatter: receives the raw value and an optional total, returns what to display. */
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** Configuration of one metadata group (one key of `metadataType`). */
type MetadataGroup = {
  /** Display text for the group. */
  text: string
  allowEdit?: boolean
  icon?: React.ReactNode
  iconName?: string
  /** Per-field configuration, keyed by a string field key. */
  subFieldsMap: Record<string, MetadataSubField>
}

/** Maps every `metadataType` key to its group configuration. */
type MetadataMap = Record<metadataType, MetadataGroup>
|
|
const fieldPrefix = 'datasetDocuments.metadata.field'

/**
 * Builds the translated metadata configuration for every metadata group.
 *
 * Each group provides a translated `text`, optional `iconName` / `allowEdit`
 * settings, and a `subFieldsMap` keyed by field key. A sub-field always has a
 * translated `label`, and may declare an `inputType` and/or a `render`
 * function that converts the raw value into display text.
 *
 * @returns The metadata map with all labels resolved through the current `t`.
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()
  const { formatTime: formatTimestamp } = useTimestamp()

  // Shared by the timestamp fields; the format string itself comes from the translations.
  const renderDateTime = (value: Parameters<typeof formatTimestamp>[0]) =>
    formatTimestamp(value, t('datasetDocuments.metadata.dateTimeFormat') as string)

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.book.title`) },
        language: {
          label: t(`${fieldPrefix}.book.language`),
          inputType: 'select',
        },
        author: { label: t(`${fieldPrefix}.book.author`) },
        publisher: { label: t(`${fieldPrefix}.book.publisher`) },
        publication_date: { label: t(`${fieldPrefix}.book.publicationDate`) },
        isbn: { label: t(`${fieldPrefix}.book.ISBN`) },
        category: {
          label: t(`${fieldPrefix}.book.category`),
          inputType: 'select',
        },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.webPage.title`) },
        'url': { label: t(`${fieldPrefix}.webPage.url`) },
        'language': {
          label: t(`${fieldPrefix}.webPage.language`),
          inputType: 'select',
        },
        'author/publisher': { label: t(`${fieldPrefix}.webPage.authorPublisher`) },
        'publish_date': { label: t(`${fieldPrefix}.webPage.publishDate`) },
        'topics/keywords': { label: t(`${fieldPrefix}.webPage.topicsKeywords`) },
        'description': { label: t(`${fieldPrefix}.webPage.description`) },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.paper.title`) },
        'language': {
          label: t(`${fieldPrefix}.paper.language`),
          inputType: 'select',
        },
        'author': { label: t(`${fieldPrefix}.paper.author`) },
        'publish_date': { label: t(`${fieldPrefix}.paper.publishDate`) },
        'journal/conference_name': {
          label: t(`${fieldPrefix}.paper.journalConferenceName`),
        },
        'volume/issue/page_numbers': { label: t(`${fieldPrefix}.paper.volumeIssuePage`) },
        'doi': { label: t(`${fieldPrefix}.paper.DOI`) },
        'topics/keywords': { label: t(`${fieldPrefix}.paper.topicsKeywords`) },
        'abstract': {
          label: t(`${fieldPrefix}.paper.abstract`),
          inputType: 'textarea',
        },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        'platform': { label: t(`${fieldPrefix}.socialMediaPost.platform`) },
        'author/username': {
          label: t(`${fieldPrefix}.socialMediaPost.authorUsername`),
        },
        'publish_date': { label: t(`${fieldPrefix}.socialMediaPost.publishDate`) },
        'post_url': { label: t(`${fieldPrefix}.socialMediaPost.postURL`) },
        'topics/tags': { label: t(`${fieldPrefix}.socialMediaPost.topicsTags`) },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.personalDocument.title`) },
        'author': { label: t(`${fieldPrefix}.personalDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.personalDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.personalDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.personalDocument.documentType`),
          inputType: 'select',
        },
        'tags/category': {
          label: t(`${fieldPrefix}.personalDocument.tagsCategory`),
        },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.businessDocument.title`) },
        'author': { label: t(`${fieldPrefix}.businessDocument.author`) },
        'creation_date': {
          label: t(`${fieldPrefix}.businessDocument.creationDate`),
        },
        'last_modified_date': {
          label: t(`${fieldPrefix}.businessDocument.lastModifiedDate`),
        },
        'document_type': {
          label: t(`${fieldPrefix}.businessDocument.documentType`),
          inputType: 'select',
        },
        'department/team': {
          label: t(`${fieldPrefix}.businessDocument.departmentTeam`),
        },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        'chat_platform': { label: t(`${fieldPrefix}.IMChat.chatPlatform`) },
        'chat_participants/group_name': {
          label: t(`${fieldPrefix}.IMChat.chatPartiesGroupName`),
        },
        'start_date': { label: t(`${fieldPrefix}.IMChat.startDate`) },
        'end_date': { label: t(`${fieldPrefix}.IMChat.endDate`) },
        'participants': { label: t(`${fieldPrefix}.IMChat.participants`) },
        'topicsKeywords': {
          label: t(`${fieldPrefix}.IMChat.topicsKeywords`),
          inputType: 'textarea',
        },
        'fileType': { label: t(`${fieldPrefix}.IMChat.fileType`) },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.wikipediaEntry.title`) },
        'language': {
          label: t(`${fieldPrefix}.wikipediaEntry.language`),
          inputType: 'select',
        },
        'web_page_url': { label: t(`${fieldPrefix}.wikipediaEntry.webpageURL`) },
        'editor/contributor': {
          label: t(`${fieldPrefix}.wikipediaEntry.editorContributor`),
        },
        'last_edit_date': {
          label: t(`${fieldPrefix}.wikipediaEntry.lastEditDate`),
        },
        'summary/introduction': {
          label: t(`${fieldPrefix}.wikipediaEntry.summaryIntroduction`),
          inputType: 'textarea',
        },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: t(`${fieldPrefix}.notion.title`) },
        'language': { label: t(`${fieldPrefix}.notion.lang`), inputType: 'select' },
        'author/creator': { label: t(`${fieldPrefix}.notion.author`) },
        'creation_date': { label: t(`${fieldPrefix}.notion.createdTime`) },
        'last_modified_date': {
          label: t(`${fieldPrefix}.notion.lastModifiedTime`),
        },
        'notion_page_link': { label: t(`${fieldPrefix}.notion.url`) },
        'category/tags': { label: t(`${fieldPrefix}.notion.tag`) },
        'description': { label: t(`${fieldPrefix}.notion.desc`) },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        'repository_name': { label: t(`${fieldPrefix}.github.repoName`) },
        'repository_description': { label: t(`${fieldPrefix}.github.repoDesc`) },
        'repository_owner/organization': { label: t(`${fieldPrefix}.github.repoOwner`) },
        'code_filename': { label: t(`${fieldPrefix}.github.fileName`) },
        'code_file_path': { label: t(`${fieldPrefix}.github.filePath`) },
        'programming_language': { label: t(`${fieldPrefix}.github.programmingLang`) },
        'github_link': { label: t(`${fieldPrefix}.github.url`) },
        'open_source_license': { label: t(`${fieldPrefix}.github.license`) },
        'commit_date': { label: t(`${fieldPrefix}.github.lastCommitTime`) },
        'commit_author': {
          label: t(`${fieldPrefix}.github.lastCommitAuthor`),
        },
      },
    },
    // Keys containing dots look like paths into a nested object;
    // NOTE(review): how they are resolved is up to the consumer — confirm there.
    originInfo: {
      text: '',
      allowEdit: false,
      subFieldsMap: {
        'name': { label: t(`${fieldPrefix}.originInfo.originalFilename`) },
        'data_source_info.upload_file.size': {
          label: t(`${fieldPrefix}.originInfo.originalFileSize`),
          render: value => formatFileSize(value),
        },
        'created_at': {
          label: t(`${fieldPrefix}.originInfo.uploadDate`),
          render: renderDateTime,
        },
        'completed_at': {
          label: t(`${fieldPrefix}.originInfo.lastUpdateDate`),
          render: renderDateTime,
        },
        'data_source_type': {
          label: t(`${fieldPrefix}.originInfo.source`),
          render: value => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    // NOTE(review): the unit suffixes below ("characters", "paragraphs", "tokens")
    // are hard-coded English strings and do not go through `t`.
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
          render: value => value === 'automatic'
            ? (t('datasetDocuments.embedding.automatic') as string)
            : (t('datasetDocuments.embedding.custom') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
          render: value => formatNumber(value),
        },
        'average_segment_length': {
          label: t(`${fieldPrefix}.technicalParameters.avgParagraphLength`),
          render: value => `${formatNumber(value)} characters`,
        },
        'segment_count': {
          label: t(`${fieldPrefix}.technicalParameters.paragraphs`),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        'hit_count': {
          label: t(`${fieldPrefix}.technicalParameters.hitCount`),
          render: (value, total) => {
            // Normalize both counts so a missing value never prints "undefined"
            // and an empty total never divides by zero.
            const hits = value || 0
            const totalCount = total || 0
            const percent = totalCount ? ((hits / totalCount) * 100).toFixed(2) : 0
            return `${percent}% (${hits}/${totalCount})`
          },
        },
        'indexing_latency': {
          label: t(`${fieldPrefix}.technicalParameters.embeddingTime`),
          render: value => formatTime(value),
        },
        'tokens': {
          label: t(`${fieldPrefix}.technicalParameters.embeddedSpend`),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}
|
|
const langPrefix = 'datasetDocuments.metadata.languageMap.'

/**
 * Returns an object mapping each language code to its translated name,
 * looked up under the `languageMap` translation namespace.
 */
export const useLanguages = () => {
  const { t } = useTranslation()
  // Resolve one language code to its translated display name.
  const nameOf = (code: string) => t(`${langPrefix}${code}`)

  return {
    zh: nameOf('zh'),
    en: nameOf('en'),
    es: nameOf('es'),
    fr: nameOf('fr'),
    de: nameOf('de'),
    ja: nameOf('ja'),
    ko: nameOf('ko'),
    ru: nameOf('ru'),
    ar: nameOf('ar'),
    pt: nameOf('pt'),
    it: nameOf('it'),
    nl: nameOf('nl'),
    pl: nameOf('pl'),
    sv: nameOf('sv'),
    tr: nameOf('tr'),
    he: nameOf('he'),
    hi: nameOf('hi'),
    da: nameOf('da'),
    fi: nameOf('fi'),
    no: nameOf('no'),
    hu: nameOf('hu'),
    el: nameOf('el'),
    cs: nameOf('cs'),
    th: nameOf('th'),
    id: nameOf('id'),
    ro: nameOf('ro'),
  }
}
|
|
const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'

/**
 * Returns an object mapping each book category key to its translated label,
 * looked up under the `categoryMap.book` translation namespace.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()
  // Resolve one category key to its translated label.
  const labelOf = (category: string) => t(`${bookCategoryPrefix}${category}`)

  return {
    fiction: labelOf('fiction'),
    biography: labelOf('biography'),
    history: labelOf('history'),
    science: labelOf('science'),
    technology: labelOf('technology'),
    education: labelOf('education'),
    philosophy: labelOf('philosophy'),
    religion: labelOf('religion'),
    socialSciences: labelOf('socialSciences'),
    art: labelOf('art'),
    travel: labelOf('travel'),
    health: labelOf('health'),
    selfHelp: labelOf('selfHelp'),
    businessEconomics: labelOf('businessEconomics'),
    cooking: labelOf('cooking'),
    childrenYoungAdults: labelOf('childrenYoungAdults'),
    comicsGraphicNovels: labelOf('comicsGraphicNovels'),
    poetry: labelOf('poetry'),
    drama: labelOf('drama'),
    other: labelOf('other'),
  }
}
|
|
const personalDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.personalDoc.'

/**
 * Returns an object mapping each personal-document category key to its
 * translated label, looked up under the `categoryMap.personalDoc` namespace.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()
  // Resolve one category key to its translated label.
  const labelOf = (category: string) => t(`${personalDocCategoryPrefix}${category}`)

  return {
    notes: labelOf('notes'),
    blogDraft: labelOf('blogDraft'),
    diary: labelOf('diary'),
    researchReport: labelOf('researchReport'),
    bookExcerpt: labelOf('bookExcerpt'),
    schedule: labelOf('schedule'),
    list: labelOf('list'),
    projectOverview: labelOf('projectOverview'),
    photoCollection: labelOf('photoCollection'),
    creativeWriting: labelOf('creativeWriting'),
    codeSnippet: labelOf('codeSnippet'),
    designDraft: labelOf('designDraft'),
    personalResume: labelOf('personalResume'),
    other: labelOf('other'),
  }
}
|
|
const businessDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.businessDoc.'

/**
 * Returns an object mapping each business-document category key to its
 * translated label, looked up under the `categoryMap.businessDoc` namespace.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()
  // Resolve one category key to its translated label.
  const labelOf = (category: string) => t(`${businessDocCategoryPrefix}${category}`)

  return {
    meetingMinutes: labelOf('meetingMinutes'),
    researchReport: labelOf('researchReport'),
    proposal: labelOf('proposal'),
    employeeHandbook: labelOf('employeeHandbook'),
    trainingMaterials: labelOf('trainingMaterials'),
    requirementsDocument: labelOf('requirementsDocument'),
    designDocument: labelOf('designDocument'),
    productSpecification: labelOf('productSpecification'),
    financialReport: labelOf('financialReport'),
    marketAnalysis: labelOf('marketAnalysis'),
    projectPlan: labelOf('projectPlan'),
    teamStructure: labelOf('teamStructure'),
    policiesProcedures: labelOf('policiesProcedures'),
    contractsAgreements: labelOf('contractsAgreements'),
    emailCorrespondence: labelOf('emailCorrespondence'),
    other: labelOf('other'),
  }
}
|
|