Files
dify/web/app/components/datasets/documents/create-from-pipeline/processing/index.spec.tsx
CodingOnStar e2edcdb0cf fix(tests): normalize path handling in useDocLink mock for document processing tests
- Updated the mock implementation of useDocLink to strip leading slashes from paths, aligning with actual implementation behavior.
- This change ensures that the generated documentation URLs are consistent and accurate, improving the reliability of tests that depend on this mock.

These adjustments enhance the overall stability and correctness of the testing suite for document processing components.
2025-12-19 13:46:11 +08:00

809 lines
24 KiB
TypeScript

import { render, screen } from '@testing-library/react'
import React from 'react'
import Processing from './index'
import type { InitialDocumentDetail } from '@/models/pipeline'
import { DatasourceType } from '@/models/pipeline'
import type { DocumentIndexingStatus } from '@/models/datasets'
// ==========================================
// Mock External Dependencies
// ==========================================
// Mock react-i18next (handled by __mocks__/react-i18next.ts but we override for custom messages).
// The stub translator echoes the key it is given, so assertions match on raw i18n keys.
jest.mock('react-i18next', () => {
  function useTranslation() {
    return {
      t: (key: string) => key,
    }
  }
  return { useTranslation }
})
// Mock useDocLink - the hook hands back a function that builds documentation URLs.
// A single leading slash is removed from the path to match actual implementation behavior.
jest.mock('@/context/i18n', () => {
  const DOCS_BASE_URL = 'https://docs.dify.ai/en-US/'
  const useDocLink = () => (path?: string) => {
    // A missing or empty path resolves to the bare docs root.
    if (!path)
      return DOCS_BASE_URL
    const relativePath = path.startsWith('/') ? path.slice(1) : path
    return `${DOCS_BASE_URL}${relativePath}`
  }
  return { useDocLink }
})
// Mock dataset detail context.
// Tests assign `mockDataset` before rendering; the mocked selector hook reads its
// current value every time it is called.
let mockDataset: {
  id?: string
  indexing_technique?: string
  retrieval_model_dict?: { search_method?: string }
} | undefined
jest.mock('@/context/dataset-detail', () => {
  function useDatasetDetailContextWithSelector<T>(selector: (state: { dataset?: typeof mockDataset }) => T): T {
    const state = { dataset: mockDataset }
    return selector(state)
  }
  return { useDatasetDetailContextWithSelector }
})
// Mock the EmbeddingProcess component: each render stores the received props in
// `embeddingProcessProps` and echoes the key values into spans carrying test ids.
let embeddingProcessProps: Record<string, unknown> = {}
jest.mock('./embedding-process', () => {
  const MockEmbeddingProcess = (props: Record<string, unknown>) => {
    embeddingProcessProps = props
    const { datasetId, batchId, documents, indexingType, retrievalMethod } = props
    // Falsy indexing/retrieval values are rendered as the literal text 'undefined'.
    return (
      <div data-testid="embedding-process">
        <span data-testid="ep-dataset-id">{datasetId as string}</span>
        <span data-testid="ep-batch-id">{batchId as string}</span>
        <span data-testid="ep-documents-count">{(documents as unknown[])?.length ?? 0}</span>
        <span data-testid="ep-indexing-type">{indexingType as string || 'undefined'}</span>
        <span data-testid="ep-retrieval-method">{retrievalMethod as string || 'undefined'}</span>
      </div>
    )
  }
  return { __esModule: true, default: MockEmbeddingProcess }
})
// ==========================================
// Test Data Factory Functions
// ==========================================
// Counter backing generated document ids; the suite's beforeEach resets it to 0.
let documentIdCounter = 0
/**
 * Creates a mock InitialDocumentDetail for testing.
 *
 * Fields in `overrides` replace the defaults. When no `id` is supplied (absent or
 * explicitly `undefined`), a deterministic counter-based id (`doc-1`, `doc-2`, ...)
 * is generated so tests stay reproducible; the counter only advances in that case.
 *
 * @param overrides - Partial document fields that take precedence over the defaults.
 * @returns A document built from the defaults with the given overrides applied.
 */
const createMockDocument = (overrides: Partial<InitialDocumentDetail> = {}): InitialDocumentDetail => {
  // Pull `id` out before spreading: an explicit `id: undefined` in `overrides`
  // would otherwise overwrite the generated fallback id with `undefined`.
  const { id, ...fieldOverrides } = overrides
  return {
    id: id ?? `doc-${++documentIdCounter}`,
    name: 'test-document.txt',
    data_source_type: DatasourceType.localFile,
    data_source_info: {},
    enable: true,
    error: '',
    indexing_status: 'waiting' as DocumentIndexingStatus,
    position: 0,
    ...fieldOverrides,
  }
}
/**
 * Creates a list of mock documents.
 *
 * Document N (1-based) gets id `doc-N`, name `document-N.txt` and position N - 1.
 *
 * @param count - Number of documents to generate.
 * @returns The generated documents in position order.
 */
const createMockDocuments = (count: number): InitialDocumentDetail[] => {
  const positions = Array.from({ length: count }, (_, position) => position)
  return positions.map((position) => {
    const ordinal = position + 1
    return createMockDocument({
      id: `doc-${ordinal}`,
      name: `document-${ordinal}.txt`,
      position,
    })
  })
}
// ==========================================
// Test Suite
// ==========================================
describe('Processing', () => {
beforeEach(() => {
  // Start every test from a clean slate: generated ids begin at doc-1 again and
  // no props from a previous render are left behind.
  documentIdCounter = 0
  embeddingProcessProps = {}
  // Default dataset exposed through the mocked dataset-detail context.
  mockDataset = {
    id: 'dataset-123',
    indexing_technique: 'high_quality',
    retrieval_model_dict: { search_method: 'semantic_search' },
  }
  jest.clearAllMocks()
})
// ==========================================
// Rendering Tests
// ==========================================
describe('Rendering', () => {
  // Basic rendering checks for the Processing component.
  const DEFAULT_BATCH_ID = 'batch-123'
  const LEARN_MORE_KEY = 'datasetPipeline.addDocuments.stepThree.learnMore'

  // Mounts <Processing /> with `documentCount` generated documents and returns the render result.
  const mount = (documentCount: number, batchId: string = DEFAULT_BATCH_ID) =>
    render(<Processing batchId={batchId} documents={createMockDocuments(documentCount)} />)

  it('should render without crashing', () => {
    mount(2)

    expect(screen.getByTestId('embedding-process')).toBeInTheDocument()
  })

  it('should render the EmbeddingProcess component', () => {
    mount(3, 'batch-456')

    expect(screen.getByTestId('embedding-process')).toBeInTheDocument()
  })

  it('should render the side tip section with correct content', () => {
    mount(1)

    // The i18n mock echoes keys, so the raw translation keys show up as text.
    const expectedKeys = [
      'datasetCreation.stepThree.sideTipTitle',
      'datasetCreation.stepThree.sideTipContent',
      LEARN_MORE_KEY,
    ]
    for (const key of expectedKeys)
      expect(screen.getByText(key)).toBeInTheDocument()
  })

  it('should render the documentation link with correct attributes', () => {
    mount(1)

    const learnMoreLink = screen.getByRole('link', { name: LEARN_MORE_KEY })
    const expectedAttributes: Array<[string, string]> = [
      ['href', 'https://docs.dify.ai/en-US/guides/knowledge-base/integrate-knowledge-within-application'],
      ['target', '_blank'],
      ['rel', 'noreferrer noopener'],
    ]
    for (const [attribute, value] of expectedAttributes)
      expect(learnMoreLink).toHaveAttribute(attribute, value)
  })

  it('should render the book icon in the side tip', () => {
    const view = mount(1)

    // The icon wrapper is located through its shadow utility classes.
    const iconWrapper = view.container.querySelector('.shadow-lg.shadow-shadow-shadow-5')
    expect(iconWrapper).toBeInTheDocument()
  })
})
// ==========================================
// Props Testing
// ==========================================
describe('Props', () => {
  // Verifies that own props and context-derived values reach the mocked EmbeddingProcess.
  type DatasetFixture = NonNullable<typeof mockDataset>

  // Builds a dataset fixture from the suite defaults with the given fields replaced.
  const datasetWith = (overrides: DatasetFixture): DatasetFixture => ({
    id: 'dataset-123',
    indexing_technique: 'high_quality',
    retrieval_model_dict: { search_method: 'semantic_search' },
    ...overrides,
  })

  // Mounts <Processing /> with the given documents and batch id.
  const mount = (documents: InitialDocumentDetail[], batchId: string = 'batch-123') =>
    render(<Processing batchId={batchId} documents={documents} />)

  it('should pass batchId to EmbeddingProcess', () => {
    const batchId = 'test-batch-id-789'

    mount(createMockDocuments(1), batchId)

    expect(screen.getByTestId('ep-batch-id')).toHaveTextContent(batchId)
    expect(embeddingProcessProps.batchId).toBe(batchId)
  })

  it('should pass documents to EmbeddingProcess', () => {
    const fiveDocuments = createMockDocuments(5)

    mount(fiveDocuments)

    expect(screen.getByTestId('ep-documents-count')).toHaveTextContent('5')
    expect(embeddingProcessProps.documents).toEqual(fiveDocuments)
  })

  it('should pass datasetId from context to EmbeddingProcess', () => {
    mockDataset = datasetWith({ id: 'context-dataset-id' })

    mount(createMockDocuments(1))

    expect(screen.getByTestId('ep-dataset-id')).toHaveTextContent('context-dataset-id')
    expect(embeddingProcessProps.datasetId).toBe('context-dataset-id')
  })

  it('should pass indexingType from context to EmbeddingProcess', () => {
    mockDataset = datasetWith({ indexing_technique: 'economy' })

    mount(createMockDocuments(1))

    expect(screen.getByTestId('ep-indexing-type')).toHaveTextContent('economy')
    expect(embeddingProcessProps.indexingType).toBe('economy')
  })

  it('should pass retrievalMethod from context to EmbeddingProcess', () => {
    mockDataset = datasetWith({ retrieval_model_dict: { search_method: 'keyword_search' } })

    mount(createMockDocuments(1))

    expect(screen.getByTestId('ep-retrieval-method')).toHaveTextContent('keyword_search')
    expect(embeddingProcessProps.retrievalMethod).toBe('keyword_search')
  })

  it('should handle different document types', () => {
    // One document per data source kind: local file, online document, website crawl.
    const localFile = createMockDocument({
      id: 'doc-local',
      name: 'local-file.pdf',
      data_source_type: DatasourceType.localFile,
    })
    const onlineDocument = createMockDocument({
      id: 'doc-online',
      name: 'online-doc',
      data_source_type: DatasourceType.onlineDocument,
    })
    const websitePage = createMockDocument({
      id: 'doc-website',
      name: 'website-page',
      data_source_type: DatasourceType.websiteCrawl,
    })
    const mixedDocuments = [localFile, onlineDocument, websitePage]

    mount(mixedDocuments)

    expect(screen.getByTestId('ep-documents-count')).toHaveTextContent('3')
    expect(embeddingProcessProps.documents).toEqual(mixedDocuments)
  })
})
// ==========================================
// Edge Cases
// ==========================================
describe('Edge Cases', () => {
  // Boundary conditions and unusual inputs.
  type DatasetFixture = NonNullable<typeof mockDataset>

  // Builds a dataset fixture from the suite defaults with the given fields replaced
  // (an explicit `undefined` override blanks the corresponding field).
  const datasetWith = (overrides: DatasetFixture): DatasetFixture => ({
    id: 'dataset-123',
    indexing_technique: 'high_quality',
    retrieval_model_dict: { search_method: 'semantic_search' },
    ...overrides,
  })

  // Mounts <Processing /> with the given documents and batch id.
  const mount = (documents: InitialDocumentDetail[], batchId: string = 'batch-123') =>
    render(<Processing batchId={batchId} documents={documents} />)

  // Asserts that the mocked EmbeddingProcess made it into the DOM.
  const expectEmbeddingProcessRendered = () => {
    expect(screen.getByTestId('embedding-process')).toBeInTheDocument()
  }

  it('should handle empty documents array', () => {
    mount([])

    expectEmbeddingProcessRendered()
    expect(screen.getByTestId('ep-documents-count')).toHaveTextContent('0')
    expect(embeddingProcessProps.documents).toEqual([])
  })

  it('should handle empty batchId', () => {
    mount(createMockDocuments(1), '')

    expectEmbeddingProcessRendered()
    expect(screen.getByTestId('ep-batch-id')).toHaveTextContent('')
  })

  it('should handle undefined dataset from context', () => {
    mockDataset = undefined

    mount(createMockDocuments(1))

    expectEmbeddingProcessRendered()
    // Nothing can be derived from a missing dataset.
    const contextDerivedProps = ['datasetId', 'indexingType', 'retrievalMethod']
    for (const propName of contextDerivedProps)
      expect(embeddingProcessProps[propName]).toBeUndefined()
  })

  it('should handle dataset with undefined id', () => {
    mockDataset = datasetWith({ id: undefined })

    mount(createMockDocuments(1))

    expectEmbeddingProcessRendered()
    expect(embeddingProcessProps.datasetId).toBeUndefined()
  })

  it('should handle dataset with undefined indexing_technique', () => {
    mockDataset = datasetWith({ indexing_technique: undefined })

    mount(createMockDocuments(1))

    expectEmbeddingProcessRendered()
    expect(embeddingProcessProps.indexingType).toBeUndefined()
  })

  it('should handle dataset with undefined retrieval_model_dict', () => {
    mockDataset = datasetWith({ retrieval_model_dict: undefined })

    mount(createMockDocuments(1))

    expectEmbeddingProcessRendered()
    expect(embeddingProcessProps.retrievalMethod).toBeUndefined()
  })

  it('should handle dataset with empty retrieval_model_dict', () => {
    mockDataset = datasetWith({ retrieval_model_dict: {} })

    mount(createMockDocuments(1))

    expectEmbeddingProcessRendered()
    expect(embeddingProcessProps.retrievalMethod).toBeUndefined()
  })

  it('should handle large number of documents', () => {
    mount(createMockDocuments(100))

    expectEmbeddingProcessRendered()
    expect(screen.getByTestId('ep-documents-count')).toHaveTextContent('100')
  })

  it('should handle documents with error status', () => {
    const failedDocument = createMockDocument({
      id: 'doc-error',
      name: 'error-doc.txt',
      error: 'Processing failed',
      indexing_status: 'error' as DocumentIndexingStatus,
    })
    const documents = [failedDocument]

    mount(documents)

    expectEmbeddingProcessRendered()
    expect(embeddingProcessProps.documents).toEqual(documents)
  })

  it('should handle documents with special characters in names', () => {
    const oddlyNamedDocument = createMockDocument({
      id: 'doc-special',
      name: 'document with spaces & special-chars_测试.pdf',
    })
    const documents = [oddlyNamedDocument]

    mount(documents)

    expectEmbeddingProcessRendered()
    expect(embeddingProcessProps.documents).toEqual(documents)
  })

  it('should handle batchId with special characters', () => {
    const batchId = 'batch-123-abc_xyz:456'

    mount(createMockDocuments(1), batchId)

    expect(screen.getByTestId('ep-batch-id')).toHaveTextContent('batch-123-abc_xyz:456')
  })
})
// ==========================================
// Context Integration Tests
// ==========================================
describe('Context Integration', () => {
  // Checks that values read through the dataset context selectors are forwarded.
  it('should correctly use context selectors for all dataset properties', () => {
    mockDataset = {
      id: 'full-dataset-id',
      indexing_technique: 'high_quality',
      retrieval_model_dict: { search_method: 'hybrid_search' },
    }
    const documents = createMockDocuments(1)

    render(<Processing batchId="batch-123" documents={documents} />)

    const expectedProps: Array<[string, string]> = [
      ['datasetId', 'full-dataset-id'],
      ['indexingType', 'high_quality'],
      ['retrievalMethod', 'hybrid_search'],
    ]
    for (const [propName, expectedValue] of expectedProps)
      expect(embeddingProcessProps[propName]).toBe(expectedValue)
  })

  it('should handle context changes with different indexing techniques', () => {
    // The same documents array is used for both renders, so only the context differs.
    const documents = createMockDocuments(1)
    const buildElement = () => <Processing batchId="batch-123" documents={documents} />

    // First pass: economy indexing.
    mockDataset = {
      id: 'dataset-economy',
      indexing_technique: 'economy',
      retrieval_model_dict: { search_method: 'keyword_search' },
    }
    const view = render(buildElement())
    expect(embeddingProcessProps.indexingType).toBe('economy')

    // Second pass: swap the context value to high_quality, then rerender.
    mockDataset = {
      id: 'dataset-hq',
      indexing_technique: 'high_quality',
      retrieval_model_dict: { search_method: 'semantic_search' },
    }
    view.rerender(buildElement())
    expect(embeddingProcessProps.indexingType).toBe('high_quality')
  })
})
// ==========================================
// Layout Tests
// ==========================================
describe('Layout', () => {
  // Structural checks driven by the utility class names on the rendered markup.

  // Renders Processing with a single document and returns the container for class-based queries.
  const renderContainer = (): HTMLElement => {
    const { container } = render(<Processing batchId="batch-123" documents={createMockDocuments(1)} />)
    return container
  }

  it('should render with correct layout structure', () => {
    const container = renderContainer()

    // Outer flex wrapper.
    expect(container.querySelector('.flex.h-full.w-full.justify-center')).toBeInTheDocument()
    // Left panel (3/5 width).
    expect(container.querySelector('.w-3\\/5')).toBeInTheDocument()
    // Right panel (2/5 width).
    expect(container.querySelector('.w-2\\/5')).toBeInTheDocument()
  })

  it('should render side tip card with correct styling', () => {
    const container = renderContainer()

    // Card container with rounded corners and section background.
    const card = container.querySelector('.rounded-xl.bg-background-section')
    expect(card).toBeInTheDocument()
  })

  it('should constrain max-width for EmbeddingProcess container', () => {
    const container = renderContainer()

    const widthLimitedWrapper = container.querySelector('.max-w-\\[640px\\]')
    expect(widthLimitedWrapper).toBeInTheDocument()
  })
})
// ==========================================
// Document Variations Tests
// ==========================================
describe('Document Variations', () => {
  // Different document configurations must be forwarded to EmbeddingProcess untouched.

  // Mounts <Processing /> with the given documents under the default batch id.
  const mount = (documents: InitialDocumentDetail[]) =>
    render(<Processing batchId="batch-123" documents={documents} />)

  it('should handle documents with all indexing statuses', () => {
    const statuses: DocumentIndexingStatus[] = [
      'waiting',
      'parsing',
      'cleaning',
      'splitting',
      'indexing',
      'paused',
      'error',
      'completed',
    ]
    // One document per status, positioned in list order.
    const documents: InitialDocumentDetail[] = []
    statuses.forEach((status, position) => {
      documents.push(createMockDocument({
        id: `doc-${status}`,
        name: `${status}-doc.txt`,
        indexing_status: status,
        position,
      }))
    })

    mount(documents)

    expect(screen.getByTestId('ep-documents-count')).toHaveTextContent(String(statuses.length))
    expect(embeddingProcessProps.documents).toEqual(documents)
  })

  it('should handle documents with enabled and disabled states', () => {
    const enabledDocument = createMockDocument({ id: 'doc-enabled', enable: true })
    const disabledDocument = createMockDocument({ id: 'doc-disabled', enable: false })
    const documents = [enabledDocument, disabledDocument]

    mount(documents)

    expect(screen.getByTestId('ep-documents-count')).toHaveTextContent('2')
    expect(embeddingProcessProps.documents).toEqual(documents)
  })

  it('should handle documents from online drive source', () => {
    const driveDocument = createMockDocument({
      id: 'doc-drive',
      name: 'google-drive-doc',
      data_source_type: DatasourceType.onlineDrive,
      data_source_info: { provider: 'google_drive' },
    })
    const documents = [driveDocument]

    mount(documents)

    expect(screen.getByTestId('embedding-process')).toBeInTheDocument()
    expect(embeddingProcessProps.documents).toEqual(documents)
  })

  it('should handle documents with complex data_source_info', () => {
    const notionDocument = createMockDocument({
      id: 'doc-notion',
      name: 'Notion Page',
      data_source_type: DatasourceType.onlineDocument,
      data_source_info: {
        notion_page_icon: { type: 'emoji', emoji: '📄' },
        notion_workspace_id: 'ws-123',
        notion_page_id: 'page-456',
      },
    })
    const documents = [notionDocument]

    mount(documents)

    expect(embeddingProcessProps.documents).toEqual(documents)
  })
})
// ==========================================
// Retrieval Method Variations
// ==========================================
describe('Retrieval Method Variations', () => {
  // Each search method placed on the context dataset must be forwarded unchanged.
  const searchMethods = ['semantic_search', 'keyword_search', 'hybrid_search', 'full_text_search']

  it.each(searchMethods)('should handle %s retrieval method', (searchMethod) => {
    mockDataset = {
      id: 'dataset-123',
      indexing_technique: 'high_quality',
      retrieval_model_dict: { search_method: searchMethod },
    }
    const documents = createMockDocuments(1)

    render(<Processing batchId="batch-123" documents={documents} />)

    expect(embeddingProcessProps.retrievalMethod).toBe(searchMethod)
  })
})
// ==========================================
// Indexing Technique Variations
// ==========================================
describe('Indexing Technique Variations', () => {
  // Each indexing technique placed on the context dataset must be forwarded unchanged.
  const techniques = ['high_quality', 'economy']

  it.each(techniques)('should handle %s indexing technique', (indexingTechnique) => {
    mockDataset = {
      id: 'dataset-123',
      indexing_technique: indexingTechnique,
      retrieval_model_dict: { search_method: 'semantic_search' },
    }
    const documents = createMockDocuments(1)

    render(<Processing batchId="batch-123" documents={documents} />)

    expect(embeddingProcessProps.indexingType).toBe(indexingTechnique)
  })
})
})