Compare commits


201 Commits

Author SHA1 Message Date
FFXN
3c22d22835 fix: fix summary index bug. (#31258) 2026-01-20 11:55:42 +08:00
FFXN
008a5f361d fix: fix summary index bug. 2026-01-20 11:53:16 +08:00
zxhlyh
08caa4fce3 Merge branch 'feat/summary-index' into deploy/dev 2026-01-19 15:35:41 +08:00
zxhlyh
5293fbe8ba fix: hit testing chunk detail summary 2026-01-19 15:35:07 +08:00
zxhlyh
ed555c5fe7 Merge branch 'feat/summary-index' into deploy/dev 2026-01-19 15:14:28 +08:00
zxhlyh
22974ea6b0 fix: preview chunk summary 2026-01-19 15:13:51 +08:00
zxhlyh
754b01366a Merge branch 'chore/relocate-datasets-api-form' into deploy/dev 2026-01-19 14:51:03 +08:00
zxhlyh
8af626092e chore: relocate datasets api form 2026-01-19 14:50:01 +08:00
CodingOnStar
49b3bad26b locl 2026-01-19 11:50:26 +08:00
CodingOnStar
50616c25d4 Merge branch 'feat/storage-50' into deploy/dev 2026-01-19 11:49:16 +08:00
CodingOnStar
3b4b5b332c feat(billing): enhance usage info with storage threshold display
- Add storageThreshold, storageTooltip, storageTotalDisplay props to UsageInfo
- Implement indeterminate state in ProgressBar for usage below threshold
- Update VectorSpaceInfo to calculate total based on plan type
- Add i18n for storage threshold tooltip (en-US, ja-JP, zh-Hans)
2026-01-19 11:47:35 +08:00
yyh
e8397ae7a8 fix(web): Zustand testing best practices and state read optimization (#31163)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-01-19 10:31:34 +08:00
yyh
8893913b3a feat: add Vercel React Best Practices skill for Claude Code (#31133) 2026-01-19 10:30:49 +08:00
Stephen Zhou
14f123802d chore: update vite related version (#31180) 2026-01-19 10:28:06 +08:00
zxhlyh
62c3f14570 Merge branch 'main' into feat/summary-index 2026-01-19 10:21:40 +08:00
CodingOnStar
41c3b1c57c Merge branch 'feat/support-free-try-app' into deploy/dev 2026-01-18 12:58:58 +08:00
Stephen Zhou
7b66bbc35a chore: introduce bulk-suppressions and multithread linting (#31157) 2026-01-17 19:51:56 +08:00
Pegasus
77366f33a4 feat(web): add loading indicators for infinite scroll pagination (#31110)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
2026-01-17 17:36:07 +08:00
yyh
e3b0918dd9 test(web): add global zustand mock for tests (#31149) 2026-01-17 17:29:13 +08:00
Joel
994357d8b5 merge 2026-01-17 09:46:38 +08:00
FFXN
5fb9fe3c94 fix: fix summary index bug. (#31134) 2026-01-16 20:24:57 +08:00
FFXN
4fb08ae7d2 fix: fix summary index bug. 2026-01-16 20:24:18 +08:00
FFXN
7481762acb fix: fix summary index bug. (#31125) 2026-01-16 18:56:17 +08:00
FFXN
fcb2fe55e7 fix: fix summary index bug. 2026-01-16 18:55:10 +08:00
yyh
a0aa8cdb45 Merge remote-tracking branch 'origin/main' into feature/task-quadrant-view 2026-01-16 18:20:29 +08:00
yyh
ae8618877b fix(web): quadrant matrix i18n 2026-01-16 18:17:28 +08:00
가은 정
fad6fa141d chore: improve accessibility for learn more link (#31120)
Co-authored-by: khmandarrin <jeong-ga-eun@jeong-ga-eun-ui-MacBookAir.local>
2026-01-16 18:12:07 +08:00
Pádraic Slattery
30821fd26c chore: Update outdated GitHub Actions versions (#31114) 2026-01-16 17:56:55 +08:00
Xiangxuan Qu
1a9fdd9a65 refactor: migrate tag list API query parameters to Pydantic (#31097)
Co-authored-by: fghpdf <fghpdf@users.noreply.github.com>
2026-01-16 17:49:52 +08:00
Stream
de610cbf39 fix: call get_text_content() instead of casting to str (#31121)
Signed-off-by: Stream <Stream_2@qq.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-01-16 18:41:00 +09:00
yyh
1c55602445 fix(web): add calendar icon and DDL label to deadline badge in task-item 2026-01-16 17:24:11 +08:00
yyh
a3f1220d23 feat(web): add fullscreen expand mode to quadrant-matrix component
- Add expand button in header to open FullScreenModal
- Add numbered circles (1-4) to quadrant headers
- Add expanded prop to show full content without line-clamp
- Reorder grid layout: Q1 top-left, Q2 top-right, Q3 bottom-left, Q4 bottom-right
- Remove axis labels for cleaner design
2026-01-16 17:16:13 +08:00
Stream
4d7384731e fix: call get_text_content on LLMResult
Signed-off-by: Stream <Stream_2@qq.com>
2026-01-16 17:08:39 +08:00
yyh
d62e16b9bb fix(web): improve quadrant-matrix layout and text overflow handling
- Simplify axis label layout with horizontal/vertical arrangement
- Add proper text truncation with line-clamp and tooltips
- Fix overflow issues by adding min-w-0 on flex children
- Move scores inline with task name for compact display
- Add task count badge to quadrant headers
- Reduce maxDisplay to 3 for better density
2026-01-16 16:58:57 +08:00
yyh
13f2a43ccc feat(web): add Eisenhower Matrix visualization component for task quadrants
Add a new quadrant-matrix component that renders tasks in a 2x2 grid based
on importance and urgency scores. Integrate with code-block as a new
'quadrant' language type for markdown rendering.
2026-01-16 16:58:56 +08:00
Stream
553dd3266b fix: call get_text_content on LLMResult
Signed-off-by: Stream <Stream_2@qq.com>
2026-01-16 16:46:28 +08:00
yyh
6903c31b84 fix(search-input): retain focus after clearing input (#31107) 2026-01-16 16:22:14 +08:00
盐粒 Yanli
b2cc9b255d chore: Update coding agent workflow for backend (#31093) 2026-01-16 14:28:47 +08:00
XiaoBa
e9f0e1e839 fix(web): replace Response.json with legacy Response constructor for pre-Chrome 105 compatibility(#31091) (#31095)
Co-authored-by: Xiaoba Yu <xb1823725853@gmail.com>
2026-01-16 14:26:23 +08:00
pavior
cd497a8c52 fix(web): use portal for variable picker in code editor (Fixes #31063) (#31066) 2026-01-16 13:31:57 +08:00
Stephen Zhou
7aab4529e6 chore: lint for state hooks (#31088) 2026-01-16 11:58:28 +08:00
E.G
4bff0cd0ab fix: resolve 'Expand all chunks' button not working (#31074)
Co-authored-by: GlobalStar117 <GlobalStar117@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <427733928@qq.com>
2026-01-16 11:34:42 +08:00
zxhlyh
5b0590d58e Merge branch 'feat/summary-index' into deploy/dev 2026-01-16 10:56:12 +08:00
zxhlyh
d97f2df85c Merge branch 'main' into feat/summary-index 2026-01-16 10:55:58 +08:00
zxhlyh
d3c09f16a9 merge feat/summary-index 2026-01-16 10:55:18 +08:00
zxhlyh
fde8efa4a2 fix: summary index in parent child chunk 2026-01-16 10:49:38 +08:00
byteforge
c98870c3f4 refactor: always preserve marketplace search state in URL (#31069)
Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>
2026-01-16 08:52:53 +09:00
Stephen Zhou
b06c7c8f33 ci: disable limit annotation (#31072) 2026-01-15 23:04:26 +08:00
Stephen Zhou
1a2fce7055 ci: eslint annotation (#31056) 2026-01-15 21:49:46 +08:00
FFXN
5f6d1297b0 fix: fix summary index bug. (#31058) 2026-01-15 18:10:46 +08:00
FFXN
869e70964f fix: fix summary index bug. 2026-01-15 18:09:48 +08:00
zxhlyh
1f313eb15c fix: pipeline run panel summary 2026-01-15 18:03:09 +08:00
zxhlyh
f02adc26e5 fix: pipeline run panel summary 2026-01-15 18:02:19 +08:00
FFXN
73027eab0a fix: fix summary index bug. (#31057) 2026-01-15 17:58:04 +08:00
FFXN
74245fea8e fix: fix summary index bug. 2026-01-15 17:57:15 +08:00
lif
2b021e8752 fix: remove hardcoded 48-character limit from text inputs (#30156)
Signed-off-by: majiayu000 <1835304752@qq.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2026-01-15 17:43:00 +08:00
zxhlyh
5bc4bba668 Merge branch 'feat/summary-index' into deploy/dev 2026-01-15 16:09:44 +08:00
zxhlyh
1126a2aa95 merge main 2026-01-15 16:08:29 +08:00
FFXN
2107a3c32c feat: knowledgebase summary index (#31047) 2026-01-15 16:07:17 +08:00
FFXN
22d0c55363 fix: fix summary index bug. 2026-01-15 15:10:38 +08:00
zxhlyh
7c3ce7b1e6 fix: summary index change in create document 2026-01-15 13:48:07 +08:00
FFXN
f4d20a02aa feat: fix summary index bug. 2026-01-15 11:06:18 +08:00
FFXN
7eb65b07c8 feat: Make summary index support vision, and make the code more standardized. 2026-01-14 17:52:27 +08:00
zxhlyh
830a7fb034 Merge branch 'main' into feat/summary-index 2026-01-14 13:40:15 +08:00
FFXN
9b7e807690 feat: summary index (#30950) 2026-01-14 11:26:44 +08:00
FFXN
af86f8de6f Merge branch 'feat/knowledgebase-summaryIndex' into feat/summary-index 2026-01-14 11:25:15 +08:00
FFXN
ec78676949 Merge branch 'deploy/dev' into feat/summary-index 2026-01-13 21:30:50 +08:00
zxhlyh
01a7dbcee8 Merge branch 'main' into feat/summary-index 2026-01-13 16:29:09 +08:00
zxhlyh
4fe8d2491e feat: summary index 2026-01-13 16:27:32 +08:00
FFXN
76da8b4ff3 Merge remote-tracking branch 'origin/deploy/dev' 2026-01-12 17:09:25 +08:00
FFXN
25bfc1cc3b feat: implement Summary Index feature. 2026-01-12 16:52:21 +08:00
zxhlyh
5c2ae922bc merge main 2026-01-12 13:42:17 +08:00
Joel
a92df530da mrege main 2026-01-12 13:41:27 +08:00
zxhlyh
13eec13a14 feat: summary index 2026-01-12 13:38:18 +08:00
Joel
431936beb9 chore: handle callback warning 2026-01-12 11:33:18 +08:00
Joel
163540bf4a chore: handle refetch after created 2026-01-12 11:30:03 +08:00
Joel
221130b448 chore: remove old i18n 2026-01-12 10:55:02 +08:00
Joel
b1eb265fa5 fix: try app not call conversations and sessions 2026-01-09 16:48:03 +08:00
Joel
c2a0950660 fix: button ui problem 2026-01-09 15:34:48 +08:00
Joel
bfe98009fd chore: fix dataset problems 2026-01-09 14:26:18 +08:00
Joel
ea1704d211 fix: try basic detail errors 2026-01-09 14:14:15 +08:00
Joel
3ed0937734 merge 2026-01-08 18:27:47 +08:00
Jyong
1fcf6e4943 Update 2025_12_16_1817-03ea244985ce_add_type_column_not_null_default_tool.py 2025-12-17 11:12:59 +08:00
FFXN
f4a7efde3d update migration script. 2025-12-16 18:30:12 +08:00
FFXN
38d4f0fd96 Merge remote-tracking branch 'origin/deploy/dev' 2025-12-16 18:25:54 +08:00
FFXN
ec4f885dad update migration script. 2025-12-16 18:19:24 +08:00
autofix-ci[bot]
3781c2a025 [autofix.ci] apply automated fixes 2025-12-16 08:37:32 +00:00
FFXN
3782f17dc7 Optimize code. 2025-12-16 16:35:15 +08:00
FFXN
29698aeed2 Merge remote-tracking branch 'origin/deploy/dev' 2025-12-16 16:26:19 +08:00
FFXN
15ff8efb15 merge alembic head 2025-12-16 16:20:04 +08:00
autofix-ci[bot]
407e1c8276 [autofix.ci] apply automated fixes 2025-12-16 08:14:05 +00:00
FFXN
e368825c21 Merge remote-tracking branch 'upstream/main' 2025-12-16 15:50:49 +08:00
FFXN
8dad6b6a6d Add "type" field to PipelineRecommendedPlugin model; Add query param "type" to recommended-plugins api. 2025-12-16 14:34:59 +08:00
FFXN
2f54965a72 Add "type" field to PipelineRecommendedPlugin model; Add query param "type" to recommended-plugins api. 2025-12-16 10:43:45 +08:00
FFXN
a1a3fa0283 Add "type" field to PipelineRecommendedPlugin model; Add query param "type" to recommended-plugins api. 2025-12-15 16:44:32 +08:00
FFXN
ff7344f3d3 Add "type" field to PipelineRecommendedPlugin model; Add query param "type" to recommended-plugins api. 2025-12-15 16:38:44 +08:00
FFXN
bcd33be22a Add "type" field to PipelineRecommendedPlugin model; Add query param "type" to recommended-plugins api. 2025-12-15 16:33:06 +08:00
Joel
0fb339ca4f fix: saved message 2025-11-18 11:38:12 +08:00
Joel
c1871e67aa chore: hide disabed action in try app 2025-11-18 11:28:13 +08:00
Joel
f711f9a317 fix: webapp url 2025-11-18 11:22:58 +08:00
Joel
9ff3310cb6 chore: handle suggestion readonly 2025-11-18 11:07:01 +08:00
Joel
b6bdcc7052 fix: not auther tool in readonly mode 2025-11-18 11:02:46 +08:00
Joel
67b0771081 fix: try app not ok in chat 2025-11-17 18:21:43 +08:00
Joel
9a07488da9 mrege 2025-11-17 15:42:56 +08:00
Joel
ef043c6906 fix: no app not show problem 2025-11-06 14:53:11 +08:00
Joel
ab814e3eac fix: inputs overwrite by curr item 2025-10-27 14:08:32 +08:00
Joel
a0e1eeb3f1 chore: reset form 2025-10-27 13:57:16 +08:00
Joel
b1ebeb67a7 feat: support new chat 2025-10-27 13:50:36 +08:00
Joel
082179f70f fix: try chat has not set converstaion 2025-10-27 13:38:41 +08:00
Joel
8786ebdbca feat: support use tempalte in create app 2025-10-27 10:58:57 +08:00
Joel
b49a4eab62 feat: add app list context 2025-10-24 18:33:54 +08:00
Joel
0a7b59f500 feat: add tool requirements to flow 2025-10-24 17:49:29 +08:00
Joel
c264d9152f chore: add advanced models 2025-10-24 17:42:38 +08:00
Joel
3bf9d898c0 feat: basic app requirements 2025-10-24 17:29:42 +08:00
Joel
a7f2849e74 fix: try chatbot ui 2025-10-24 16:22:01 +08:00
Joel
0957ece92f fix: the try app always use the curent conversation 2025-10-24 15:57:33 +08:00
Joel
949bf38d3c fix: chat setup ui 2025-10-24 15:30:53 +08:00
Joel
7bafb7f959 feat: chat info 2025-10-24 14:54:06 +08:00
Joel
9735f55ca4 feat: try app alert and i18n 2025-10-24 14:00:24 +08:00
Joel
4c1f9b949b feat: alert info and lodash to lodash-es 2025-10-24 11:24:19 +08:00
Joel
0af0c94dde fix: preview not full 2025-10-24 10:52:05 +08:00
Joel
8e4f0640cc fix: variable readonly in basic app problem 2025-10-24 10:41:18 +08:00
Joel
1f513e3b43 chore: remove debug code 2025-10-23 18:26:38 +08:00
Joel
aa0841e2a8 chore: 18n 2025-10-23 18:05:34 +08:00
Joel
b6a1562357 fix: handle create can not show 2025-10-23 17:54:45 +08:00
Joel
bee0797401 feat: create from try app 2025-10-23 17:45:54 +08:00
Joel
e085f39c13 chore: description and category 2025-10-23 17:29:32 +08:00
Joel
344844d3e0 chore: handle data is large 2025-10-23 16:53:10 +08:00
Joel
6e9f82491d chore: reuse the app detail and right meta 2025-10-23 15:51:59 +08:00
Joel
372b1c3db8 chore: change detail icon 2025-10-23 15:28:12 +08:00
Joel
58d305dbed chore: tab header jp 2025-10-23 15:25:25 +08:00
Joel
0360a0416b feat: integration preview page 2025-10-23 15:23:50 +08:00
Joel
72282b6e8f feat: try app layout 2025-10-23 14:58:17 +08:00
Joel
8391884c4e chore: tab and close btn 2025-10-23 14:45:08 +08:00
Joel
b018f2b0a0 feat: can show app detail modal 2025-10-23 14:17:43 +08:00
Joel
ab56b4a818 merge main 2025-10-23 11:12:13 +08:00
Joel
61ebc756aa feat: workflow preview 2025-10-16 17:38:13 +08:00
Joel
4bea38042a feat: text completion form preview 2025-10-16 14:03:30 +08:00
CodingOnStar
337abc536b fix: update responsive breakpoint and adjust divider visibility in banner component 2025-10-16 13:47:38 +08:00
Joel
cc02b78aca feat: different app preview 2025-10-16 11:27:58 +08:00
Joel
18f2d24f8e chore: preview input field readonly 2025-10-16 10:42:47 +08:00
Joel
0c7b9a462f chore: tools preview readonly 2025-10-16 10:36:36 +08:00
Joel
4dd5580854 chore: preview two cols in panel 2025-10-15 18:16:57 +08:00
Joel
440bd825d8 feat: can show tools in preview 2025-10-15 17:35:59 +08:00
Joel
d2379c38bd chore: handle history panel and completion review crash 2025-10-15 17:35:59 +08:00
CodingOnStar
cbc55c577b Merge branch 'feat/support-free-try-app' of github.com:langgenius/dify into feat/support-free-try-app 2025-10-15 17:20:20 +08:00
CodingOnStar
8e962d15d1 feat: improve explore page banner component with enhanced layout and responsive styles 2025-10-15 17:20:00 +08:00
Joel
b07c766551 chroe: fix ts problem 2025-10-15 16:00:14 +08:00
Joel
9e3dd69277 fix: upload btn not sync right 2025-10-15 15:51:18 +08:00
Joel
db9e5665c2 fix: docuemnt and aduio show condition in preview 2025-10-15 15:35:49 +08:00
Joel
cad77ce0bf chore: audio config readonly 2025-10-15 15:29:09 +08:00
Joel
6f4518ebf7 chore: document readonly 2025-10-15 15:27:18 +08:00
Joel
a8f5748dee chore: vision readonly 2025-10-15 15:21:23 +08:00
Joel
738d3001be chore: chat input and feature readonly 2025-10-15 15:21:22 +08:00
CodingOnStar
df4e32aaa0 Merge branch 'feat/support-free-try-app' of github.com:langgenius/dify into feat/support-free-try-app 2025-10-15 14:36:47 +08:00
CodingOnStar
a25e37a96d feat: implement responsive design and resize handling for explore page banner 2025-10-15 14:36:27 +08:00
Joel
f156b46705 chore: user input readonly 2025-10-15 13:48:39 +08:00
Joel
3b64e118d0 chore: readonly ui 2025-10-15 11:39:41 +08:00
Joel
566cd20849 feat: dataset config support readonly 2025-10-15 11:37:12 +08:00
CodingOnStar
df76527f29 feat: add pause functionality to explore page banner for improved user interaction 2025-10-15 10:36:09 +08:00
CodingOnStar
53a80a5dbe feat: enhance explore page banner functionality with state management and animation improvements 2025-10-15 09:55:14 +08:00
CodingOnStar
1507792a0c Merge branch 'feat/support-free-try-app' of github.com:langgenius/dify into feat/support-free-try-app 2025-10-14 18:54:11 +08:00
CodingOnStar
00b9bbff75 feat: enhance explore page banner functionality with state management and animation improvements 2025-10-14 18:53:29 +08:00
Joel
e1f8b4b387 feat: support show dataset in knowledge 2025-10-14 18:31:42 +08:00
Joel
1539d86f7d chore: instruction and vars to readonly 2025-10-14 17:28:49 +08:00
CodingOnStar
67bb14d3ee chore: update dependencies and improve explore page banner 2025-10-14 15:51:07 +08:00
CodingOnStar
5653309080 feat: add carousel & new banner of explore page 2025-10-14 15:41:22 +08:00
Joel
0f52b34b61 feat: try apps basic app preveiw 2025-10-14 15:38:22 +08:00
CodingOnStar
75e35857c1 feat: add carousel & new banner of explore page 2025-10-14 14:17:49 +08:00
Joel
4f81be70e3 feat: no apps 2025-10-13 18:31:57 +08:00
Joel
1d4d627d05 feat: toogle sidebar 2025-10-13 17:36:24 +08:00
Joel
2357234f39 chore: sidebar ui 2025-10-13 17:11:51 +08:00
Joel
a3f7d8f996 chore: merge main 2025-10-13 16:38:29 +08:00
Joel
56f12e70c1 chore: web apps copywritings 2025-10-13 16:18:57 +08:00
Joel
b14afda160 chore: app gallary nav 2025-10-13 15:40:13 +08:00
Joel
44b4948972 chore: explore card ui and permission 2025-10-13 15:07:25 +08:00
Joel
487eac3b91 chore: add banner permission 2025-10-13 11:27:50 +08:00
Joel
84b2913cd9 feat: filter title 2025-10-13 11:12:10 +08:00
Joel
176d810c8d chore: update category ui 2025-10-13 10:55:49 +08:00
Joel
9e66564526 feat: banner placeholder 2025-10-11 15:07:03 +08:00
Joel
781a9a56cd feat: explore title change 2025-10-11 14:58:54 +08:00
Joel
93be1219eb chore: try app title 2025-10-11 11:00:26 +08:00
Joel
3276d6429d chore: handle completion acion 2025-10-11 10:53:24 +08:00
Joel
50072a63ae feat: support try agent app 2025-10-11 10:42:55 +08:00
Joel
1ab7e1cba8 fix: try chatflow run url problem 2025-10-11 10:11:14 +08:00
Joel
b0aef35c63 feat: try chat flow app 2025-10-10 18:24:56 +08:00
Joel
ac351b700c chore: some ui 2025-10-10 16:51:49 +08:00
Joel
d1e5d30ea9 fix: text generation api url 2025-10-10 16:39:42 +08:00
Joel
c73e84d992 feat: can show text completion run result pages 2025-10-10 16:34:10 +08:00
Joel
5f0bd5119a chore: temp 2025-09-24 13:39:52 +08:00
Joel
8353352bda chore: try app can use web app run 2025-09-22 15:17:11 +08:00
Joel
73845cbec5 feat: text generation 2025-09-19 16:32:11 +08:00
Joel
c2f94e9e8a feat: api call the try app and support disable feedback 2025-09-19 11:32:30 +08:00
Joel
e54efda36f feat: try app page 2025-09-18 14:54:15 +08:00
Joel
d4bd19f6d8 fix: api login detect problems 2025-09-17 17:15:23 +08:00
Joel
4decbbbf18 chore: remove useless api 2025-09-17 14:34:59 +08:00
Joel
b15867f92e chore: feedback api 2025-09-17 14:12:34 +08:00
Joel
a5e5fbc6e0 chore: some api change to new 2025-09-17 14:10:56 +08:00
Joel
1b1471b6d8 fix: stop response api 2025-09-17 14:07:15 +08:00
Joel
5280bffde2 feat: change api to new 2025-09-17 11:17:12 +08:00
Joel
db0fc94b39 chore: change api to support try apps 2025-09-16 18:21:23 +08:00
491 changed files with 22559 additions and 17037 deletions


@@ -1,11 +1,4 @@
{
"enabledPlugins": {
"feature-dev@claude-plugins-official": true,
"context7@claude-plugins-official": true,
"typescript-lsp@claude-plugins-official": true,
"pyright-lsp@claude-plugins-official": true,
"ralph-loop@claude-plugins-official": true
},
"hooks": {
"PreToolUse": [
{
@@ -18,5 +11,10 @@
]
}
]
},
"enabledPlugins": {
"feature-dev@claude-plugins-official": true,
"context7@claude-plugins-official": true,
"ralph-loop@claude-plugins-official": true
}
}


@@ -83,6 +83,9 @@ vi.mock('next/navigation', () => ({
usePathname: () => '/test',
}))
// ✅ Zustand stores: Use real stores (auto-mocked globally)
// Set test state with: useAppStore.setState({ ... })
// Shared state for mocks (if needed)
let mockSharedState = false
@@ -296,7 +299,7 @@ For each test file generated, aim for:
For more detailed information, refer to:
- `references/workflow.md` - **Incremental testing workflow** (MUST READ for multi-file testing)
- `references/mocking.md` - Mock patterns and best practices
- `references/mocking.md` - Mock patterns, Zustand store testing, and best practices
- `references/async-testing.md` - Async operations and API calls
- `references/domain-components.md` - Workflow, Dataset, Configuration testing
- `references/common-patterns.md` - Frequently used testing patterns


@@ -37,16 +37,36 @@ Only mock these categories:
1. **Third-party libraries with side effects** - `next/navigation`, external SDKs
1. **i18n** - Always mock to return keys
### Zustand Stores - DO NOT Mock Manually
**Zustand is globally mocked** in `web/vitest.setup.ts`. Use real stores with `setState()`:
```typescript
// ✅ CORRECT: Use real store, set test state
import { useAppStore } from '@/app/components/app/store'
useAppStore.setState({ appDetail: { id: 'test', name: 'Test' } })
render(<MyComponent />)
// ❌ WRONG: Don't mock the store module
vi.mock('@/app/components/app/store', () => ({ ... }))
```
See [Zustand Store Testing](#zustand-store-testing) section for full details.
## Mock Placement
| Location | Purpose |
|----------|---------|
| `web/vitest.setup.ts` | Global mocks shared by all tests (for example `react-i18next`, `next/image`) |
| `web/vitest.setup.ts` | Global mocks shared by all tests (`react-i18next`, `next/image`, `zustand`) |
| `web/__mocks__/zustand.ts` | Zustand mock implementation (auto-resets stores after each test) |
| `web/__mocks__/` | Reusable mock factories shared across multiple test files |
| Test file | Test-specific mocks, inline with `vi.mock()` |
Modules are not mocked automatically. Use `vi.mock` in test files, or add global mocks in `web/vitest.setup.ts`.
**Note**: Zustand is special - it's globally mocked but you should NOT mock store modules manually. See [Zustand Store Testing](#zustand-store-testing).
## Essential Mocks
### 1. i18n (Auto-loaded via Global Mock)
@@ -276,6 +296,7 @@ const renderWithQueryClient = (ui: React.ReactElement) => {
1. **Use real base components** - Import from `@/app/components/base/` directly
1. **Use real project components** - Prefer importing over mocking
1. **Use real Zustand stores** - Set test state via `store.setState()`
1. **Reset mocks in `beforeEach`**, not `afterEach`
1. **Match actual component behavior** in mocks (when mocking is necessary)
1. **Use factory functions** for complex mock data
@@ -285,6 +306,7 @@ const renderWithQueryClient = (ui: React.ReactElement) => {
### ❌ DON'T
1. **Don't mock base components** (`Loading`, `Button`, `Tooltip`, etc.)
1. **Don't mock Zustand store modules** - Use real stores with `setState()`
1. Don't mock components you can import directly
1. Don't create overly simplified mocks that miss conditional logic
1. Don't forget to clean up nock after each test
@@ -308,10 +330,151 @@ Need to use a component in test?
├─ Is it a third-party lib with side effects?
│ └─ YES → Mock it (next/navigation, external SDKs)
├─ Is it a Zustand store?
│ └─ YES → DO NOT mock the module!
│ Use real store + setState() to set test state
│ (Global mock handles auto-reset)
└─ Is it i18n?
└─ YES → Uses shared mock (auto-loaded). Override only for custom translations
```
## Zustand Store Testing
### Global Zustand Mock (Auto-loaded)
Zustand is globally mocked in `web/vitest.setup.ts` following the [official Zustand testing guide](https://zustand.docs.pmnd.rs/guides/testing). The mock in `web/__mocks__/zustand.ts` provides:
- Real store behavior with `getState()`, `setState()`, `subscribe()` methods
- Automatic store reset after each test via `afterEach`
- Proper test isolation between tests
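For orientation, a simplified sketch of such a mock, adapted from the official Zustand testing guide; it covers only the uncurried `create(fn)` form, and the project's actual `web/__mocks__/zustand.ts` may differ:
```typescript
import { afterEach, vi } from 'vitest'
import type * as zustand from 'zustand'

const { create: actualCreate } =
  await vi.importActual<typeof zustand>('zustand')

// Collect a reset function for every store created during the test run.
const storeResetFns = new Set<() => void>()

export const create = (<T>(stateCreator: zustand.StateCreator<T>) => {
  const store = actualCreate(stateCreator)
  const initialState = store.getInitialState()
  storeResetFns.add(() => store.setState(initialState, true))
  return store
}) as typeof zustand.create

// Restore every store to its initial state after each test.
afterEach(() => {
  storeResetFns.forEach(resetFn => resetFn())
})
```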
### ✅ Recommended: Use Real Stores (Official Best Practice)
**DO NOT mock store modules manually.** Import and use the real store, then use `setState()` to set test state:
```typescript
// ✅ CORRECT: Use real store with setState
import { useAppStore } from '@/app/components/app/store'
describe('MyComponent', () => {
it('should render app details', () => {
// Arrange: Set test state via setState
useAppStore.setState({
appDetail: {
id: 'test-app',
name: 'Test App',
mode: 'chat',
},
})
// Act
render(<MyComponent />)
// Assert
expect(screen.getByText('Test App')).toBeInTheDocument()
// Can also verify store state directly
expect(useAppStore.getState().appDetail?.name).toBe('Test App')
})
// No cleanup needed - global mock auto-resets after each test
})
```
### ❌ Avoid: Manual Store Module Mocking
Manual mocking conflicts with the global Zustand mock and loses store functionality:
```typescript
// ❌ WRONG: Don't mock the store module
vi.mock('@/app/components/app/store', () => ({
useStore: (selector) => mockSelector(selector), // Missing getState, setState!
}))
// ❌ WRONG: This conflicts with global zustand mock
vi.mock('@/app/components/workflow/store', () => ({
useWorkflowStore: vi.fn(() => mockState),
}))
```
**Problems with manual mocking:**
1. Loses `getState()`, `setState()`, `subscribe()` methods
1. Conflicts with global Zustand mock behavior
1. Requires manual maintenance of store API
1. Tests don't reflect actual store behavior
### When Manual Store Mocking is Necessary
In rare cases where the store has complex initialization or side effects, you can mock it, but ensure you provide the full store API:
```typescript
// If you MUST mock (rare), include full store API
const mockStore = {
appDetail: { id: 'test', name: 'Test' },
setAppDetail: vi.fn(),
}
vi.mock('@/app/components/app/store', () => ({
useStore: Object.assign(
(selector: (state: typeof mockStore) => unknown) => selector(mockStore),
{
getState: () => mockStore,
setState: vi.fn(),
subscribe: vi.fn(),
},
),
}))
```
### Store Testing Decision Tree
```
Need to test a component using Zustand store?
├─ Can you use the real store?
│ └─ YES → Use real store + setState (RECOMMENDED)
│ useAppStore.setState({ ... })
├─ Does the store have complex initialization/side effects?
│ └─ YES → Consider mocking, but include full API
│ (getState, setState, subscribe)
└─ Are you testing the store itself (not a component)?
└─ YES → Test store directly with getState/setState
const store = useMyStore
store.setState({ count: 0 })
store.getState().increment()
expect(store.getState().count).toBe(1)
```
### Example: Testing Store Actions
```typescript
import { useCounterStore } from '@/stores/counter'
describe('Counter Store', () => {
it('should increment count', () => {
// Initial state (auto-reset by global mock)
expect(useCounterStore.getState().count).toBe(0)
// Call action
useCounterStore.getState().increment()
// Verify state change
expect(useCounterStore.getState().count).toBe(1)
})
it('should reset to initial state', () => {
// Set some state
useCounterStore.setState({ count: 100 })
expect(useCounterStore.getState().count).toBe(100)
// After this test, global mock will reset to initial state
})
})
```
## Factory Function Pattern
```typescript
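// (Hunk truncated here; a representative sketch of the pattern, not the
// file's actual example.)
type AppDetail = { id: string; name: string; mode: string }

const createMockApp = (overrides: Partial<AppDetail> = {}): AppDetail => ({
  id: 'test-app',
  name: 'Test App',
  mode: 'chat',
  ...overrides,
})
```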

File diff suppressed because it is too large.


@@ -0,0 +1,125 @@
---
name: vercel-react-best-practices
description: React and Next.js performance optimization guidelines from Vercel Engineering. This skill should be used when writing, reviewing, or refactoring React/Next.js code to ensure optimal performance patterns. Triggers on tasks involving React components, Next.js pages, data fetching, bundle optimization, or performance improvements.
license: MIT
metadata:
author: vercel
version: "1.0.0"
---
# Vercel React Best Practices
Comprehensive performance optimization guide for React and Next.js applications, maintained by Vercel. Contains 45 rules across 8 categories, prioritized by impact to guide automated refactoring and code generation.
## When to Apply
Reference these guidelines when:
- Writing new React components or Next.js pages
- Implementing data fetching (client or server-side)
- Reviewing code for performance issues
- Refactoring existing React/Next.js code
- Optimizing bundle size or load times
## Rule Categories by Priority
| Priority | Category | Impact | Prefix |
|----------|----------|--------|--------|
| 1 | Eliminating Waterfalls | CRITICAL | `async-` |
| 2 | Bundle Size Optimization | CRITICAL | `bundle-` |
| 3 | Server-Side Performance | HIGH | `server-` |
| 4 | Client-Side Data Fetching | MEDIUM-HIGH | `client-` |
| 5 | Re-render Optimization | MEDIUM | `rerender-` |
| 6 | Rendering Performance | MEDIUM | `rendering-` |
| 7 | JavaScript Performance | LOW-MEDIUM | `js-` |
| 8 | Advanced Patterns | LOW | `advanced-` |
## Quick Reference
### 1. Eliminating Waterfalls (CRITICAL)
- `async-defer-await` - Move await into branches where actually used
- `async-parallel` - Use Promise.all() for independent operations
- `async-dependencies` - Use better-all for partial dependencies
- `async-api-routes` - Start promises early, await late in API routes
- `async-suspense-boundaries` - Use Suspense to stream content
### 2. Bundle Size Optimization (CRITICAL)
- `bundle-barrel-imports` - Import directly, avoid barrel files
- `bundle-dynamic-imports` - Use next/dynamic for heavy components
- `bundle-defer-third-party` - Load analytics/logging after hydration
- `bundle-conditional` - Load modules only when feature is activated
- `bundle-preload` - Preload on hover/focus for perceived speed
### 3. Server-Side Performance (HIGH)
`server-cache-react` - Use React.cache() for per-request deduplication (sketch below)
- `server-cache-lru` - Use LRU cache for cross-request caching
- `server-serialization` - Minimize data passed to client components
- `server-parallel-fetching` - Restructure components to parallelize fetches
- `server-after-nonblocking` - Use after() for non-blocking operations
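As a quick illustration of `server-cache-react`, a minimal sketch of per-request deduplication, assuming a hypothetical `fetchUserFromDb` helper:
```typescript
import { cache } from 'react'

// Hypothetical data helper standing in for any expensive per-request fetch.
async function fetchUserFromDb(id: string) {
  const res = await fetch(`https://api.example.com/users/${id}`)
  return res.json()
}

// Within a single server request, repeated getUser(id) calls with the same
// id reuse the first call's result instead of hitting the data source again.
export const getUser = cache(fetchUserFromDb)
```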
### 4. Client-Side Data Fetching (MEDIUM-HIGH)
`client-swr-dedup` - Use SWR for automatic request deduplication (sketch below)
- `client-event-listeners` - Deduplicate global event listeners
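A minimal sketch of `client-swr-dedup`, assuming a shared `fetcher` helper: both components request the same key, and SWR collapses them into a single network call.
```tsx
import useSWR from 'swr'

const fetcher = (url: string) => fetch(url).then(r => r.json())

function UserName() {
  const { data } = useSWR('/api/user', fetcher)
  return <span>{data?.name}</span>
}

function UserAvatar() {
  // Same key as UserName, so SWR reuses the cached/in-flight request.
  const { data } = useSWR('/api/user', fetcher)
  return <img src={data?.avatarUrl} alt="" />
}
```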
### 5. Re-render Optimization (MEDIUM)
- `rerender-defer-reads` - Don't subscribe to state only used in callbacks
- `rerender-memo` - Extract expensive work into memoized components
- `rerender-dependencies` - Use primitive dependencies in effects
- `rerender-derived-state` - Subscribe to derived booleans, not raw values
`rerender-functional-setstate` - Use functional setState for stable callbacks (sketch below)
- `rerender-lazy-state-init` - Pass function to useState for expensive values
- `rerender-transitions` - Use startTransition for non-urgent updates
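A small sketch combining `rerender-lazy-state-init` and `rerender-functional-setstate`; `computeInitialCount` is a placeholder for any genuinely expensive setup:
```tsx
import { useCallback, useState } from 'react'

function computeInitialCount(): number {
  return 0 // placeholder for expensive work
}

function Counter() {
  // Lazy initializer: runs once on mount, not on every render.
  const [count, setCount] = useState(() => computeInitialCount())

  // Functional update: no `count` dependency, so the callback stays stable.
  const increment = useCallback(() => setCount(c => c + 1), [])

  return <button onClick={increment}>{count}</button>
}
```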
### 6. Rendering Performance (MEDIUM)
- `rendering-animate-svg-wrapper` - Animate div wrapper, not SVG element
- `rendering-content-visibility` - Use content-visibility for long lists
- `rendering-hoist-jsx` - Extract static JSX outside components
- `rendering-svg-precision` - Reduce SVG coordinate precision
- `rendering-hydration-no-flicker` - Use inline script for client-only data
- `rendering-activity` - Use Activity component for show/hide
`rendering-conditional-render` - Use ternary, not && for conditionals (sketch below)
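For example, `rendering-conditional-render` in one sketch: `&&` leaks falsy values such as `0` into the output, while a ternary keeps the empty branch explicit.
```tsx
function Badge({ count }: { count: number }) {
  // Incorrect: `count && ...` renders a literal "0" when count is 0.
  // return <div>{count && <span>{count}</span>}</div>

  // Correct: the ternary renders nothing when count is 0.
  return <div>{count > 0 ? <span>{count}</span> : null}</div>
}
```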
### 7. JavaScript Performance (LOW-MEDIUM)
- `js-batch-dom-css` - Group CSS changes via classes or cssText
- `js-index-maps` - Build Map for repeated lookups
- `js-cache-property-access` - Cache object properties in loops
- `js-cache-function-results` - Cache function results in module-level Map
- `js-cache-storage` - Cache localStorage/sessionStorage reads
- `js-combine-iterations` - Combine multiple filter/map into one loop
- `js-length-check-first` - Check array length before expensive comparison
- `js-early-exit` - Return early from functions
- `js-hoist-regexp` - Hoist RegExp creation outside loops
- `js-min-max-loop` - Use loop for min/max instead of sort
`js-set-map-lookups` - Use Set/Map for O(1) lookups (sketch below)
- `js-tosorted-immutable` - Use toSorted() for immutability
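A short sketch of `js-set-map-lookups` and `js-combine-iterations` together, on assumed data shapes:
```typescript
type Item = { id: string; active: boolean; score: number }

function activeScores(items: Item[], allowedIds: string[]): number[] {
  // Set gives O(1) membership checks vs allowedIds.includes() per item.
  const allowed = new Set(allowedIds)

  // One pass instead of chaining filter().filter().map().
  const result: number[] = []
  for (const item of items) {
    if (item.active && allowed.has(item.id))
      result.push(item.score)
  }
  return result
}
```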
### 8. Advanced Patterns (LOW)
- `advanced-event-handler-refs` - Store event handlers in refs
- `advanced-use-latest` - useLatest for stable callback refs
## How to Use
Read individual rule files for detailed explanations and code examples:
```
rules/async-parallel.md
rules/bundle-barrel-imports.md
rules/_sections.md
```
Each rule file contains:
- Brief explanation of why it matters
- Incorrect code example with explanation
- Correct code example with explanation
- Additional context and references
## Full Compiled Document
For the complete guide with all rules expanded: `AGENTS.md`


@@ -0,0 +1,55 @@
---
title: Store Event Handlers in Refs
impact: LOW
impactDescription: stable subscriptions
tags: advanced, hooks, refs, event-handlers, optimization
---
## Store Event Handlers in Refs
Store callbacks in refs when used in effects that shouldn't re-subscribe on callback changes.
**Incorrect (re-subscribes on every render):**
```tsx
function useWindowEvent(event: string, handler: (e: Event) => void) {
useEffect(() => {
window.addEventListener(event, handler)
return () => window.removeEventListener(event, handler)
}, [event, handler])
}
```
**Correct (stable subscription):**
```tsx
function useWindowEvent(event: string, handler: (e: Event) => void) {
const handlerRef = useRef(handler)
useEffect(() => {
handlerRef.current = handler
}, [handler])
useEffect(() => {
const listener = (e: Event) => handlerRef.current(e)
window.addEventListener(event, listener)
return () => window.removeEventListener(event, listener)
}, [event])
}
```
**Alternative: use `useEffectEvent` if you're on latest React:**
```tsx
import { useEffectEvent } from 'react'
function useWindowEvent(event: string, handler: (e: Event) => void) {
const onEvent = useEffectEvent(handler)
useEffect(() => {
window.addEventListener(event, onEvent)
return () => window.removeEventListener(event, onEvent)
}, [event])
}
```
`useEffectEvent` provides a cleaner API for the same pattern: it creates a stable function reference that always calls the latest version of the handler.


@@ -0,0 +1,49 @@
---
title: useLatest for Stable Callback Refs
impact: LOW
impactDescription: prevents effect re-runs
tags: advanced, hooks, useLatest, refs, optimization
---
## useLatest for Stable Callback Refs
Access latest values in callbacks without adding them to dependency arrays. Prevents effect re-runs while avoiding stale closures.
**Implementation:**
```typescript
function useLatest<T>(value: T) {
const ref = useRef(value)
useLayoutEffect(() => {
ref.current = value
}, [value])
return ref
}
```
**Incorrect (effect re-runs on every callback change):**
```tsx
function SearchInput({ onSearch }: { onSearch: (q: string) => void }) {
const [query, setQuery] = useState('')
useEffect(() => {
const timeout = setTimeout(() => onSearch(query), 300)
return () => clearTimeout(timeout)
}, [query, onSearch])
}
```
**Correct (stable effect, fresh callback):**
```tsx
function SearchInput({ onSearch }: { onSearch: (q: string) => void }) {
const [query, setQuery] = useState('')
const onSearchRef = useLatest(onSearch)
useEffect(() => {
const timeout = setTimeout(() => onSearchRef.current(query), 300)
return () => clearTimeout(timeout)
}, [query])
}
```


@@ -0,0 +1,38 @@
---
title: Prevent Waterfall Chains in API Routes
impact: CRITICAL
impactDescription: 2-10× improvement
tags: api-routes, server-actions, waterfalls, parallelization
---
## Prevent Waterfall Chains in API Routes
In API routes and Server Actions, start independent operations immediately, even if you don't await them yet.
**Incorrect (config waits for auth, data waits for both):**
```typescript
export async function GET(request: Request) {
const session = await auth()
const config = await fetchConfig()
const data = await fetchData(session.user.id)
return Response.json({ data, config })
}
```
**Correct (auth and config start immediately):**
```typescript
export async function GET(request: Request) {
const sessionPromise = auth()
const configPromise = fetchConfig()
const session = await sessionPromise
const [config, data] = await Promise.all([
configPromise,
fetchData(session.user.id)
])
return Response.json({ data, config })
}
```
For operations with more complex dependency chains, use `better-all` to automatically maximize parallelism (see Dependency-Based Parallelization).


@@ -0,0 +1,80 @@
---
title: Defer Await Until Needed
impact: HIGH
impactDescription: avoids blocking unused code paths
tags: async, await, conditional, optimization
---
## Defer Await Until Needed
Move `await` operations into the branches where they're actually used to avoid blocking code paths that don't need them.
**Incorrect (blocks both branches):**
```typescript
async function handleRequest(userId: string, skipProcessing: boolean) {
const userData = await fetchUserData(userId)
if (skipProcessing) {
// Returns immediately but still waited for userData
return { skipped: true }
}
// Only this branch uses userData
return processUserData(userData)
}
```
**Correct (only blocks when needed):**
```typescript
async function handleRequest(userId: string, skipProcessing: boolean) {
if (skipProcessing) {
// Returns immediately without waiting
return { skipped: true }
}
// Fetch only when needed
const userData = await fetchUserData(userId)
return processUserData(userData)
}
```
**Another example (early return optimization):**
```typescript
// Incorrect: always fetches permissions
async function updateResource(resourceId: string, userId: string) {
const permissions = await fetchPermissions(userId)
const resource = await getResource(resourceId)
if (!resource) {
return { error: 'Not found' }
}
if (!permissions.canEdit) {
return { error: 'Forbidden' }
}
return await updateResourceData(resource, permissions)
}
// Correct: fetches only when needed
async function updateResource(resourceId: string, userId: string) {
const resource = await getResource(resourceId)
if (!resource) {
return { error: 'Not found' }
}
const permissions = await fetchPermissions(userId)
if (!permissions.canEdit) {
return { error: 'Forbidden' }
}
return await updateResourceData(resource, permissions)
}
```
This optimization is especially valuable when the skipped branch is frequently taken, or when the deferred operation is expensive.


@@ -0,0 +1,36 @@
---
title: Dependency-Based Parallelization
impact: CRITICAL
impactDescription: 2-10× improvement
tags: async, parallelization, dependencies, better-all
---
## Dependency-Based Parallelization
For operations with partial dependencies, use `better-all` to maximize parallelism. It automatically starts each task at the earliest possible moment.
**Incorrect (profile waits for config unnecessarily):**
```typescript
const [user, config] = await Promise.all([
fetchUser(),
fetchConfig()
])
const profile = await fetchProfile(user.id)
```
**Correct (config and profile run in parallel):**
```typescript
import { all } from 'better-all'
const { user, config, profile } = await all({
async user() { return fetchUser() },
async config() { return fetchConfig() },
async profile() {
return fetchProfile((await this.$.user).id)
}
})
```
Reference: [https://github.com/shuding/better-all](https://github.com/shuding/better-all)


@@ -0,0 +1,28 @@
---
title: Promise.all() for Independent Operations
impact: CRITICAL
impactDescription: 2-10× improvement
tags: async, parallelization, promises, waterfalls
---
## Promise.all() for Independent Operations
When async operations have no interdependencies, execute them concurrently using `Promise.all()`.
**Incorrect (sequential execution, 3 round trips):**
```typescript
const user = await fetchUser()
const posts = await fetchPosts()
const comments = await fetchComments()
```
**Correct (parallel execution, 1 round trip):**
```typescript
const [user, posts, comments] = await Promise.all([
fetchUser(),
fetchPosts(),
fetchComments()
])
```


@@ -0,0 +1,99 @@
---
title: Strategic Suspense Boundaries
impact: HIGH
impactDescription: faster initial paint
tags: async, suspense, streaming, layout-shift
---
## Strategic Suspense Boundaries
Instead of awaiting data in async components before returning JSX, use Suspense boundaries to show the wrapper UI faster while data loads.
**Incorrect (wrapper blocked by data fetching):**
```tsx
async function Page() {
const data = await fetchData() // Blocks entire page
return (
<div>
<div>Sidebar</div>
<div>Header</div>
<div>
<DataDisplay data={data} />
</div>
<div>Footer</div>
</div>
)
}
```
The entire layout waits for data even though only the middle section needs it.
**Correct (wrapper shows immediately, data streams in):**
```tsx
function Page() {
return (
<div>
<div>Sidebar</div>
<div>Header</div>
<div>
<Suspense fallback={<Skeleton />}>
<DataDisplay />
</Suspense>
</div>
<div>Footer</div>
</div>
)
}
async function DataDisplay() {
const data = await fetchData() // Only blocks this component
return <div>{data.content}</div>
}
```
Sidebar, Header, and Footer render immediately. Only DataDisplay waits for data.
**Alternative (share promise across components):**
```tsx
function Page() {
// Start fetch immediately, but don't await
const dataPromise = fetchData()
return (
<div>
<div>Sidebar</div>
<div>Header</div>
<Suspense fallback={<Skeleton />}>
<DataDisplay dataPromise={dataPromise} />
<DataSummary dataPromise={dataPromise} />
</Suspense>
<div>Footer</div>
</div>
)
}
function DataDisplay({ dataPromise }: { dataPromise: Promise<Data> }) {
const data = use(dataPromise) // Unwraps the promise
return <div>{data.content}</div>
}
function DataSummary({ dataPromise }: { dataPromise: Promise<Data> }) {
const data = use(dataPromise) // Reuses the same promise
return <div>{data.summary}</div>
}
```
Both components share the same promise, so only one fetch occurs. Layout renders immediately while both components wait together.
**When NOT to use this pattern:**
- Critical data needed for layout decisions (affects positioning)
- SEO-critical content above the fold
- Small, fast queries where suspense overhead isn't worth it
- When you want to avoid layout shift (loading → content jump)
**Trade-off:** Faster initial paint vs potential layout shift. Choose based on your UX priorities.


@@ -0,0 +1,59 @@
---
title: Avoid Barrel File Imports
impact: CRITICAL
impactDescription: 200-800ms import cost, slow builds
tags: bundle, imports, tree-shaking, barrel-files, performance
---
## Avoid Barrel File Imports
Import directly from source files instead of barrel files to avoid loading thousands of unused modules. **Barrel files** are entry points that re-export multiple modules (e.g., `index.js` that does `export * from './module'`).
Popular icon and component libraries can have **up to 10,000 re-exports** in their entry file. For many React packages, **it takes 200-800ms just to import them**, affecting both development speed and production cold starts.
**Why tree-shaking doesn't help:** When a library is marked as external (not bundled), the bundler can't optimize it. If you bundle it to enable tree-shaking, builds become substantially slower analyzing the entire module graph.
**Incorrect (imports entire library):**
```tsx
import { Check, X, Menu } from 'lucide-react'
// Loads 1,583 modules, takes ~2.8s extra in dev
// Runtime cost: 200-800ms on every cold start
import { Button, TextField } from '@mui/material'
// Loads 2,225 modules, takes ~4.2s extra in dev
```
**Correct (imports only what you need):**
```tsx
import Check from 'lucide-react/dist/esm/icons/check'
import X from 'lucide-react/dist/esm/icons/x'
import Menu from 'lucide-react/dist/esm/icons/menu'
// Loads only 3 modules (~2KB vs ~1MB)
import Button from '@mui/material/Button'
import TextField from '@mui/material/TextField'
// Loads only what you use
```
**Alternative (Next.js 13.5+):**
```js
// next.config.js - use optimizePackageImports
module.exports = {
experimental: {
optimizePackageImports: ['lucide-react', '@mui/material']
}
}
// Then you can keep the ergonomic barrel imports:
import { Check, X, Menu } from 'lucide-react'
// Automatically transformed to direct imports at build time
```
Direct imports provide 15-70% faster dev boot, 28% faster builds, 40% faster cold starts, and significantly faster HMR.
Libraries commonly affected: `lucide-react`, `@mui/material`, `@mui/icons-material`, `@tabler/icons-react`, `react-icons`, `@headlessui/react`, `@radix-ui/react-*`, `lodash`, `ramda`, `date-fns`, `rxjs`, `react-use`.
Reference: [How we optimized package imports in Next.js](https://vercel.com/blog/how-we-optimized-package-imports-in-next-js)


@@ -0,0 +1,31 @@
---
title: Conditional Module Loading
impact: HIGH
impactDescription: loads large data only when needed
tags: bundle, conditional-loading, lazy-loading
---
## Conditional Module Loading
Load large data or modules only when a feature is activated.
**Example (lazy-load animation frames):**
```tsx
function AnimationPlayer({ enabled, setEnabled }: { enabled: boolean; setEnabled: React.Dispatch<React.SetStateAction<boolean>> }) {
const [frames, setFrames] = useState<Frame[] | null>(null)
useEffect(() => {
if (enabled && !frames && typeof window !== 'undefined') {
import('./animation-frames.js')
.then(mod => setFrames(mod.frames))
.catch(() => setEnabled(false))
}
}, [enabled, frames, setEnabled])
if (!enabled) return null
if (!frames) return <Skeleton /> // loading while the module is fetched
return <Canvas frames={frames} />
}
```
The `typeof window !== 'undefined'` check prevents bundling this module for SSR, optimizing server bundle size and build speed.

View File

@@ -0,0 +1,49 @@
---
title: Defer Non-Critical Third-Party Libraries
impact: MEDIUM
impactDescription: loads after hydration
tags: bundle, third-party, analytics, defer
---
## Defer Non-Critical Third-Party Libraries
Analytics, logging, and error tracking aren't needed for initial user interaction. Load them after hydration so they never block it.
**Incorrect (blocks initial bundle):**
```tsx
import { Analytics } from '@vercel/analytics/react'
export default function RootLayout({ children }) {
return (
<html>
<body>
{children}
<Analytics />
</body>
</html>
)
}
```
**Correct (loads after hydration):**
```tsx
import dynamic from 'next/dynamic'
const Analytics = dynamic(
() => import('@vercel/analytics/react').then(m => m.Analytics),
{ ssr: false }
)
export default function RootLayout({ children }) {
return (
<html>
<body>
{children}
<Analytics />
</body>
</html>
)
}
```

View File

@@ -0,0 +1,35 @@
---
title: Dynamic Imports for Heavy Components
impact: CRITICAL
impactDescription: directly affects TTI and LCP
tags: bundle, dynamic-import, code-splitting, next-dynamic
---
## Dynamic Imports for Heavy Components
Use `next/dynamic` to lazy-load large components not needed on initial render.
**Incorrect (Monaco bundles with main chunk ~300KB):**
```tsx
import { MonacoEditor } from './monaco-editor'
function CodePanel({ code }: { code: string }) {
return <MonacoEditor value={code} />
}
```
**Correct (Monaco loads on demand):**
```tsx
import dynamic from 'next/dynamic'
const MonacoEditor = dynamic(
() => import('./monaco-editor').then(m => m.MonacoEditor),
{ ssr: false }
)
function CodePanel({ code }: { code: string }) {
return <MonacoEditor value={code} />
}
```

View File

@@ -0,0 +1,50 @@
---
title: Preload Based on User Intent
impact: MEDIUM
impactDescription: reduces perceived latency
tags: bundle, preload, user-intent, hover
---
## Preload Based on User Intent
Preload heavy bundles before they're needed to reduce perceived latency.
**Example (preload on hover/focus):**
```tsx
function EditorButton({ onClick }: { onClick: () => void }) {
const preload = () => {
if (typeof window !== 'undefined') {
void import('./monaco-editor')
}
}
return (
<button
onMouseEnter={preload}
onFocus={preload}
onClick={onClick}
>
Open Editor
</button>
)
}
```
**Example (preload when feature flag is enabled):**
```tsx
function FlagsProvider({ children, flags }: Props) {
useEffect(() => {
if (flags.editorEnabled && typeof window !== 'undefined') {
void import('./monaco-editor').then(mod => mod.init())
}
}, [flags.editorEnabled])
return <FlagsContext.Provider value={flags}>
{children}
</FlagsContext.Provider>
}
```
The `typeof window !== 'undefined'` check prevents bundling preloaded modules for SSR, optimizing server bundle size and build speed.

View File

@@ -0,0 +1,74 @@
---
title: Deduplicate Global Event Listeners
impact: LOW
impactDescription: single listener for N components
tags: client, swr, event-listeners, subscription
---
## Deduplicate Global Event Listeners
Use `useSWRSubscription()` to share global event listeners across component instances.
**Incorrect (N instances = N listeners):**
```tsx
function useKeyboardShortcut(key: string, callback: () => void) {
useEffect(() => {
const handler = (e: KeyboardEvent) => {
if (e.metaKey && e.key === key) {
callback()
}
}
window.addEventListener('keydown', handler)
return () => window.removeEventListener('keydown', handler)
}, [key, callback])
}
```
When using the `useKeyboardShortcut` hook multiple times, each instance will register a new listener.
**Correct (N instances = 1 listener):**
```tsx
import useSWRSubscription from 'swr/subscription'
// Module-level Map to track callbacks per key
const keyCallbacks = new Map<string, Set<() => void>>()
function useKeyboardShortcut(key: string, callback: () => void) {
// Register this callback in the Map
useEffect(() => {
if (!keyCallbacks.has(key)) {
keyCallbacks.set(key, new Set())
}
keyCallbacks.get(key)!.add(callback)
return () => {
const set = keyCallbacks.get(key)
if (set) {
set.delete(callback)
if (set.size === 0) {
keyCallbacks.delete(key)
}
}
}
}, [key, callback])
useSWRSubscription('global-keydown', () => {
const handler = (e: KeyboardEvent) => {
if (e.metaKey && keyCallbacks.has(e.key)) {
keyCallbacks.get(e.key)!.forEach(cb => cb())
}
}
window.addEventListener('keydown', handler)
return () => window.removeEventListener('keydown', handler)
})
}
function Profile() {
// Multiple shortcuts will share the same listener
useKeyboardShortcut('p', () => { /* ... */ })
useKeyboardShortcut('k', () => { /* ... */ })
// ...
}
```

View File

@@ -0,0 +1,71 @@
---
title: Version and Minimize localStorage Data
impact: MEDIUM
impactDescription: prevents schema conflicts, reduces storage size
tags: client, localStorage, storage, versioning, data-minimization
---
## Version and Minimize localStorage Data
Add a version prefix to keys and store only the fields you need. This prevents schema conflicts and accidental storage of sensitive data.
**Incorrect:**
```typescript
// No version, stores everything, no error handling
localStorage.setItem('userConfig', JSON.stringify(fullUserObject))
const data = localStorage.getItem('userConfig')
```
**Correct:**
```typescript
const VERSION = 'v2'
function saveConfig(config: { theme: string; language: string }) {
try {
localStorage.setItem(`userConfig:${VERSION}`, JSON.stringify(config))
} catch {
// Throws in incognito/private browsing, quota exceeded, or disabled
}
}
function loadConfig() {
try {
const data = localStorage.getItem(`userConfig:${VERSION}`)
return data ? JSON.parse(data) : null
} catch {
return null
}
}
// Migration from v1 to v2
function migrate() {
try {
const v1 = localStorage.getItem('userConfig:v1')
if (v1) {
const old = JSON.parse(v1)
saveConfig({ theme: old.darkMode ? 'dark' : 'light', language: old.lang })
localStorage.removeItem('userConfig:v1')
}
} catch {}
}
```
**Store minimal fields from server responses:**
```typescript
// User object has 20+ fields, only store what UI needs
function cachePrefs(user: FullUser) {
try {
localStorage.setItem('prefs:v1', JSON.stringify({
theme: user.preferences.theme,
notifications: user.preferences.notifications
}))
} catch {}
}
```
**Always wrap in try-catch:** `getItem()` and `setItem()` throw in incognito/private browsing (Safari, Firefox), when quota exceeded, or when disabled.
**Benefits:** Schema evolution via versioning, reduced storage size, prevents storing tokens/PII/internal flags.

View File

@@ -0,0 +1,48 @@
---
title: Use Passive Event Listeners for Scrolling Performance
impact: MEDIUM
impactDescription: eliminates scroll delay caused by event listeners
tags: client, event-listeners, scrolling, performance, touch, wheel
---
## Use Passive Event Listeners for Scrolling Performance
Add `{ passive: true }` to touch and wheel event listeners to enable immediate scrolling. Browsers normally wait for these listeners to finish before scrolling, in case one calls `preventDefault()`, which delays every scroll.
**Incorrect:**
```typescript
useEffect(() => {
const handleTouch = (e: TouchEvent) => console.log(e.touches[0].clientX)
const handleWheel = (e: WheelEvent) => console.log(e.deltaY)
document.addEventListener('touchstart', handleTouch)
document.addEventListener('wheel', handleWheel)
return () => {
document.removeEventListener('touchstart', handleTouch)
document.removeEventListener('wheel', handleWheel)
}
}, [])
```
**Correct:**
```typescript
useEffect(() => {
const handleTouch = (e: TouchEvent) => console.log(e.touches[0].clientX)
const handleWheel = (e: WheelEvent) => console.log(e.deltaY)
document.addEventListener('touchstart', handleTouch, { passive: true })
document.addEventListener('wheel', handleWheel, { passive: true })
return () => {
document.removeEventListener('touchstart', handleTouch)
document.removeEventListener('wheel', handleWheel)
}
}, [])
```
**Use passive when:** tracking/analytics, logging, any listener that doesn't call `preventDefault()`.
**Don't use passive when:** implementing custom swipe gestures, custom zoom controls, or any listener that needs `preventDefault()`.
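For instance, a custom horizontal-swipe handler must stay non-passive so `preventDefault()` works. A minimal sketch (the 10px threshold is an arbitrary choice):
```typescript
useEffect(() => {
  let startX = 0
  const onTouchStart = (e: TouchEvent) => {
    startX = e.touches[0].clientX
  }
  const onTouchMove = (e: TouchEvent) => {
    // Suppress native scrolling once a horizontal swipe is detected
    if (Math.abs(e.touches[0].clientX - startX) > 10) {
      e.preventDefault()
    }
  }
  document.addEventListener('touchstart', onTouchStart, { passive: true })
  // passive: false must be explicit - browsers default touchmove
  // on document to passive, which would ignore preventDefault()
  document.addEventListener('touchmove', onTouchMove, { passive: false })
  return () => {
    document.removeEventListener('touchstart', onTouchStart)
    document.removeEventListener('touchmove', onTouchMove)
  }
}, [])
```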

View File

@@ -0,0 +1,56 @@
---
title: Use SWR for Automatic Deduplication
impact: MEDIUM-HIGH
impactDescription: automatic deduplication
tags: client, swr, deduplication, data-fetching
---
## Use SWR for Automatic Deduplication
SWR enables request deduplication, caching, and revalidation across component instances.
**Incorrect (no deduplication, each instance fetches):**
```tsx
function UserList() {
const [users, setUsers] = useState([])
useEffect(() => {
fetch('/api/users')
.then(r => r.json())
.then(setUsers)
}, [])
}
```
**Correct (multiple instances share one request):**
```tsx
import useSWR from 'swr'
function UserList() {
const { data: users } = useSWR('/api/users', fetcher)
}
```
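These examples assume a shared `fetcher`; a minimal sketch:
```tsx
const fetcher = (url: string) =>
  fetch(url).then((res) => {
    if (!res.ok) throw new Error(`Request failed: ${res.status}`)
    return res.json()
  })
```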
**For immutable data:**
```tsx
import useSWRImmutable from 'swr/immutable'
function StaticContent() {
const { data } = useSWRImmutable('/api/config', fetcher)
}
```
**For mutations:**
```tsx
import useSWRMutation from 'swr/mutation'
function UpdateButton() {
const { trigger } = useSWRMutation('/api/user', updateUser)
return <button onClick={() => trigger()}>Update</button>
}
```
Reference: [https://swr.vercel.app](https://swr.vercel.app)

View File

@@ -0,0 +1,57 @@
---
title: Batch DOM CSS Changes
impact: MEDIUM
impactDescription: reduces reflows/repaints
tags: javascript, dom, css, performance, reflow
---
## Batch DOM CSS Changes
Avoid interleaving style writes with layout reads. When you read a layout property (like `offsetWidth`, `getBoundingClientRect()`, or `getComputedStyle()`) between style changes, the browser is forced to trigger a synchronous reflow.
**Incorrect (interleaved reads and writes force reflows):**
```typescript
function updateElementStyles(element: HTMLElement) {
element.style.width = '100px'
const width = element.offsetWidth // Forces reflow
element.style.height = '200px'
const height = element.offsetHeight // Forces another reflow
}
```
**Correct (batch writes, then read once):**
```typescript
function updateElementStyles(element: HTMLElement) {
// Batch all writes together
element.style.width = '100px'
element.style.height = '200px'
element.style.backgroundColor = 'blue'
element.style.border = '1px solid black'
// Read after all writes are done (single reflow)
const { width, height } = element.getBoundingClientRect()
}
```
**Better: use CSS classes**
```css
.highlighted-box {
width: 100px;
height: 200px;
background-color: blue;
border: 1px solid black;
}
```
```typescript
function updateElementStyles(element: HTMLElement) {
element.classList.add('highlighted-box')
const { width, height } = element.getBoundingClientRect()
}
```
Prefer CSS classes over inline styles when possible. CSS files are cached by the browser, and classes provide better separation of concerns and are easier to maintain.

View File

@@ -0,0 +1,80 @@
---
title: Cache Repeated Function Calls
impact: MEDIUM
impactDescription: avoid redundant computation
tags: javascript, cache, memoization, performance
---
## Cache Repeated Function Calls
Use a module-level Map to cache function results when the same function is called repeatedly with the same inputs during render.
**Incorrect (redundant computation):**
```typescript
function ProjectList({ projects }: { projects: Project[] }) {
return (
<div>
{projects.map(project => {
// slugify() called 100+ times for same project names
const slug = slugify(project.name)
return <ProjectCard key={project.id} slug={slug} />
})}
</div>
)
}
```
**Correct (cached results):**
```typescript
// Module-level cache
const slugifyCache = new Map<string, string>()
function cachedSlugify(text: string): string {
if (slugifyCache.has(text)) {
return slugifyCache.get(text)!
}
const result = slugify(text)
slugifyCache.set(text, result)
return result
}
function ProjectList({ projects }: { projects: Project[] }) {
return (
<div>
{projects.map(project => {
// Computed only once per unique project name
const slug = cachedSlugify(project.name)
return <ProjectCard key={project.id} slug={slug} />
})}
</div>
)
}
```
**Simpler pattern for single-value functions:**
```typescript
let isLoggedInCache: boolean | null = null
function isLoggedIn(): boolean {
if (isLoggedInCache !== null) {
return isLoggedInCache
}
isLoggedInCache = document.cookie.includes('auth=')
return isLoggedInCache
}
// Clear cache when auth changes
function onAuthChange() {
isLoggedInCache = null
}
```
Use a Map (not a hook) so it works everywhere: utilities, event handlers, not just React components.
Reference: [How we made the Vercel Dashboard twice as fast](https://vercel.com/blog/how-we-made-the-vercel-dashboard-twice-as-fast)

View File

@@ -0,0 +1,28 @@
---
title: Cache Property Access in Loops
impact: LOW-MEDIUM
impactDescription: reduces lookups
tags: javascript, loops, optimization, caching
---
## Cache Property Access in Loops
Cache object property lookups in hot paths.
**Incorrect (3 lookups × N iterations):**
```typescript
for (let i = 0; i < arr.length; i++) {
process(obj.config.settings.value)
}
```
**Correct (1 lookup total):**
```typescript
const value = obj.config.settings.value
const len = arr.length
for (let i = 0; i < len; i++) {
process(value)
}
```

View File

@@ -0,0 +1,70 @@
---
title: Cache Storage API Calls
impact: LOW-MEDIUM
impactDescription: reduces expensive I/O
tags: javascript, localStorage, storage, caching, performance
---
## Cache Storage API Calls
`localStorage`, `sessionStorage`, and `document.cookie` are synchronous and expensive. Cache reads in memory.
**Incorrect (reads storage on every call):**
```typescript
function getTheme() {
return localStorage.getItem('theme') ?? 'light'
}
// Called 10 times = 10 storage reads
```
**Correct (Map cache):**
```typescript
const storageCache = new Map<string, string | null>()
function getLocalStorage(key: string) {
if (!storageCache.has(key)) {
storageCache.set(key, localStorage.getItem(key))
}
return storageCache.get(key)
}
function setLocalStorage(key: string, value: string) {
localStorage.setItem(key, value)
storageCache.set(key, value) // keep cache in sync
}
```
Use a Map (not a hook) so it works everywhere: utilities, event handlers, not just React components.
**Cookie caching:**
```typescript
let cookieCache: Record<string, string> | null = null
function getCookie(name: string) {
if (!cookieCache) {
cookieCache = Object.fromEntries(
document.cookie.split('; ').map(c => {
// split on the first '=' only, so values containing '=' stay intact
const i = c.indexOf('=')
return [c.slice(0, i), c.slice(i + 1)]
})
)
}
return cookieCache[name]
}
```
**Important (invalidate on external changes):**
If storage can change externally (another tab, server-set cookies), invalidate cache:
```typescript
window.addEventListener('storage', (e) => {
if (e.key) storageCache.delete(e.key)
})
document.addEventListener('visibilitychange', () => {
if (document.visibilityState === 'visible') {
storageCache.clear()
}
})
```

View File

@@ -0,0 +1,32 @@
---
title: Combine Multiple Array Iterations
impact: LOW-MEDIUM
impactDescription: reduces iterations
tags: javascript, arrays, loops, performance
---
## Combine Multiple Array Iterations
Multiple `.filter()` or `.map()` calls iterate the array multiple times. Combine into one loop.
**Incorrect (3 iterations):**
```typescript
const admins = users.filter(u => u.isAdmin)
const testers = users.filter(u => u.isTester)
const inactive = users.filter(u => !u.isActive)
```
**Correct (1 iteration):**
```typescript
const admins: User[] = []
const testers: User[] = []
const inactive: User[] = []
for (const user of users) {
if (user.isAdmin) admins.push(user)
if (user.isTester) testers.push(user)
if (!user.isActive) inactive.push(user)
}
```

View File

@@ -0,0 +1,50 @@
---
title: Early Return from Functions
impact: LOW-MEDIUM
impactDescription: avoids unnecessary computation
tags: javascript, functions, optimization, early-return
---
## Early Return from Functions
Return early once the result is determined to skip unnecessary processing.
**Incorrect (processes all items even after finding answer):**
```typescript
function validateUsers(users: User[]) {
let hasError = false
let errorMessage = ''
for (const user of users) {
if (!user.email) {
hasError = true
errorMessage = 'Email required'
}
if (!user.name) {
hasError = true
errorMessage = 'Name required'
}
// Continues checking all users even after error found
}
return hasError ? { valid: false, error: errorMessage } : { valid: true }
}
```
**Correct (returns immediately on first error):**
```typescript
function validateUsers(users: User[]) {
for (const user of users) {
if (!user.email) {
return { valid: false, error: 'Email required' }
}
if (!user.name) {
return { valid: false, error: 'Name required' }
}
}
return { valid: true }
}
```

View File

@@ -0,0 +1,45 @@
---
title: Hoist RegExp Creation
impact: LOW-MEDIUM
impactDescription: avoids recreation
tags: javascript, regexp, optimization, memoization
---
## Hoist RegExp Creation
Don't create RegExp inside render. Hoist to module scope or memoize with `useMemo()`.
**Incorrect (new RegExp every render):**
```tsx
function Highlighter({ text, query }: Props) {
const regex = new RegExp(`(${query})`, 'gi')
const parts = text.split(regex)
return <>{parts.map((part, i) => ...)}</>
}
```
**Correct (memoize or hoist):**
```tsx
const EMAIL_REGEX = /^[^\s@]+@[^\s@]+\.[^\s@]+$/
function Highlighter({ text, query }: Props) {
const regex = useMemo(
() => new RegExp(`(${escapeRegex(query)})`, 'gi'),
[query]
)
const parts = text.split(regex)
return <>{parts.map((part, i) => ...)}</>
}
```
**Warning (global regex has mutable state):**
Global regex (`/g`) has mutable `lastIndex` state:
```typescript
const regex = /foo/g
regex.test('foo') // true, lastIndex = 3
regex.test('foo') // false, lastIndex = 0
```
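If a global regex must be reused, reset `lastIndex` before each use, or drop the `g` flag when you only need a boolean match:
```typescript
const regex = /foo/g
regex.lastIndex = 0 // reset before reuse
regex.test('foo') // true

/foo/.test('foo') // no 'g' flag, no mutable state
```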

View File

@@ -0,0 +1,37 @@
---
title: Build Index Maps for Repeated Lookups
impact: LOW-MEDIUM
impactDescription: 1M ops to 2K ops
tags: javascript, map, indexing, optimization, performance
---
## Build Index Maps for Repeated Lookups
Multiple `.find()` calls by the same key should use a Map.
**Incorrect (O(n) per lookup):**
```typescript
function processOrders(orders: Order[], users: User[]) {
return orders.map(order => ({
...order,
user: users.find(u => u.id === order.userId)
}))
}
```
**Correct (O(1) per lookup):**
```typescript
function processOrders(orders: Order[], users: User[]) {
const userById = new Map(users.map(u => [u.id, u]))
return orders.map(order => ({
...order,
user: userById.get(order.userId)
}))
}
```
Build map once (O(n)), then all lookups are O(1).
For 1000 orders × 1000 users: 1M ops → 2K ops.

View File

@@ -0,0 +1,49 @@
---
title: Early Length Check for Array Comparisons
impact: MEDIUM-HIGH
impactDescription: avoids expensive operations when lengths differ
tags: javascript, arrays, performance, optimization, comparison
---
## Early Length Check for Array Comparisons
When comparing arrays with expensive operations (sorting, deep equality, serialization), check lengths first. If lengths differ, the arrays cannot be equal.
In real-world applications, this optimization is especially valuable when the comparison runs in hot paths (event handlers, render loops).
**Incorrect (always runs expensive comparison):**
```typescript
function hasChanges(current: string[], original: string[]) {
// Always sorts and joins, even when lengths differ
return current.sort().join() !== original.sort().join()
}
```
Two O(n log n) sorts run even when `current.length` is 5 and `original.length` is 100, plus the overhead of joining the arrays and comparing the resulting strings. `.sort()` also mutates both input arrays in place.
**Correct (O(1) length check first):**
```typescript
function hasChanges(current: string[], original: string[]) {
// Early return if lengths differ
if (current.length !== original.length) {
return true
}
// Only sort when lengths match
const currentSorted = current.toSorted()
const originalSorted = original.toSorted()
for (let i = 0; i < currentSorted.length; i++) {
if (currentSorted[i] !== originalSorted[i]) {
return true
}
}
return false
}
```
This new approach is more efficient because:
- It avoids the overhead of sorting and joining the arrays when lengths differ
- It avoids consuming memory for the joined strings (especially important for large arrays)
- It avoids mutating the original arrays
- It returns early when a difference is found

View File

@@ -0,0 +1,82 @@
---
title: Use Loop for Min/Max Instead of Sort
impact: LOW
impactDescription: O(n) instead of O(n log n)
tags: javascript, arrays, performance, sorting, algorithms
---
## Use Loop for Min/Max Instead of Sort
Finding the smallest or largest element only requires a single pass through the array. Sorting is wasteful and slower.
**Incorrect (O(n log n) - sort to find latest):**
```typescript
interface Project {
id: string
name: string
updatedAt: number
}
function getLatestProject(projects: Project[]) {
const sorted = [...projects].sort((a, b) => b.updatedAt - a.updatedAt)
return sorted[0]
}
```
Sorts the entire array just to find the maximum value.
**Incorrect (O(n log n) - sort for oldest and newest):**
```typescript
function getOldestAndNewest(projects: Project[]) {
const sorted = [...projects].sort((a, b) => a.updatedAt - b.updatedAt)
return { oldest: sorted[0], newest: sorted[sorted.length - 1] }
}
```
Still sorts unnecessarily when only min/max are needed.
**Correct (O(n) - single loop):**
```typescript
function getLatestProject(projects: Project[]) {
if (projects.length === 0) return null
let latest = projects[0]
for (let i = 1; i < projects.length; i++) {
if (projects[i].updatedAt > latest.updatedAt) {
latest = projects[i]
}
}
return latest
}
function getOldestAndNewest(projects: Project[]) {
if (projects.length === 0) return { oldest: null, newest: null }
let oldest = projects[0]
let newest = projects[0]
for (let i = 1; i < projects.length; i++) {
if (projects[i].updatedAt < oldest.updatedAt) oldest = projects[i]
if (projects[i].updatedAt > newest.updatedAt) newest = projects[i]
}
return { oldest, newest }
}
```
Single pass through the array, no copying, no sorting.
**Alternative (Math.min/Math.max for small arrays):**
```typescript
const numbers = [5, 2, 8, 1, 9]
const min = Math.min(...numbers)
const max = Math.max(...numbers)
```
This works for small arrays, but for very large arrays it can be slower or throw a RangeError, because each element becomes a function argument when spread. The maximum array length is approximately 124,000 in Chrome 143 and 638,000 in Safari 18; exact numbers may vary - see [the fiddle](https://jsfiddle.net/qw1jabsx/4/). Use the loop approach for reliability.

View File

@@ -0,0 +1,24 @@
---
title: Use Set/Map for O(1) Lookups
impact: LOW-MEDIUM
impactDescription: O(n) to O(1)
tags: javascript, set, map, data-structures, performance
---
## Use Set/Map for O(1) Lookups
Convert arrays to Set/Map for repeated membership checks.
**Incorrect (O(n) per check):**
```typescript
const allowedIds = ['a', 'b', 'c', ...]
items.filter(item => allowedIds.includes(item.id))
```
**Correct (O(1) per check):**
```typescript
const allowedIds = new Set(['a', 'b', 'c', ...])
items.filter(item => allowedIds.has(item.id))
```

View File

@@ -0,0 +1,57 @@
---
title: Use toSorted() Instead of sort() for Immutability
impact: MEDIUM-HIGH
impactDescription: prevents mutation bugs in React state
tags: javascript, arrays, immutability, react, state, mutation
---
## Use toSorted() Instead of sort() for Immutability
`.sort()` mutates the array in place, which can cause bugs with React state and props. Use `.toSorted()` to create a new sorted array without mutation.
**Incorrect (mutates original array):**
```typescript
function UserList({ users }: { users: User[] }) {
// Mutates the users prop array!
const sorted = useMemo(
() => users.sort((a, b) => a.name.localeCompare(b.name)),
[users]
)
return <div>{sorted.map(renderUser)}</div>
}
```
**Correct (creates new array):**
```typescript
function UserList({ users }: { users: User[] }) {
// Creates new sorted array, original unchanged
const sorted = useMemo(
() => users.toSorted((a, b) => a.name.localeCompare(b.name)),
[users]
)
return <div>{sorted.map(renderUser)}</div>
}
```
**Why this matters in React:**
1. Props/state mutations break React's immutability model - React expects props and state to be treated as read-only
2. Causes stale closure bugs - Mutating arrays inside closures (callbacks, effects) can lead to unexpected behavior
**Browser support (fallback for older browsers):**
`.toSorted()` is available in all modern browsers (Chrome 110+, Safari 16+, Firefox 115+, Node.js 20+). For older environments, use spread operator:
```typescript
// Fallback for older browsers
const sorted = [...items].sort((a, b) => a.value - b.value)
```
**Other immutable array methods:**
- `.toSorted()` - immutable sort
- `.toReversed()` - immutable reverse
- `.toSpliced()` - immutable splice
- `.with()` - immutable element replacement
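A quick sketch of these methods:
```typescript
const xs = [1, 2, 3]
xs.toReversed() // [3, 2, 1]
xs.toSpliced(1, 1) // [1, 3] - removes one element at index 1
xs.with(0, 9) // [9, 2, 3] - replaces index 0
// xs is still [1, 2, 3]
```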

View File

@@ -0,0 +1,26 @@
---
title: Use Activity Component for Show/Hide
impact: MEDIUM
impactDescription: preserves state/DOM
tags: rendering, activity, visibility, state-preservation
---
## Use Activity Component for Show/Hide
Use React's `<Activity>` to preserve state/DOM for expensive components that frequently toggle visibility.
**Usage:**
```tsx
import { Activity } from 'react'
function Dropdown({ isOpen }: Props) {
return (
<Activity mode={isOpen ? 'visible' : 'hidden'}>
<ExpensiveMenu />
</Activity>
)
}
```
Avoids expensive re-renders and state loss.

View File

@@ -0,0 +1,47 @@
---
title: Animate SVG Wrapper Instead of SVG Element
impact: LOW
impactDescription: enables hardware acceleration
tags: rendering, svg, css, animation, performance
---
## Animate SVG Wrapper Instead of SVG Element
Many browsers don't have hardware acceleration for CSS3 animations on SVG elements. Wrap SVG in a `<div>` and animate the wrapper instead.
**Incorrect (animating SVG directly - no hardware acceleration):**
```tsx
function LoadingSpinner() {
return (
<svg
className="animate-spin"
width="24"
height="24"
viewBox="0 0 24 24"
>
<circle cx="12" cy="12" r="10" stroke="currentColor" />
</svg>
)
}
```
**Correct (animating wrapper div - hardware accelerated):**
```tsx
function LoadingSpinner() {
return (
<div className="animate-spin">
<svg
width="24"
height="24"
viewBox="0 0 24 24"
>
<circle cx="12" cy="12" r="10" stroke="currentColor" />
</svg>
</div>
)
}
```
This applies to all CSS transforms and transitions (`transform`, `opacity`, `translate`, `scale`, `rotate`). The wrapper div allows browsers to use GPU acceleration for smoother animations.
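The examples assume Tailwind's `animate-spin` utility; without Tailwind, an equivalent class looks like:
```css
.animate-spin {
  animation: spin 1s linear infinite;
}
@keyframes spin {
  to {
    transform: rotate(360deg);
  }
}
```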

View File

@@ -0,0 +1,40 @@
---
title: Use Explicit Conditional Rendering
impact: LOW
impactDescription: prevents rendering 0 or NaN
tags: rendering, conditional, jsx, falsy-values
---
## Use Explicit Conditional Rendering
Use explicit ternary operators (`? :`) instead of `&&` for conditional rendering when the condition can be `0`, `NaN`, or other falsy values that render.
**Incorrect (renders "0" when count is 0):**
```tsx
function Badge({ count }: { count: number }) {
return (
<div>
{count && <span className="badge">{count}</span>}
</div>
)
}
// When count = 0, renders: <div>0</div>
// When count = 5, renders: <div><span class="badge">5</span></div>
```
**Correct (renders nothing when count is 0):**
```tsx
function Badge({ count }: { count: number }) {
return (
<div>
{count > 0 ? <span className="badge">{count}</span> : null}
</div>
)
}
// When count = 0, renders: <div></div>
// When count = 5, renders: <div><span class="badge">5</span></div>
```

View File

@@ -0,0 +1,38 @@
---
title: CSS content-visibility for Long Lists
impact: HIGH
impactDescription: faster initial render
tags: rendering, css, content-visibility, long-lists
---
## CSS content-visibility for Long Lists
Apply `content-visibility: auto` to defer off-screen rendering.
**CSS:**
```css
.message-item {
content-visibility: auto;
contain-intrinsic-size: 0 80px;
}
```
**Example:**
```tsx
function MessageList({ messages }: { messages: Message[] }) {
return (
<div className="overflow-y-auto h-screen">
{messages.map(msg => (
<div key={msg.id} className="message-item">
<Avatar user={msg.author} />
<div>{msg.content}</div>
</div>
))}
</div>
)
}
```
For 1000 messages, browser skips layout/paint for ~990 off-screen items (10× faster initial render).

View File

@@ -0,0 +1,46 @@
---
title: Hoist Static JSX Elements
impact: LOW
impactDescription: avoids re-creation
tags: rendering, jsx, static, optimization
---
## Hoist Static JSX Elements
Extract static JSX outside components to avoid re-creation.
**Incorrect (recreates element every render):**
```tsx
function LoadingSkeleton() {
return <div className="animate-pulse h-20 bg-gray-200" />
}
function Container() {
return (
<div>
{loading && <LoadingSkeleton />}
</div>
)
}
```
**Correct (reuses same element):**
```tsx
const loadingSkeleton = (
<div className="animate-pulse h-20 bg-gray-200" />
)
function Container() {
return (
<div>
{loading && loadingSkeleton}
</div>
)
}
```
This is especially helpful for large and static SVG nodes, which can be expensive to recreate on every render.
**Note:** If your project has [React Compiler](https://react.dev/learn/react-compiler) enabled, the compiler automatically hoists static JSX elements and optimizes component re-renders, making manual hoisting unnecessary.

View File

@@ -0,0 +1,82 @@
---
title: Prevent Hydration Mismatch Without Flickering
impact: MEDIUM
impactDescription: avoids visual flicker and hydration errors
tags: rendering, ssr, hydration, localStorage, flicker
---
## Prevent Hydration Mismatch Without Flickering
When rendering content that depends on client-side storage (localStorage, cookies), avoid both SSR breakage and post-hydration flickering by injecting a synchronous script that updates the DOM before React hydrates.
**Incorrect (breaks SSR):**
```tsx
function ThemeWrapper({ children }: { children: ReactNode }) {
// localStorage is not available on server - throws error
const theme = localStorage.getItem('theme') || 'light'
return (
<div className={theme}>
{children}
</div>
)
}
```
Server-side rendering will fail because `localStorage` is undefined.
**Incorrect (visual flickering):**
```tsx
function ThemeWrapper({ children }: { children: ReactNode }) {
const [theme, setTheme] = useState('light')
useEffect(() => {
// Runs after hydration - causes visible flash
const stored = localStorage.getItem('theme')
if (stored) {
setTheme(stored)
}
}, [])
return (
<div className={theme}>
{children}
</div>
)
}
```
Component first renders with default value (`light`), then updates after hydration, causing a visible flash of incorrect content.
**Correct (no flicker, no hydration mismatch):**
```tsx
function ThemeWrapper({ children }: { children: ReactNode }) {
return (
<>
<div id="theme-wrapper">
{children}
</div>
<script
dangerouslySetInnerHTML={{
__html: `
(function() {
try {
var theme = localStorage.getItem('theme') || 'light';
var el = document.getElementById('theme-wrapper');
if (el) el.className = theme;
} catch (e) {}
})();
`,
}}
/>
</>
)
}
```
The inline script executes synchronously during HTML parsing, before the browser paints, so the DOM already has the correct value by first render. No flickering, no hydration mismatch.
This pattern is especially useful for theme toggles, user preferences, authentication states, and any client-only data that should render immediately without flashing default values.

View File

@@ -0,0 +1,28 @@
---
title: Optimize SVG Precision
impact: LOW
impactDescription: reduces file size
tags: rendering, svg, optimization, svgo
---
## Optimize SVG Precision
Reduce SVG coordinate precision to decrease file size. The optimal precision depends on the viewBox size, but reducing it is usually worth considering.
**Incorrect (excessive precision):**
```svg
<path d="M 10.293847 20.847362 L 30.938472 40.192837" />
```
**Correct (1 decimal place):**
```svg
<path d="M 10.3 20.8 L 30.9 40.2" />
```
**Automate with SVGO:**
```bash
npx svgo --precision=1 --multipass icon.svg
```

View File

@@ -0,0 +1,39 @@
---
title: Defer State Reads to Usage Point
impact: MEDIUM
impactDescription: avoids unnecessary subscriptions
tags: rerender, searchParams, localStorage, optimization
---
## Defer State Reads to Usage Point
Don't subscribe to dynamic state (searchParams, localStorage) if you only read it inside callbacks.
**Incorrect (subscribes to all searchParams changes):**
```tsx
function ShareButton({ chatId }: { chatId: string }) {
const searchParams = useSearchParams()
const handleShare = () => {
const ref = searchParams.get('ref')
shareChat(chatId, { ref })
}
return <button onClick={handleShare}>Share</button>
}
```
**Correct (reads on demand, no subscription):**
```tsx
function ShareButton({ chatId }: { chatId: string }) {
const handleShare = () => {
const params = new URLSearchParams(window.location.search)
const ref = params.get('ref')
shareChat(chatId, { ref })
}
return <button onClick={handleShare}>Share</button>
}
```

View File

@@ -0,0 +1,45 @@
---
title: Narrow Effect Dependencies
impact: LOW
impactDescription: minimizes effect re-runs
tags: rerender, useEffect, dependencies, optimization
---
## Narrow Effect Dependencies
Specify primitive dependencies instead of objects to minimize effect re-runs.
**Incorrect (re-runs on any user field change):**
```tsx
useEffect(() => {
console.log(user.id)
}, [user])
```
**Correct (re-runs only when id changes):**
```tsx
useEffect(() => {
console.log(user.id)
}, [user.id])
```
**For derived state, compute outside effect:**
```tsx
// Incorrect: runs on width=767, 766, 765...
useEffect(() => {
if (width < 768) {
enableMobileMode()
}
}, [width])
// Correct: runs only on boolean transition
const isMobile = width < 768
useEffect(() => {
if (isMobile) {
enableMobileMode()
}
}, [isMobile])
```

View File

@@ -0,0 +1,29 @@
---
title: Subscribe to Derived State
impact: MEDIUM
impactDescription: reduces re-render frequency
tags: rerender, derived-state, media-query, optimization
---
## Subscribe to Derived State
Subscribe to derived boolean state instead of continuous values to reduce re-render frequency.
**Incorrect (re-renders on every pixel change):**
```tsx
function Sidebar() {
const width = useWindowWidth() // updates continuously
const isMobile = width < 768
return <nav className={isMobile ? 'mobile' : 'desktop'} />
}
```
**Correct (re-renders only when boolean changes):**
```tsx
function Sidebar() {
const isMobile = useMediaQuery('(max-width: 767px)')
return <nav className={isMobile ? 'mobile' : 'desktop'} />
}
```
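`useMediaQuery` is assumed here; one possible implementation is a thin wrapper over `matchMedia` (a sketch, with a `false` server snapshot for SSR):
```tsx
import { useCallback, useSyncExternalStore } from 'react'
function useMediaQuery(query: string): boolean {
  const subscribe = useCallback((onChange: () => void) => {
    const mql = window.matchMedia(query)
    mql.addEventListener('change', onChange)
    return () => mql.removeEventListener('change', onChange)
  }, [query])
  return useSyncExternalStore(
    subscribe,
    () => window.matchMedia(query).matches,
    () => false, // server snapshot
  )
}
```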

View File

@@ -0,0 +1,74 @@
---
title: Use Functional setState Updates
impact: MEDIUM
impactDescription: prevents stale closures and unnecessary callback recreations
tags: react, hooks, useState, useCallback, callbacks, closures
---
## Use Functional setState Updates
When updating state based on the current state value, use the functional update form of setState instead of directly referencing the state variable. This prevents stale closures, eliminates unnecessary dependencies, and creates stable callback references.
**Incorrect (requires state as dependency):**
```tsx
function TodoList() {
const [items, setItems] = useState(initialItems)
// Callback must depend on items, recreated on every items change
const addItems = useCallback((newItems: Item[]) => {
setItems([...items, ...newItems])
}, [items]) // ❌ items dependency causes recreations
// Risk of stale closure if dependency is forgotten
const removeItem = useCallback((id: string) => {
setItems(items.filter(item => item.id !== id))
}, []) // ❌ Missing items dependency - will use stale items!
return <ItemsEditor items={items} onAdd={addItems} onRemove={removeItem} />
}
```
The first callback is recreated every time `items` changes, which can cause child components to re-render unnecessarily. The second callback has a stale closure bug—it will always reference the initial `items` value.
**Correct (stable callbacks, no stale closures):**
```tsx
function TodoList() {
const [items, setItems] = useState(initialItems)
// Stable callback, never recreated
const addItems = useCallback((newItems: Item[]) => {
setItems(curr => [...curr, ...newItems])
}, []) // ✅ No dependencies needed
// Always uses latest state, no stale closure risk
const removeItem = useCallback((id: string) => {
setItems(curr => curr.filter(item => item.id !== id))
}, []) // ✅ Safe and stable
return <ItemsEditor items={items} onAdd={addItems} onRemove={removeItem} />
}
```
**Benefits:**
1. **Stable callback references** - Callbacks don't need to be recreated when state changes
2. **No stale closures** - Always operates on the latest state value
3. **Fewer dependencies** - Simplifies dependency arrays and reduces memory leaks
4. **Prevents bugs** - Eliminates the most common source of React closure bugs
**When to use functional updates:**
- Any setState that depends on the current state value
- Inside useCallback/useMemo when state is needed
- Event handlers that reference state
- Async operations that update state
**When direct updates are fine:**
- Setting state to a static value: `setCount(0)`
- Setting state from props/arguments only: `setName(newName)`
- State doesn't depend on previous value
**Note:** If your project has [React Compiler](https://react.dev/learn/react-compiler) enabled, the compiler can automatically optimize some cases, but functional updates are still recommended for correctness and to prevent stale closure bugs.

View File

@@ -0,0 +1,58 @@
---
title: Use Lazy State Initialization
impact: MEDIUM
impactDescription: wasted computation on every render
tags: react, hooks, useState, performance, initialization
---
## Use Lazy State Initialization
Pass a function to `useState` for expensive initial values. Without the function form, the initializer runs on every render even though the value is only used once.
**Incorrect (runs on every render):**
```tsx
function FilteredList({ items }: { items: Item[] }) {
// buildSearchIndex() runs on EVERY render, even after initialization
const [searchIndex, setSearchIndex] = useState(buildSearchIndex(items))
const [query, setQuery] = useState('')
// When query changes, buildSearchIndex runs again unnecessarily
return <SearchResults index={searchIndex} query={query} />
}
function UserProfile() {
// JSON.parse runs on every render
const [settings, setSettings] = useState(
JSON.parse(localStorage.getItem('settings') || '{}')
)
return <SettingsForm settings={settings} onChange={setSettings} />
}
```
**Correct (runs only once):**
```tsx
function FilteredList({ items }: { items: Item[] }) {
// buildSearchIndex() runs ONLY on initial render
const [searchIndex, setSearchIndex] = useState(() => buildSearchIndex(items))
const [query, setQuery] = useState('')
return <SearchResults index={searchIndex} query={query} />
}
function UserProfile() {
// JSON.parse runs only on initial render
const [settings, setSettings] = useState(() => {
const stored = localStorage.getItem('settings')
return stored ? JSON.parse(stored) : {}
})
return <SettingsForm settings={settings} onChange={setSettings} />
}
```
Use lazy initialization when computing initial values from localStorage/sessionStorage, building data structures (indexes, maps), reading from the DOM, or performing heavy transformations.
For simple primitives (`useState(0)`), direct references (`useState(props.value)`), or cheap literals (`useState({})`), the function form is unnecessary.

View File

@@ -0,0 +1,44 @@
---
title: Extract to Memoized Components
impact: MEDIUM
impactDescription: enables early returns
tags: rerender, memo, useMemo, optimization
---
## Extract to Memoized Components
Extract expensive work into memoized components to enable early returns before computation.
**Incorrect (computes avatar even when loading):**
```tsx
function Profile({ user, loading }: Props) {
const avatar = useMemo(() => {
const id = computeAvatarId(user)
return <Avatar id={id} />
}, [user])
if (loading) return <Skeleton />
return <div>{avatar}</div>
}
```
**Correct (skips computation when loading):**
```tsx
const UserAvatar = memo(function UserAvatar({ user }: { user: User }) {
const id = useMemo(() => computeAvatarId(user), [user])
return <Avatar id={id} />
})
function Profile({ user, loading }: Props) {
if (loading) return <Skeleton />
return (
<div>
<UserAvatar user={user} />
</div>
)
}
```
**Note:** If your project has [React Compiler](https://react.dev/learn/react-compiler) enabled, manual memoization with `memo()` and `useMemo()` is not necessary. The compiler automatically optimizes re-renders.

View File

@@ -0,0 +1,40 @@
---
title: Use Transitions for Non-Urgent Updates
impact: MEDIUM
impactDescription: maintains UI responsiveness
tags: rerender, transitions, startTransition, performance
---
## Use Transitions for Non-Urgent Updates
Mark frequent, non-urgent state updates as transitions to maintain UI responsiveness.
**Incorrect (blocks UI on every scroll):**
```tsx
function ScrollTracker() {
const [scrollY, setScrollY] = useState(0)
useEffect(() => {
const handler = () => setScrollY(window.scrollY)
window.addEventListener('scroll', handler, { passive: true })
return () => window.removeEventListener('scroll', handler)
}, [])
}
```
**Correct (non-blocking updates):**
```tsx
import { startTransition } from 'react'
function ScrollTracker() {
const [scrollY, setScrollY] = useState(0)
useEffect(() => {
const handler = () => {
startTransition(() => setScrollY(window.scrollY))
}
window.addEventListener('scroll', handler, { passive: true })
return () => window.removeEventListener('scroll', handler)
}, [])
}
```

View File

@@ -0,0 +1,73 @@
---
title: Use after() for Non-Blocking Operations
impact: MEDIUM
impactDescription: faster response times
tags: server, async, logging, analytics, side-effects
---
## Use after() for Non-Blocking Operations
Use Next.js's `after()` to schedule work that should execute after a response is sent. This prevents logging, analytics, and other side effects from blocking the response.
**Incorrect (blocks response):**
```tsx
import { logUserAction } from '@/app/utils'
export async function POST(request: Request) {
// Perform mutation
await updateDatabase(request)
// Logging blocks the response
const userAgent = request.headers.get('user-agent') || 'unknown'
await logUserAction({ userAgent })
return new Response(JSON.stringify({ status: 'success' }), {
status: 200,
headers: { 'Content-Type': 'application/json' }
})
}
```
**Correct (non-blocking):**
```tsx
import { after } from 'next/server'
import { headers, cookies } from 'next/headers'
import { logUserAction } from '@/app/utils'
export async function POST(request: Request) {
// Perform mutation
await updateDatabase(request)
// Log after response is sent
after(async () => {
const userAgent = (await headers()).get('user-agent') || 'unknown'
const sessionCookie = (await cookies()).get('session-id')?.value || 'anonymous'
logUserAction({ sessionCookie, userAgent })
})
return new Response(JSON.stringify({ status: 'success' }), {
status: 200,
headers: { 'Content-Type': 'application/json' }
})
}
```
The response is sent immediately while logging happens in the background.
**Common use cases:**
- Analytics tracking
- Audit logging
- Sending notifications
- Cache invalidation
- Cleanup tasks
**Important notes:**
- `after()` runs even if the response fails or redirects
- Works in Server Actions, Route Handlers, and Server Components
Reference: [https://nextjs.org/docs/app/api-reference/functions/after](https://nextjs.org/docs/app/api-reference/functions/after)

View File

@@ -0,0 +1,41 @@
---
title: Cross-Request LRU Caching
impact: HIGH
impactDescription: caches across requests
tags: server, cache, lru, cross-request
---
## Cross-Request LRU Caching
`React.cache()` only works within one request. For data shared across sequential requests (user clicks button A then button B), use an LRU cache.
**Implementation:**
```typescript
import { LRUCache } from 'lru-cache'
const cache = new LRUCache<string, any>({
max: 1000,
ttl: 5 * 60 * 1000 // 5 minutes
})
export async function getUser(id: string) {
const cached = cache.get(id)
if (cached) return cached
const user = await db.user.findUnique({ where: { id } })
cache.set(id, user)
return user
}
// Request 1: DB query, result cached
// Request 2: cache hit, no DB query
```
Use when sequential user actions hit multiple endpoints needing the same data within seconds.
**With Vercel's [Fluid Compute](https://vercel.com/docs/fluid-compute):** LRU caching is especially effective because multiple concurrent requests can share the same function instance and cache. This means the cache persists across requests without needing external storage like Redis.
**In traditional serverless:** Each invocation runs in isolation, so consider Redis for cross-process caching.
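A cross-process sketch with Redis (assuming `ioredis` and the same `db` client; key format and TTL are illustrative):
```typescript
import Redis from 'ioredis'
const redis = new Redis(process.env.REDIS_URL!)
export async function getUser(id: string) {
  const cached = await redis.get(`user:${id}`)
  if (cached) return JSON.parse(cached)
  const user = await db.user.findUnique({ where: { id } })
  await redis.set(`user:${id}`, JSON.stringify(user), 'EX', 300) // 5 min TTL
  return user
}
```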
Reference: [https://github.com/isaacs/node-lru-cache](https://github.com/isaacs/node-lru-cache)

View File

@@ -0,0 +1,76 @@
---
title: Per-Request Deduplication with React.cache()
impact: MEDIUM
impactDescription: deduplicates within request
tags: server, cache, react-cache, deduplication
---
## Per-Request Deduplication with React.cache()
Use `React.cache()` for server-side request deduplication. Authentication and database queries benefit most.
**Usage:**
```typescript
import { cache } from 'react'
export const getCurrentUser = cache(async () => {
const session = await auth()
if (!session?.user?.id) return null
return await db.user.findUnique({
where: { id: session.user.id }
})
})
```
Within a single request, multiple calls to `getCurrentUser()` execute the query only once.
**Avoid inline objects as arguments:**
`React.cache()` uses shallow equality (`Object.is`) to determine cache hits. Inline objects create new references each call, preventing cache hits.
**Incorrect (always cache miss):**
```typescript
const getUser = cache(async (params: { uid: number }) => {
return await db.user.findUnique({ where: { id: params.uid } })
})
// Each call creates new object, never hits cache
getUser({ uid: 1 })
getUser({ uid: 1 }) // Cache miss, runs query again
```
**Correct (cache hit):**
```typescript
const getUser = cache(async (uid: number) => {
return await db.user.findUnique({ where: { id: uid } })
})
// Primitive args use value equality
getUser(1)
getUser(1) // Cache hit, returns cached result
```
If you must pass objects, pass the same reference:
```typescript
const params = { uid: 1 }
getUser(params) // Query runs
getUser(params) // Cache hit (same reference)
```
**Next.js-Specific Note:**
In Next.js, the `fetch` API is automatically extended with request memoization. Requests with the same URL and options are automatically deduplicated within a single request, so you don't need `React.cache()` for `fetch` calls. However, `React.cache()` is still essential for other async tasks:
- Database queries (Prisma, Drizzle, etc.)
- Heavy computations
- Authentication checks
- File system operations
- Any non-fetch async work
Use `React.cache()` to deduplicate these operations across your component tree.
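For example (a sketch; the URL is illustrative), helpers that fetch the same URL during one render resolve from a single network request:
```typescript
async function getConfig() {
  const res = await fetch('https://api.example.com/config')
  return res.json()
}
// <Header /> and <Footer /> can both call getConfig() in the same
// request; Next.js memoizes the fetch, so only one request is sent
```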
Reference: [React.cache documentation](https://react.dev/reference/react/cache)

View File

@@ -0,0 +1,83 @@
---
title: Parallel Data Fetching with Component Composition
impact: CRITICAL
impactDescription: eliminates server-side waterfalls
tags: server, rsc, parallel-fetching, composition
---
## Parallel Data Fetching with Component Composition
React Server Components execute sequentially within a tree. Restructure with composition to parallelize data fetching.
**Incorrect (Sidebar waits for Page's fetch to complete):**
```tsx
export default async function Page() {
const header = await fetchHeader()
return (
<div>
<div>{header}</div>
<Sidebar />
</div>
)
}
async function Sidebar() {
const items = await fetchSidebarItems()
return <nav>{items.map(renderItem)}</nav>
}
```
**Correct (both fetch simultaneously):**
```tsx
async function Header() {
const data = await fetchHeader()
return <div>{data}</div>
}
async function Sidebar() {
const items = await fetchSidebarItems()
return <nav>{items.map(renderItem)}</nav>
}
export default function Page() {
return (
<div>
<Header />
<Sidebar />
</div>
)
}
```
**Alternative with children prop:**
```tsx
async function Header() {
const data = await fetchHeader()
return <div>{data}</div>
}
async function Sidebar() {
const items = await fetchSidebarItems()
return <nav>{items.map(renderItem)}</nav>
}
function Layout({ children }: { children: ReactNode }) {
return (
<div>
<Header />
{children}
</div>
)
}
export default function Page() {
return (
<Layout>
<Sidebar />
</Layout>
)
}
```
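When a single component needs both results, `Promise.all` gives the same parallelism (a sketch):
```tsx
export default async function Page() {
  // Start both fetches before awaiting either
  const [header, items] = await Promise.all([
    fetchHeader(),
    fetchSidebarItems(),
  ])
  return (
    <div>
      <div>{header}</div>
      <nav>{items.map(renderItem)}</nav>
    </div>
  )
}
```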

View File

@@ -0,0 +1,38 @@
---
title: Minimize Serialization at RSC Boundaries
impact: HIGH
impactDescription: reduces data transfer size
tags: server, rsc, serialization, props
---
## Minimize Serialization at RSC Boundaries
The React Server/Client boundary serializes all object properties into strings and embeds them in the HTML response and subsequent RSC requests. This serialized data directly impacts page weight and load time, so **size matters a lot**. Only pass fields that the client actually uses.
**Incorrect (serializes all 50 fields):**
```tsx
async function Page() {
const user = await fetchUser() // 50 fields
return <Profile user={user} />
}
'use client'
function Profile({ user }: { user: User }) {
return <div>{user.name}</div> // uses 1 field
}
```
**Correct (serializes only 1 field):**
```tsx
async function Page() {
const user = await fetchUser()
return <Profile name={user.name} />
}
'use client'
function Profile({ name }: { name: string }) {
return <div>{name}</div>
}
```

View File

@@ -16,14 +16,14 @@ jobs:
- name: Check Docker Compose inputs
id: docker-compose-changes
uses: tj-actions/changed-files@v46
uses: tj-actions/changed-files@v47
with:
files: |
docker/generate_docker_compose
docker/.env.example
docker/docker-compose-template.yaml
docker/docker-compose.yaml
- uses: actions/setup-python@v5
- uses: actions/setup-python@v6
with:
python-version: "3.11"
@@ -82,6 +82,6 @@ jobs:
# mdformat breaks YAML front matter in markdown files. Add --exclude for directories containing YAML front matter.
- name: mdformat
run: |
uvx --python 3.13 mdformat . --exclude ".claude/skills/**/SKILL.md"
uvx --python 3.13 mdformat . --exclude ".claude/skills/**"
- uses: autofix-ci/action@635ffb0c9798bd160680f18fd73371e355b85f27

View File

@@ -112,7 +112,7 @@ jobs:
context: "web"
steps:
- name: Download digests
uses: actions/download-artifact@v4
uses: actions/download-artifact@v7
with:
path: /tmp/digests
pattern: digests-${{ matrix.context }}-*

View File

@@ -19,7 +19,7 @@ jobs:
github.event.workflow_run.head_branch == 'deploy/agent-dev'
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
uses: appleboy/ssh-action@v1
with:
host: ${{ secrets.AGENT_DEV_SSH_HOST }}
username: ${{ secrets.SSH_USER }}

View File

@@ -16,7 +16,7 @@ jobs:
github.event.workflow_run.head_branch == 'deploy/dev'
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
uses: appleboy/ssh-action@v1
with:
host: ${{ secrets.SSH_HOST }}
username: ${{ secrets.SSH_USER }}

View File

@@ -20,7 +20,7 @@ jobs:
)
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
uses: appleboy/ssh-action@v1
with:
host: ${{ secrets.HITL_SSH_HOST }}
username: ${{ secrets.SSH_USER }}

View File

@@ -18,7 +18,7 @@ jobs:
pull-requests: write
steps:
- uses: actions/stale@v5
- uses: actions/stale@v10
with:
days-before-issue-stale: 15
days-before-issue-close: 3

View File

@@ -65,6 +65,9 @@ jobs:
defaults:
run:
working-directory: ./web
permissions:
checks: write
pull-requests: read
steps:
- name: Checkout code
@@ -103,7 +106,16 @@ jobs:
if: steps.changed-files.outputs.any_changed == 'true'
working-directory: ./web
run: |
pnpm run lint
pnpm run lint:ci
# pnpm run lint:report
# continue-on-error: true
# - name: Annotate Code
# if: steps.changed-files.outputs.any_changed == 'true' && github.event_name == 'pull_request'
# uses: DerLev/eslint-annotations@51347b3a0abfb503fc8734d5ae31c4b151297fae
# with:
# eslint-report: web/eslint_report.json
# github-token: ${{ secrets.GITHUB_TOKEN }}
- name: Web type check
if: steps.changed-files.outputs.any_changed == 'true'
@@ -115,11 +127,6 @@ jobs:
working-directory: ./web
run: pnpm run knip
- name: Web build check
if: steps.changed-files.outputs.any_changed == 'true'
working-directory: ./web
run: pnpm run build
superlinter:
name: SuperLinter
runs-on: ubuntu-latest

View File

@@ -21,7 +21,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v6
with:
fetch-depth: 0

View File

@@ -366,3 +366,48 @@ jobs:
path: web/coverage
retention-days: 30
if-no-files-found: error
web-build:
name: Web Build
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./web
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
persist-credentials: false
- name: Check changed files
id: changed-files
uses: tj-actions/changed-files@v47
with:
files: |
web/**
.github/workflows/web-tests.yml
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
package_json_file: web/package.json
run_install: false
- name: Setup NodeJS
uses: actions/setup-node@v6
if: steps.changed-files.outputs.any_changed == 'true'
with:
node-version: 24
cache: pnpm
cache-dependency-path: ./web/pnpm-lock.yaml
- name: Web dependencies
if: steps.changed-files.outputs.any_changed == 'true'
working-directory: ./web
run: pnpm install --frozen-lockfile
- name: Web build check
if: steps.changed-files.outputs.any_changed == 'true'
working-directory: ./web
run: pnpm run build

1
.gitignore vendored
View File

@@ -209,7 +209,6 @@ api/.vscode
.history
.idea/
web/migration/
# pnpm
/.pnpm-store

View File

@@ -12,12 +12,8 @@ The codebase is split into:
## Backend Workflow
- Read `api/AGENTS.md` for details
- Run backend CLI commands through `uv run --project api <command>`.
- Before submission, all backend modifications must pass local checks: `make lint`, `make type-check`, and `uv run --project api --dev dev/pytest/pytest_unit_tests.sh`.
- Use Makefile targets for linting and formatting; `make lint` and `make type-check` cover the required checks.
- Integration tests are CI-only and are not expected to run in the local environment.
## Frontend Workflow

View File

@@ -61,7 +61,8 @@ check:
lint:
@echo "🔧 Running ruff format, check with fixes, import linter, and dotenv-linter..."
@uv run --project api --dev sh -c 'ruff format ./api && ruff check --fix ./api'
@uv run --project api --dev ruff format ./api
@uv run --project api --dev ruff check --fix ./api
@uv run --directory api --dev lint-imports
@uv run --project api --dev dotenv-linter ./api/.env.example ./web/.env.example
@echo "✅ Linting complete"
@@ -73,7 +74,12 @@ type-check:
test:
@echo "🧪 Running backend unit tests..."
@uv run --project api --dev dev/pytest/pytest_unit_tests.sh
@if [ -n "$(TARGET_TESTS)" ]; then \
echo "Target: $(TARGET_TESTS)"; \
uv run --project api --dev pytest $(TARGET_TESTS); \
else \
uv run --project api --dev dev/pytest/pytest_unit_tests.sh; \
fi
@echo "✅ Tests complete"
# Build Docker images
@@ -125,7 +131,7 @@ help:
@echo " make check - Check code with ruff"
@echo " make lint - Format, fix, and lint code (ruff, imports, dotenv)"
@echo " make type-check - Run type checking with basedpyright"
@echo " make test - Run backend unit tests"
@echo " make test - Run backend unit tests (or TARGET_TESTS=./api/tests/<target_tests>)"
@echo ""
@echo "Docker Build Targets:"
@echo " make build-web - Build web Docker image"

0
agent-notes/.gitkeep Normal file
View File

View File

@@ -1,62 +1,236 @@
# Agent Skill Index
# API Agent Guide
## Agent Notes (must-check)
Before you start work on any backend file under `api/`, you MUST check whether a related note exists under:
- `agent-notes/<same-relative-path-as-target-file>.md`
Rules:
- **Path mapping**: for a target file `<path>/<name>.py`, the note must be `agent-notes/<path>/<name>.py.md` (same folder structure, same filename, plus `.md`).
- **Before working**:
- If the note exists, read it first and follow any constraints/decisions recorded there.
- If the note conflicts with the current code, or references an "origin" file/path that has been deleted, renamed, or migrated, treat the **code as the single source of truth** and update the note to match reality.
- If the note does not exist, create it with a short architecture/intent summary and any relevant invariants/edge cases.
- **During working**:
- Keep the note in sync as you discover constraints, make decisions, or change approach.
- If you move/rename a file, migrate its note to the new mapped path (and fix any outdated references inside the note).
- Record non-obvious edge cases, trade-offs, and the test/verification plan as you go (not just at the end).
- Keep notes **coherent**: integrate new findings into the relevant sections and rewrite for clarity; avoid append-only “recent fix” / changelog-style additions unless the note is explicitly intended to be a changelog.
- **When finishing work**:
- Update the related note(s) to reflect what changed, why, and any new edge cases/tests.
- If a file is deleted, remove or clearly deprecate the corresponding note so it cannot be mistaken as current guidance.
- Keep notes concise and accurate; they are meant to prevent repeated rediscovery.
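The path mapping above is mechanical; a minimal sketch (illustrative only, assuming target paths are given relative to the repo root):
```python
from pathlib import Path


def note_path_for(target: str) -> Path:
    """Map <path>/<name>.py to agent-notes/<path>/<name>.py.md."""
    return Path("agent-notes") / f"{target}.md"


# api/services/dataset_service.py -> agent-notes/api/services/dataset_service.py.md
assert note_path_for("api/services/dataset_service.py") == Path(
    "agent-notes/api/services/dataset_service.py.md"
)
```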
## Skill Index
Start with the section that best matches your need. Each entry lists the problems it solves plus key files/concepts so you know what to expect before opening it.
______________________________________________________________________
### Platform Foundations
## Platform Foundations
- **[Infrastructure Overview](agent_skills/infra.md)**\
When to read this:
#### [Infrastructure Overview](agent_skills/infra.md)
- **When to read this**
- You need to understand where a feature belongs in the architecture.
- You're wiring storage, Redis, vector stores, or OTEL.
- You're about to add CLI commands or async jobs.\
What it covers: configuration stack (`configs/app_config.py`, remote settings), storage entry points (`extensions/ext_storage.py`, `core/file/file_manager.py`), Redis conventions (`extensions/ext_redis.py`), plugin runtime topology, vector-store factory (`core/rag/datasource/vdb/*`), observability hooks, SSRF proxy usage, and core CLI commands.
- You're about to add CLI commands or async jobs.
- **What it covers**
- Configuration stack (`configs/app_config.py`, remote settings)
- Storage entry points (`extensions/ext_storage.py`, `core/file/file_manager.py`)
- Redis conventions (`extensions/ext_redis.py`)
- Plugin runtime topology
- Vector-store factory (`core/rag/datasource/vdb/*`)
- Observability hooks
- SSRF proxy usage
- Core CLI commands
- **[Coding Style](agent_skills/coding_style.md)**\
When to read this:
### Plugin & Extension Development
- You're writing or reviewing backend code and need the authoritative checklist.
- You're unsure about Pydantic validators, SQLAlchemy session usage, or logging patterns.
- You want the exact lint/type/test commands used in PRs.\
Includes: Ruff & BasedPyright commands, no-annotation policy, session examples (`with Session(db.engine, ...)`), `@field_validator` usage, logging expectations, and the rule set for file size, helpers, and package management.
______________________________________________________________________
## Plugin & Extension Development
- **[Plugin Systems](agent_skills/plugin.md)**\
When to read this:
#### [Plugin Systems](agent_skills/plugin.md)
- **When to read this**
- You're building or debugging a marketplace plugin.
- You need to know how manifests, providers, daemons, and migrations fit together.\
What it covers: plugin manifests (`core/plugin/entities/plugin.py`), installation/upgrade flows (`services/plugin/plugin_service.py`, CLI commands), runtime adapters (`core/plugin/impl/*` for tool/model/datasource/trigger/endpoint/agent), daemon coordination (`core/plugin/entities/plugin_daemon.py`), and how provider registries surface capabilities to the rest of the platform.
- You need to know how manifests, providers, daemons, and migrations fit together.
- **What it covers**
- Plugin manifests (`core/plugin/entities/plugin.py`)
- Installation/upgrade flows (`services/plugin/plugin_service.py`, CLI commands)
- Runtime adapters (`core/plugin/impl/*` for tool/model/datasource/trigger/endpoint/agent)
- Daemon coordination (`core/plugin/entities/plugin_daemon.py`)
- How provider registries surface capabilities to the rest of the platform
- **[Plugin OAuth](agent_skills/plugin_oauth.md)**\
When to read this:
#### [Plugin OAuth](agent_skills/plugin_oauth.md)
- **When to read this**
- You must integrate OAuth for a plugin or datasource.
- You're handling credential encryption or refresh flows.\
Topics: credential storage, encryption helpers (`core/helper/provider_encryption.py`), OAuth client bootstrap (`services/plugin/oauth_service.py`, `services/plugin/plugin_parameter_service.py`), and how console/API layers expose the flows.
- You're handling credential encryption or refresh flows.
- **Topics**
- Credential storage
- Encryption helpers (`core/helper/provider_encryption.py`)
- OAuth client bootstrap (`services/plugin/oauth_service.py`, `services/plugin/plugin_parameter_service.py`)
- How console/API layers expose the flows
______________________________________________________________________
### Workflow Entry & Execution
## Workflow Entry & Execution
#### [Trigger Concepts](agent_skills/trigger.md)
- **[Trigger Concepts](agent_skills/trigger.md)**\
When to read this:
- **When to read this**
- You're debugging why a workflow didn't start.
- You're adding a new trigger type or hook.
- You need to trace async execution, draft debugging, or webhook/schedule pipelines.\
Details: Start-node taxonomy, webhook & schedule internals (`core/workflow/nodes/trigger_*`, `services/trigger/*`), async orchestration (`services/async_workflow_service.py`, Celery queues), debug event bus, and storage/logging interactions.
- You need to trace async execution, draft debugging, or webhook/schedule pipelines.
- **Details**
- Start-node taxonomy
- Webhook & schedule internals (`core/workflow/nodes/trigger_*`, `services/trigger/*`)
- Async orchestration (`services/async_workflow_service.py`, Celery queues)
- Debug event bus
- Storage/logging interactions
______________________________________________________________________
## General Reminders
## Additional Notes for Agents
- All skill docs assume you follow the coding style guide—run Ruff/BasedPyright/tests listed there before submitting changes.
- All skill docs assume you follow the coding style rules below—run the lint/type/test commands before submitting changes.
- When you cannot find an answer in these briefs, search the codebase using the paths referenced (e.g., `core/plugin/impl/tool.py`, `services/dataset_service.py`).
- If you run into cross-cutting concerns (tenancy, configuration, storage), check the infrastructure guide first; it links to most supporting modules.
- Keep multi-tenancy and configuration central: everything flows through `configs.dify_config` and `tenant_id`.
- When touching plugins or triggers, consult both the system overview and the specialised doc to ensure you adjust lifecycle, storage, and observability consistently.
## Coding Style
This is the default standard for backend code in this repo. Follow it for new code and use it as the checklist when reviewing changes.
### Linting & Formatting
- Use Ruff for formatting and linting (follow `.ruff.toml`).
- Keep each line under 120 characters (including spaces).
### Naming Conventions
- Use `snake_case` for variables and functions.
- Use `PascalCase` for classes.
- Use `UPPER_CASE` for constants.
### Typing & Class Layout
- Code should usually include type annotations that match the repo's current Python version (avoid untyped public APIs and “mystery” values).
- Prefer modern typing forms (e.g. `list[str]`, `dict[str, int]`) and avoid `Any` unless there's a strong reason.
- For classes, declare member variables at the top of the class body (before `__init__`) so the class shape is obvious at a glance:
```python
from datetime import datetime


class Example:
    user_id: str
    created_at: datetime

    def __init__(self, user_id: str, created_at: datetime) -> None:
        self.user_id = user_id
        self.created_at = created_at
```
### General Rules
- Use Pydantic v2 conventions.
- Use `uv` for Python package management in this repo (usually with `--project api`).
- Prefer simple functions over small “utility classes” for lightweight helpers.
- Avoid implementing dunder methods unless it's clearly needed and matches existing patterns.
- Never start long-running services as part of agent work (`uv run app.py`, `flask run`, etc.); running tests is allowed.
- Keep files below ~800 lines; split when necessary.
- Keep code readable and explicit—avoid clever hacks.
### Architecture & Boundaries
- Mirror the layered architecture: controller → service → core/domain.
- Reuse existing helpers in `core/`, `services/`, and `libs/` before creating new abstractions.
- Optimise for observability: deterministic control flow, clear logging, actionable errors.
### Logging & Errors
- Never use `print`; use a module-level logger:
- `logger = logging.getLogger(__name__)`
- Include tenant/app/workflow identifiers in log context when relevant.
- Raise domain-specific exceptions (`services/errors`, `core/errors`) and translate them into HTTP responses in controllers.
- Log retryable events at `warning`, terminal failures at `error`.
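A minimal sketch of these logging rules (the function name and exception types are illustrative, not repo code):
```python
import logging

logger = logging.getLogger(__name__)


def sync_document(tenant_id: str, document_id: str) -> None:
    try:
        ...  # the actual work
    except TimeoutError:
        # Retryable event: log at warning, with tenant/document context.
        logger.warning("sync timed out, will retry: tenant=%s document=%s", tenant_id, document_id)
        raise
    except ValueError:
        # Terminal failure: log at error, same context.
        logger.error("sync failed permanently: tenant=%s document=%s", tenant_id, document_id)
        raise
```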
### SQLAlchemy Patterns
- Models inherit from `models.base.TypeBase`; do not create ad-hoc metadata or engines.
- Open sessions with context managers:
```python
from sqlalchemy.orm import Session

with Session(db.engine, expire_on_commit=False) as session:
    stmt = select(Workflow).where(
        Workflow.id == workflow_id,
        Workflow.tenant_id == tenant_id,
    )
    workflow = session.execute(stmt).scalar_one_or_none()
```
- Prefer SQLAlchemy expressions; avoid raw SQL unless necessary.
- Always scope queries by `tenant_id` and protect write paths with safeguards (`FOR UPDATE`, row counts, etc.).
- Introduce repository abstractions only for very large tables (e.g., workflow executions) or when alternative storage strategies are required.
### Storage & External I/O
- Access storage via `extensions.ext_storage.storage`.
- Use `core.helper.ssrf_proxy` for outbound HTTP fetches.
- Background tasks that touch storage must be idempotent and should log the relevant object identifiers.
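A hedged sketch of the intended call pattern (it assumes `storage.save(filename, data)` and an httpx-style `ssrf_proxy.get(url)`; verify the exact signatures in the modules above):
```python
from core.helper import ssrf_proxy
from extensions.ext_storage import storage


def fetch_and_store(tenant_id: str, url: str, object_key: str) -> None:
    # Outbound HTTP goes through the SSRF-safe proxy helper, never plain requests/httpx.
    response = ssrf_proxy.get(url)
    response.raise_for_status()
    # Tenant-scoped key; rewriting the same key on retry keeps the task idempotent.
    storage.save(f"{tenant_id}/{object_key}", response.content)
```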
### Pydantic Usage
- Define DTOs with Pydantic v2 models and forbid extras by default.
- Use `@field_validator` / `@model_validator` for domain rules.
Example:
```python
from pydantic import BaseModel, ConfigDict, HttpUrl, field_validator


class TriggerConfig(BaseModel):
    endpoint: HttpUrl
    secret: str

    model_config = ConfigDict(extra="forbid")

    @field_validator("secret")
    def ensure_secret_prefix(cls, value: str) -> str:
        if not value.startswith("dify_"):
            raise ValueError("secret must start with dify_")
        return value
```
### Generics & Protocols
- Use `typing.Protocol` to define behavioural contracts (e.g., cache interfaces).
- Apply generics (`TypeVar`, `Generic`) for reusable utilities like caches or providers.
- Validate dynamic inputs at runtime when generics cannot enforce safety alone.
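For example, a generic cache contract (illustrative names, not an existing interface in this repo):
```python
from typing import Protocol, TypeVar

T = TypeVar("T")


class Cache(Protocol[T]):
    """Behavioural contract: anything with matching get/set satisfies it."""

    def get(self, key: str) -> T | None: ...
    def set(self, key: str, value: T) -> None: ...


class InMemoryCache:
    """Structurally satisfies Cache[str] — no inheritance required."""

    def __init__(self) -> None:
        self._data: dict[str, str] = {}

    def get(self, key: str) -> str | None:
        return self._data.get(key)

    def set(self, key: str, value: str) -> None:
        self._data[key] = value
```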
### Tooling & Checks
Quick checks while iterating:
- Format: `make format`
- Lint (includes auto-fix): `make lint`
- Type check: `make type-check`
- Targeted tests: `make test TARGET_TESTS=./api/tests/<target_tests>`
Before opening a PR / submitting:
- `make lint`
- `make type-check`
- `make test`
### Controllers & Services
- Controllers: parse input via Pydantic, invoke services, return serialised responses; no business logic.
- Services: coordinate repositories, providers, background tasks; keep side effects explicit.
- Document non-obvious behaviour with concise comments.
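A minimal sketch of the split (all names invented for illustration; real controllers are flask_restx resources as elsewhere in this guide):
```python
from pydantic import BaseModel


class RenamePayload(BaseModel):
    name: str


def rename_document(document_id: str, name: str) -> dict:
    """Service layer: owns the business logic and explicit side effects."""
    # ... load the record, check tenant ownership, persist ...
    return {"id": document_id, "name": name}


def rename_document_controller(document_id: str, raw_payload: dict) -> tuple[dict, int]:
    """Controller layer: parse input, delegate, serialise — no business logic."""
    payload = RenamePayload.model_validate(raw_payload)
    return rename_document(document_id, payload.name), 200
```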
### Miscellaneous
- Use `configs.dify_config` for configuration—never read environment variables directly.
- Maintain tenant awareness end-to-end; `tenant_id` must flow through every layer touching shared resources.
- Queue async work through `services/async_workflow_service`; implement tasks under `tasks/` with explicit queue selection.
- Keep experimental scripts under `dev/`; do not ship them in production builds.
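As a sketch, an async task with an explicit queue might look like this (the task name and queue are illustrative; check `tasks/` for the real conventions):
```python
import logging

from celery import shared_task

logger = logging.getLogger(__name__)


@shared_task(queue="dataset")
def refresh_summary_task(dataset_id: str, document_id: str) -> None:
    # Idempotent body: safe to re-run if the worker retries.
    logger.info("refreshing summary: dataset=%s document=%s", dataset_id, document_id)
```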


@@ -1,115 +0,0 @@
## Linter
- Always follow `.ruff.toml`.
- Run `uv run ruff check --fix --unsafe-fixes`.
- Keep each line under 100 characters (including spaces).
## Code Style
- `snake_case` for variables and functions.
- `PascalCase` for classes.
- `UPPER_CASE` for constants.
## Rules
- Use Pydantic v2 standard.
- Use `uv` for package management.
- Do not override dunder methods like `__init__`, `__iadd__`, etc.
- Never launch services (`uv run app.py`, `flask run`, etc.); running tests under `tests/` is allowed.
- Prefer simple functions over classes for lightweight helpers.
- Keep files below 800 lines; split when necessary.
- Keep code readable—no clever hacks.
- Never use `print`; log with `logger = logging.getLogger(__name__)`.
## Guiding Principles
- Mirror the project's layered architecture: controller → service → core/domain.
- Reuse existing helpers in `core/`, `services/`, and `libs/` before creating new abstractions.
- Optimise for observability: deterministic control flow, clear logging, actionable errors.
## SQLAlchemy Patterns
- Models inherit from `models.base.Base`; never create ad-hoc metadata or engines.
- Open sessions with context managers:
```python
from sqlalchemy.orm import Session

with Session(db.engine, expire_on_commit=False) as session:
    stmt = select(Workflow).where(
        Workflow.id == workflow_id,
        Workflow.tenant_id == tenant_id,
    )
    workflow = session.execute(stmt).scalar_one_or_none()
```
- Use SQLAlchemy expressions; avoid raw SQL unless necessary.
- Introduce repository abstractions only for very large tables (e.g., workflow executions) to support alternative storage strategies.
- Always scope queries by `tenant_id` and protect write paths with safeguards (`FOR UPDATE`, row counts, etc.).
## Storage & External IO
- Access storage via `extensions.ext_storage.storage`.
- Use `core.helper.ssrf_proxy` for outbound HTTP fetches.
- Background tasks that touch storage must be idempotent and log the relevant object identifiers.
## Pydantic Usage
- Define DTOs with Pydantic v2 models and forbid extras by default.
- Use `@field_validator` / `@model_validator` for domain rules.
- Example:
```python
from pydantic import BaseModel, ConfigDict, HttpUrl, field_validator


class TriggerConfig(BaseModel):
    endpoint: HttpUrl
    secret: str

    model_config = ConfigDict(extra="forbid")

    @field_validator("secret")
    def ensure_secret_prefix(cls, value: str) -> str:
        if not value.startswith("dify_"):
            raise ValueError("secret must start with dify_")
        return value
```
## Generics & Protocols
- Use `typing.Protocol` to define behavioural contracts (e.g., cache interfaces).
- Apply generics (`TypeVar`, `Generic`) for reusable utilities like caches or providers.
- Validate dynamic inputs at runtime when generics cannot enforce safety alone.
## Error Handling & Logging
- Raise domain-specific exceptions (`services/errors`, `core/errors`) and translate to HTTP responses in controllers.
- Declare `logger = logging.getLogger(__name__)` at module top.
- Include tenant/app/workflow identifiers in log context.
- Log retryable events at `warning`, terminal failures at `error`.
## Tooling & Checks
- Format/lint: `uv run --project api --dev ruff format ./api` and `uv run --project api --dev ruff check --fix --unsafe-fixes ./api`.
- Type checks: `uv run --directory api --dev basedpyright`.
- Tests: `uv run --project api --dev dev/pytest/pytest_unit_tests.sh`.
- Run all of the above before submitting your work.
## Controllers & Services
- Controllers: parse input via Pydantic, invoke services, return serialised responses; no business logic.
- Services: coordinate repositories, providers, background tasks; keep side effects explicit.
- Avoid repositories unless necessary; direct SQLAlchemy usage is preferred for typical tables.
- Document non-obvious behaviour with concise comments.
## Miscellaneous
- Use `configs.dify_config` for configuration—never read environment variables directly.
- Maintain tenant awareness end-to-end; `tenant_id` must flow through every layer touching shared resources.
- Queue async work through `services/async_workflow_service`; implement tasks under `tasks/` with explicit queue selection.
- Keep experimental scripts under `dev/`; do not ship them in production builds.


@@ -55,35 +55,6 @@ class InstructionTemplatePayload(BaseModel):
    type: str = Field(..., description="Instruction template type")


class ContextGeneratePayload(BaseModel):
    """Payload for generating extractor code node."""

    workflow_id: str = Field(..., description="Workflow ID")
    node_id: str = Field(..., description="Current tool/llm node ID")
    parameter_name: str = Field(..., description="Parameter name to generate code for")
    language: str = Field(default="python3", description="Code language (python3/javascript)")
    prompt_messages: list[dict[str, Any]] = Field(
        ..., description="Multi-turn conversation history, last message is the current instruction"
    )
    model_config_data: dict[str, Any] = Field(..., alias="model_config", description="Model configuration")


class SuggestedQuestionsPayload(BaseModel):
    """Payload for generating suggested questions."""

    workflow_id: str = Field(..., description="Workflow ID")
    node_id: str = Field(..., description="Current tool/llm node ID")
    parameter_name: str = Field(..., description="Parameter name")
    language: str = Field(
        default="English", description="Language for generated questions (e.g. English, Chinese, Japanese)"
    )
    model_config_data: dict[str, Any] | None = Field(
        default=None,
        alias="model_config",
        description="Model configuration (optional, uses system default if not provided)",
    )


def reg(cls: type[BaseModel]):
    console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
@@ -93,8 +64,6 @@ reg(RuleCodeGeneratePayload)
reg(RuleStructuredOutputPayload)
reg(InstructionGeneratePayload)
reg(InstructionTemplatePayload)
reg(ContextGeneratePayload)
reg(SuggestedQuestionsPayload)
@console_ns.route("/rule-generate")
@@ -309,74 +278,3 @@ class InstructionGenerationTemplateApi(Resource):
                return {"data": INSTRUCTION_GENERATE_TEMPLATE_CODE}
            case _:
                raise ValueError(f"Invalid type: {args.type}")


@console_ns.route("/context-generate")
class ContextGenerateApi(Resource):
    @console_ns.doc("generate_with_context")
    @console_ns.doc(description="Generate with multi-turn conversation context")
    @console_ns.expect(console_ns.models[ContextGeneratePayload.__name__])
    @console_ns.response(200, "Content generated successfully")
    @console_ns.response(400, "Invalid request parameters or workflow not found")
    @console_ns.response(402, "Provider quota exceeded")
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        from core.llm_generator.utils import deserialize_prompt_messages

        args = ContextGeneratePayload.model_validate(console_ns.payload)
        _, current_tenant_id = current_account_with_tenant()
        prompt_messages = deserialize_prompt_messages(args.prompt_messages)
        try:
            return LLMGenerator.generate_with_context(
                tenant_id=current_tenant_id,
                workflow_id=args.workflow_id,
                node_id=args.node_id,
                parameter_name=args.parameter_name,
                language=args.language,
                prompt_messages=prompt_messages,
                model_config=args.model_config_data,
            )
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except InvokeError as e:
            raise CompletionRequestError(e.description)


@console_ns.route("/context-generate/suggested-questions")
class SuggestedQuestionsApi(Resource):
    @console_ns.doc("generate_suggested_questions")
    @console_ns.doc(description="Generate suggested questions for context generation")
    @console_ns.expect(console_ns.models[SuggestedQuestionsPayload.__name__])
    @console_ns.response(200, "Questions generated successfully")
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        args = SuggestedQuestionsPayload.model_validate(console_ns.payload)
        _, current_tenant_id = current_account_with_tenant()
        try:
            return LLMGenerator.generate_suggested_questions(
                tenant_id=current_tenant_id,
                workflow_id=args.workflow_id,
                node_id=args.node_id,
                parameter_name=args.parameter_name,
                language=args.language,
                model_config=args.model_config_data,
            )
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except InvokeError as e:
            raise CompletionRequestError(e.description)


@@ -146,6 +146,7 @@ class DatasetUpdatePayload(BaseModel):
    embedding_model: str | None = None
    embedding_model_provider: str | None = None
    retrieval_model: dict[str, Any] | None = None
    summary_index_setting: dict[str, Any] | None = None
    partial_member_list: list[dict[str, str]] | None = None
    external_retrieval_model: dict[str, Any] | None = None
    external_knowledge_id: str | None = None


@@ -39,9 +39,10 @@ from fields.document_fields import (
from libs.datetime_utils import naive_utc_now
from libs.login import current_account_with_tenant, login_required
from models import DatasetProcessRule, Document, DocumentSegment, UploadFile
from models.dataset import DocumentPipelineExecutionLog
from models.dataset import DocumentPipelineExecutionLog, DocumentSegmentSummary
from services.dataset_service import DatasetService, DocumentService
from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig, ProcessRule, RetrievalModel
from tasks.generate_summary_index_task import generate_summary_index_task
from ..app.error import (
    ProviderModelCurrentlyNotSupportError,
@@ -104,6 +105,10 @@ class DocumentRenamePayload(BaseModel):
    name: str


class GenerateSummaryPayload(BaseModel):
    document_list: list[str]


class DocumentDatasetListParam(BaseModel):
    page: int = Field(1, title="Page", description="Page number.")
    limit: int = Field(20, title="Limit", description="Page size.")
@@ -120,6 +125,7 @@ register_schema_models(
    RetrievalModel,
    DocumentRetryPayload,
    DocumentRenamePayload,
    GenerateSummaryPayload,
)
@@ -306,6 +312,89 @@ class DatasetDocumentListApi(Resource):
        paginated_documents = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False)
        documents = paginated_documents.items
        # Check if dataset has summary index enabled
        has_summary_index = dataset.summary_index_setting and dataset.summary_index_setting.get("enable") is True
        # Filter documents that need summary calculation
        documents_need_summary = [doc for doc in documents if doc.need_summary is True]
        document_ids_need_summary = [str(doc.id) for doc in documents_need_summary]
        # Calculate summary_index_status for documents that need summary (only if dataset summary index is enabled)
        summary_status_map = {}
        if has_summary_index and document_ids_need_summary:
            # Get all segments for these documents (excluding qa_model and re_segment)
            segments = (
                db.session.query(DocumentSegment.id, DocumentSegment.document_id)
                .where(
                    DocumentSegment.document_id.in_(document_ids_need_summary),
                    DocumentSegment.status != "re_segment",
                    DocumentSegment.tenant_id == current_tenant_id,
                )
                .all()
            )
            # Group segments by document_id
            document_segments_map = {}
            for segment in segments:
                doc_id = str(segment.document_id)
                if doc_id not in document_segments_map:
                    document_segments_map[doc_id] = []
                document_segments_map[doc_id].append(segment.id)
            # Get all summary records for these segments
            all_segment_ids = [seg.id for seg in segments]
            summaries = {}
            if all_segment_ids:
                summary_records = (
                    db.session.query(DocumentSegmentSummary)
                    .where(
                        DocumentSegmentSummary.chunk_id.in_(all_segment_ids),
                        DocumentSegmentSummary.dataset_id == dataset_id,
                        DocumentSegmentSummary.enabled == True,  # Only count enabled summaries
                    )
                    .all()
                )
                summaries = {summary.chunk_id: summary.status for summary in summary_records}
            # Calculate summary_index_status for each document
            for doc_id in document_ids_need_summary:
                segment_ids = document_segments_map.get(doc_id, [])
                if not segment_ids:
                    # No segments, status is None (not started)
                    summary_status_map[doc_id] = None
                    continue
                # Count summary statuses for this document's segments
                status_counts = {"completed": 0, "generating": 0, "error": 0, "not_started": 0}
                for segment_id in segment_ids:
                    status = summaries.get(segment_id, "not_started")
                    if status in status_counts:
                        status_counts[status] += 1
                    else:
                        status_counts["not_started"] += 1
                generating_count = status_counts["generating"]
                # Determine overall status:
                # - "SUMMARIZING" only when task is queued and at least one summary is generating
                # - None (empty) for all other cases (not queued, all completed/error)
                if generating_count > 0:
                    # Task is queued and at least one summary is still generating
                    summary_status_map[doc_id] = "SUMMARIZING"
                else:
                    # Task not queued yet, or all summaries are completed/error (task finished)
                    summary_status_map[doc_id] = None
        # Add summary_index_status to each document
        for document in documents:
            if has_summary_index and document.need_summary is True:
                # Get status from map, default to None (not queued yet)
                document.summary_index_status = summary_status_map.get(str(document.id))
            else:
                # Return null if summary index is not enabled or document doesn't need summary
                document.summary_index_status = None
        if fetch:
            for document in documents:
                completed_segments = (
@@ -791,6 +880,7 @@ class DocumentApi(DocumentResource):
                "display_status": document.display_status,
                "doc_form": document.doc_form,
                "doc_language": document.doc_language,
                "need_summary": document.need_summary if document.need_summary is not None else False,
            }
        else:
            dataset_process_rules = DatasetService.get_process_rules(dataset_id)
@@ -826,6 +916,7 @@ class DocumentApi(DocumentResource):
                "display_status": document.display_status,
                "doc_form": document.doc_form,
                "doc_language": document.doc_language,
                "need_summary": document.need_summary if document.need_summary is not None else False,
            }
        return response, 200
@@ -1193,3 +1284,216 @@ class DocumentPipelineExecutionLogApi(DocumentResource):
            "input_data": log.input_data,
            "datasource_node_id": log.datasource_node_id,
        }, 200
@console_ns.route("/datasets/<uuid:dataset_id>/documents/generate-summary")
class DocumentGenerateSummaryApi(Resource):
@console_ns.doc("generate_summary_for_documents")
@console_ns.doc(description="Generate summary index for documents")
@console_ns.doc(params={"dataset_id": "Dataset ID"})
@console_ns.expect(console_ns.models[GenerateSummaryPayload.__name__])
@console_ns.response(200, "Summary generation started successfully")
@console_ns.response(400, "Invalid request or dataset configuration")
@console_ns.response(403, "Permission denied")
@console_ns.response(404, "Dataset not found")
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def post(self, dataset_id):
"""
Generate summary index for specified documents.
This endpoint checks if the dataset configuration supports summary generation
(indexing_technique must be 'high_quality' and summary_index_setting.enable must be true),
then asynchronously generates summary indexes for the provided documents.
"""
current_user, _ = current_account_with_tenant()
dataset_id = str(dataset_id)
# Get dataset
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound("Dataset not found.")
# Check permissions
if not current_user.is_dataset_editor:
raise Forbidden()
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# Validate request payload
payload = GenerateSummaryPayload.model_validate(console_ns.payload or {})
document_list = payload.document_list
if not document_list:
raise ValueError("document_list cannot be empty.")
# Check if dataset configuration supports summary generation
if dataset.indexing_technique != "high_quality":
raise ValueError(
f"Summary generation is only available for 'high_quality' indexing technique. "
f"Current indexing technique: {dataset.indexing_technique}"
)
summary_index_setting = dataset.summary_index_setting
if not summary_index_setting or not summary_index_setting.get("enable"):
raise ValueError("Summary index is not enabled for this dataset. Please enable it in the dataset settings.")
# Verify all documents exist and belong to the dataset
documents = (
db.session.query(Document)
.filter(
Document.id.in_(document_list),
Document.dataset_id == dataset_id,
)
.all()
)
if len(documents) != len(document_list):
found_ids = {doc.id for doc in documents}
missing_ids = set(document_list) - found_ids
raise NotFound(f"Some documents not found: {list(missing_ids)}")
# Dispatch async tasks for each document
for document in documents:
# Skip qa_model documents as they don't generate summaries
if document.doc_form == "qa_model":
logger.info("Skipping summary generation for qa_model document %s", document.id)
continue
# Dispatch async task
generate_summary_index_task(dataset_id, document.id)
logger.info(
"Dispatched summary generation task for document %s in dataset %s",
document.id,
dataset_id,
)
return {"result": "success"}, 200
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/summary-status")
class DocumentSummaryStatusApi(DocumentResource):
@console_ns.doc("get_document_summary_status")
@console_ns.doc(description="Get summary index generation status for a document")
@console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@console_ns.response(200, "Summary status retrieved successfully")
@console_ns.response(404, "Document not found")
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id, document_id):
"""
Get summary index generation status for a document.
Returns:
- total_segments: Total number of segments in the document
- summary_status: Dictionary with status counts
- completed: Number of summaries completed
- generating: Number of summaries being generated
- error: Number of summaries with errors
- not_started: Number of segments without summary records
- summaries: List of summary records with status and content preview
"""
current_user, _ = current_account_with_tenant()
dataset_id = str(dataset_id)
document_id = str(document_id)
# Get document
document = self.get_document(dataset_id, document_id)
# Get dataset
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound("Dataset not found.")
# Check permissions
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# Get all segments for this document
segments = (
db.session.query(DocumentSegment)
.filter(
DocumentSegment.document_id == document_id,
DocumentSegment.dataset_id == dataset_id,
DocumentSegment.status == "completed",
DocumentSegment.enabled == True,
)
.all()
)
total_segments = len(segments)
# Get all summary records for these segments
segment_ids = [segment.id for segment in segments]
summaries = []
if segment_ids:
summaries = (
db.session.query(DocumentSegmentSummary)
.filter(
DocumentSegmentSummary.document_id == document_id,
DocumentSegmentSummary.dataset_id == dataset_id,
DocumentSegmentSummary.chunk_id.in_(segment_ids),
DocumentSegmentSummary.enabled == True, # Only return enabled summaries
)
.all()
)
# Create a mapping of chunk_id to summary
summary_map = {summary.chunk_id: summary for summary in summaries}
# Count statuses
status_counts = {
"completed": 0,
"generating": 0,
"error": 0,
"not_started": 0,
}
summary_list = []
for segment in segments:
summary = summary_map.get(segment.id)
if summary:
status = summary.status
status_counts[status] = status_counts.get(status, 0) + 1
summary_list.append(
{
"segment_id": segment.id,
"segment_position": segment.position,
"status": summary.status,
"summary_preview": (
summary.summary_content[:100] + "..."
if summary.summary_content and len(summary.summary_content) > 100
else summary.summary_content
),
"error": summary.error,
"created_at": int(summary.created_at.timestamp()) if summary.created_at else None,
"updated_at": int(summary.updated_at.timestamp()) if summary.updated_at else None,
}
)
else:
status_counts["not_started"] += 1
summary_list.append(
{
"segment_id": segment.id,
"segment_position": segment.position,
"status": "not_started",
"summary_preview": None,
"error": None,
"created_at": None,
"updated_at": None,
}
)
return {
"total_segments": total_segments,
"summary_status": status_counts,
"summaries": summary_list,
}, 200


@@ -32,7 +32,7 @@ from extensions.ext_redis import redis_client
from fields.segment_fields import child_chunk_fields, segment_fields
from libs.helper import escape_like_pattern
from libs.login import current_account_with_tenant, login_required
from models.dataset import ChildChunk, DocumentSegment
from models.dataset import ChildChunk, DocumentSegment, DocumentSegmentSummary
from models.model import UploadFile
from services.dataset_service import DatasetService, DocumentService, SegmentService
from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs
@@ -41,6 +41,23 @@ from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingS
from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task
def _get_segment_with_summary(segment, dataset_id):
    """Helper function to marshal segment and add summary information."""
    segment_dict = marshal(segment, segment_fields)
    # Query summary for this segment (only enabled summaries)
    summary = (
        db.session.query(DocumentSegmentSummary)
        .where(
            DocumentSegmentSummary.chunk_id == segment.id,
            DocumentSegmentSummary.dataset_id == dataset_id,
            DocumentSegmentSummary.enabled == True,  # Only return enabled summaries
        )
        .first()
    )
    segment_dict["summary"] = summary.summary_content if summary else None
    return segment_dict
class SegmentListQuery(BaseModel):
    limit: int = Field(default=20, ge=1, le=100)
    status: list[str] = Field(default_factory=list)
@@ -63,6 +80,7 @@ class SegmentUpdatePayload(BaseModel):
    keywords: list[str] | None = None
    regenerate_child_chunks: bool = False
    attachment_ids: list[str] | None = None
    summary: str | None = None  # Summary content for summary index


class BatchImportPayload(BaseModel):
@@ -180,8 +198,32 @@ class DatasetDocumentSegmentListApi(Resource):
        segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False)
        # Query summaries for all segments in this page (batch query for efficiency)
        segment_ids = [segment.id for segment in segments.items]
        summaries = {}
        if segment_ids:
            summary_records = (
                db.session.query(DocumentSegmentSummary)
                .where(
                    DocumentSegmentSummary.chunk_id.in_(segment_ids),
                    DocumentSegmentSummary.dataset_id == dataset_id,
                )
                .all()
            )
            # Only include enabled summaries
            summaries = {
                summary.chunk_id: summary.summary_content for summary in summary_records if summary.enabled is True
            }
        # Add summary to each segment
        segments_with_summary = []
        for segment in segments.items:
            segment_dict = marshal(segment, segment_fields)
            segment_dict["summary"] = summaries.get(segment.id)
            segments_with_summary.append(segment_dict)
        response = {
            "data": marshal(segments.items, segment_fields),
            "data": segments_with_summary,
            "limit": limit,
            "total": segments.total,
            "total_pages": segments.pages,
@@ -327,7 +369,7 @@ class DatasetDocumentSegmentAddApi(Resource):
        payload_dict = payload.model_dump(exclude_none=True)
        SegmentService.segment_create_args_validate(payload_dict, document)
        segment = SegmentService.create_segment(payload_dict, document, dataset)
        return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
        return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>")
@@ -389,10 +431,12 @@ class DatasetDocumentSegmentUpdateApi(Resource):
        payload = SegmentUpdatePayload.model_validate(console_ns.payload or {})
        payload_dict = payload.model_dump(exclude_none=True)
        SegmentService.segment_create_args_validate(payload_dict, document)
        # Update segment (summary update with change detection is handled in SegmentService.update_segment)
        segment = SegmentService.update_segment(
            SegmentUpdateArgs.model_validate(payload.model_dump(exclude_none=True)), segment, document, dataset
        )
        return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
        return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200

    @setup_required
    @login_required

@@ -1,6 +1,13 @@
from flask_restx import Resource
from flask_restx import Resource, fields
from controllers.common.schema import register_schema_model
from fields.hit_testing_fields import (
    child_chunk_fields,
    document_fields,
    files_fields,
    hit_testing_record_fields,
    segment_fields,
)
from libs.login import login_required
from .. import console_ns
@@ -14,13 +21,45 @@ from ..wraps import (
register_schema_model(console_ns, HitTestingPayload)


def _get_or_create_model(model_name: str, field_def):
    """Get or create a flask_restx model to avoid dict type issues in Swagger."""
    existing = console_ns.models.get(model_name)
    if existing is None:
        existing = console_ns.model(model_name, field_def)
    return existing


# Register models for flask_restx to avoid dict type issues in Swagger
document_model = _get_or_create_model("HitTestingDocument", document_fields)
segment_fields_copy = segment_fields.copy()
segment_fields_copy["document"] = fields.Nested(document_model)
segment_model = _get_or_create_model("HitTestingSegment", segment_fields_copy)
child_chunk_model = _get_or_create_model("HitTestingChildChunk", child_chunk_fields)
files_model = _get_or_create_model("HitTestingFile", files_fields)
hit_testing_record_fields_copy = hit_testing_record_fields.copy()
hit_testing_record_fields_copy["segment"] = fields.Nested(segment_model)
hit_testing_record_fields_copy["child_chunks"] = fields.List(fields.Nested(child_chunk_model))
hit_testing_record_fields_copy["files"] = fields.List(fields.Nested(files_model))
hit_testing_record_model = _get_or_create_model("HitTestingRecord", hit_testing_record_fields_copy)

# Response model for hit testing API
hit_testing_response_fields = {
    "query": fields.String,
    "records": fields.List(fields.Nested(hit_testing_record_model)),
}
hit_testing_response_model = _get_or_create_model("HitTestingResponse", hit_testing_response_fields)


@console_ns.route("/datasets/<uuid:dataset_id>/hit-testing")
class HitTestingApi(Resource, DatasetsHitTestingBase):
    @console_ns.doc("test_dataset_retrieval")
    @console_ns.doc(description="Test dataset knowledge retrieval")
    @console_ns.doc(params={"dataset_id": "Dataset ID"})
    @console_ns.expect(console_ns.models[HitTestingPayload.__name__])
    @console_ns.response(200, "Hit testing completed successfully")
    @console_ns.response(200, "Hit testing completed successfully", model=hit_testing_response_model)
    @console_ns.response(404, "Dataset not found")
    @console_ns.response(400, "Invalid parameters")
    @setup_required

@@ -30,6 +30,11 @@ class TagBindingRemovePayload(BaseModel):
    type: Literal["knowledge", "app"] | None = Field(default=None, description="Tag type")


class TagListQueryParam(BaseModel):
    type: Literal["knowledge", "app", ""] = Field("", description="Tag type filter")
    keyword: str | None = Field(None, description="Search keyword")


register_schema_models(
    console_ns,
    TagBasePayload,
@@ -43,12 +48,15 @@ class TagListApi(Resource):
    @setup_required
    @login_required
    @account_initialization_required
    @console_ns.doc(
        params={"type": 'Tag type filter. Can be "knowledge" or "app".', "keyword": "Search keyword for tag name."}
    )
    @marshal_with(dataset_tag_fields)
    def get(self):
        _, current_tenant_id = current_account_with_tenant()
        tag_type = request.args.get("type", type=str, default="")
        keyword = request.args.get("keyword", default=None, type=str)
        tags = TagService.get_tags(tag_type, current_tenant_id, keyword)
        raw_args = request.args.to_dict()
        param = TagListQueryParam.model_validate(raw_args)
        tags = TagService.get_tags(param.type, current_tenant_id, param.keyword)
        return tags, 200


@@ -82,7 +82,7 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(data)
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk

    @classmethod
@@ -110,7 +110,7 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter):
            }
            if isinstance(sub_stream_response, MessageEndStreamResponse):
                sub_stream_response_dict = sub_stream_response.model_dump(mode="json", exclude_none=True)
                sub_stream_response_dict = sub_stream_response.model_dump(mode="json")
                metadata = sub_stream_response_dict.get("metadata", {})
                sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata)
                response_chunk.update(sub_stream_response_dict)
@@ -120,6 +120,6 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter):
            elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
                response_chunk.update(sub_stream_response.to_ignore_detail_dict())
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk


@@ -81,7 +81,7 @@ class AgentChatAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(data)
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk

    @classmethod
@@ -109,7 +109,7 @@ class AgentChatAppGenerateResponseConverter(AppGenerateResponseConverter):
            }
            if isinstance(sub_stream_response, MessageEndStreamResponse):
                sub_stream_response_dict = sub_stream_response.model_dump(mode="json", exclude_none=True)
                sub_stream_response_dict = sub_stream_response.model_dump(mode="json")
                metadata = sub_stream_response_dict.get("metadata", {})
                sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata)
                response_chunk.update(sub_stream_response_dict)
@@ -117,6 +117,6 @@ class AgentChatAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(data)
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk


@@ -81,7 +81,7 @@ class ChatAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(data)
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk

    @classmethod
@@ -109,7 +109,7 @@ class ChatAppGenerateResponseConverter(AppGenerateResponseConverter):
            }
            if isinstance(sub_stream_response, MessageEndStreamResponse):
                sub_stream_response_dict = sub_stream_response.model_dump(mode="json", exclude_none=True)
                sub_stream_response_dict = sub_stream_response.model_dump(mode="json")
                metadata = sub_stream_response_dict.get("metadata", {})
                sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata)
                response_chunk.update(sub_stream_response_dict)
@@ -117,6 +117,6 @@ class ChatAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(data)
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk


@@ -70,8 +70,6 @@ class _NodeSnapshot:
    """Empty string means the node is not executing inside an iteration."""
    loop_id: str = ""
    """Empty string means the node is not executing inside a loop."""
    mention_parent_id: str = ""
    """Empty string means the node is not an extractor node."""


class WorkflowResponseConverter:
@@ -133,7 +131,6 @@ class WorkflowResponseConverter:
            start_at=event.start_at,
            iteration_id=event.in_iteration_id or "",
            loop_id=event.in_loop_id or "",
            mention_parent_id=event.in_mention_parent_id or "",
        )
        node_execution_id = NodeExecutionId(event.node_execution_id)
        self._node_snapshots[node_execution_id] = snapshot
@@ -290,7 +287,6 @@ class WorkflowResponseConverter:
                created_at=int(snapshot.start_at.timestamp()),
                iteration_id=event.in_iteration_id,
                loop_id=event.in_loop_id,
                mention_parent_id=event.in_mention_parent_id,
                agent_strategy=event.agent_strategy,
            ),
        )
@@ -377,7 +373,6 @@ class WorkflowResponseConverter:
                files=self.fetch_files_from_node_outputs(event.outputs or {}),
                iteration_id=event.in_iteration_id,
                loop_id=event.in_loop_id,
                mention_parent_id=event.in_mention_parent_id,
            ),
        )
@@ -427,7 +422,6 @@ class WorkflowResponseConverter:
                files=self.fetch_files_from_node_outputs(event.outputs or {}),
                iteration_id=event.in_iteration_id,
                loop_id=event.in_loop_id,
                mention_parent_id=event.in_mention_parent_id,
                retry_index=event.retry_index,
            ),
        )


@@ -79,7 +79,7 @@ class CompletionAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(data)
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk

    @classmethod
@@ -106,7 +106,7 @@ class CompletionAppGenerateResponseConverter(AppGenerateResponseConverter):
            }
            if isinstance(sub_stream_response, MessageEndStreamResponse):
                sub_stream_response_dict = sub_stream_response.model_dump(mode="json", exclude_none=True)
                sub_stream_response_dict = sub_stream_response.model_dump(mode="json")
                metadata = sub_stream_response_dict.get("metadata", {})
                if not isinstance(metadata, dict):
                    metadata = {}
@@ -116,6 +116,6 @@ class CompletionAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(data)
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk


@@ -60,7 +60,7 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(cast(dict, data))
            else:
                response_chunk.update(sub_stream_response.model_dump(exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump())
            yield response_chunk

    @classmethod
@@ -91,5 +91,5 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
            elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
                response_chunk.update(cast(dict, sub_stream_response.to_ignore_detail_dict()))
            else:
                response_chunk.update(sub_stream_response.model_dump(exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump())
            yield response_chunk


@@ -60,7 +60,7 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
                data = cls._error_to_stream_response(sub_stream_response.err)
                response_chunk.update(data)
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk

    @classmethod
@@ -91,5 +91,5 @@ class WorkflowAppGenerateResponseConverter(AppGenerateResponseConverter):
            elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse):
                response_chunk.update(sub_stream_response.to_ignore_detail_dict())
            else:
                response_chunk.update(sub_stream_response.model_dump(mode="json", exclude_none=True))
                response_chunk.update(sub_stream_response.model_dump(mode="json"))
            yield response_chunk


@@ -385,7 +385,6 @@ class WorkflowBasedAppRunner:
                    start_at=event.start_at,
                    in_iteration_id=event.in_iteration_id,
                    in_loop_id=event.in_loop_id,
                    in_mention_parent_id=event.in_mention_parent_id,
                    inputs=inputs,
                    process_data=process_data,
                    outputs=outputs,
@@ -406,7 +405,6 @@ class WorkflowBasedAppRunner:
                    start_at=event.start_at,
                    in_iteration_id=event.in_iteration_id,
                    in_loop_id=event.in_loop_id,
                    in_mention_parent_id=event.in_mention_parent_id,
                    agent_strategy=event.agent_strategy,
                    provider_type=event.provider_type,
                    provider_id=event.provider_id,
@@ -430,7 +428,6 @@ class WorkflowBasedAppRunner:
                    execution_metadata=execution_metadata,
                    in_iteration_id=event.in_iteration_id,
                    in_loop_id=event.in_loop_id,
                    in_mention_parent_id=event.in_mention_parent_id,
                )
            )
        elif isinstance(event, NodeRunFailedEvent):
@@ -447,7 +444,6 @@ class WorkflowBasedAppRunner:
                    execution_metadata=event.node_run_result.metadata,
                    in_iteration_id=event.in_iteration_id,
                    in_loop_id=event.in_loop_id,
                    in_mention_parent_id=event.in_mention_parent_id,
                )
            )
        elif isinstance(event, NodeRunExceptionEvent):
@@ -464,7 +460,6 @@ class WorkflowBasedAppRunner:
                    execution_metadata=event.node_run_result.metadata,
                    in_iteration_id=event.in_iteration_id,
                    in_loop_id=event.in_loop_id,
                    in_mention_parent_id=event.in_mention_parent_id,
                )
            )
        elif isinstance(event, NodeRunStreamChunkEvent):
@@ -474,7 +469,6 @@ class WorkflowBasedAppRunner:
                    from_variable_selector=list(event.selector),
                    in_iteration_id=event.in_iteration_id,
                    in_loop_id=event.in_loop_id,
                    in_mention_parent_id=event.in_mention_parent_id,
                )
            )
        elif isinstance(event, NodeRunRetrieverResourceEvent):
@@ -483,7 +477,6 @@ class WorkflowBasedAppRunner:
                    retriever_resources=event.retriever_resources,
                    in_iteration_id=event.in_iteration_id,
                    in_loop_id=event.in_loop_id,
                    in_mention_parent_id=event.in_mention_parent_id,
                )
            )
        elif isinstance(event, NodeRunAgentLogEvent):


@@ -190,8 +190,6 @@ class QueueTextChunkEvent(AppQueueEvent):
    """iteration id if node is in iteration"""
    in_loop_id: str | None = None
    """loop id if node is in loop"""
    in_mention_parent_id: str | None = None
    """parent node id if this is an extractor node event"""


class QueueAgentMessageEvent(AppQueueEvent):
@@ -231,8 +229,6 @@ class QueueRetrieverResourcesEvent(AppQueueEvent):
    """iteration id if node is in iteration"""
    in_loop_id: str | None = None
    """loop id if node is in loop"""
    in_mention_parent_id: str | None = None
    """parent node id if this is an extractor node event"""


class QueueAnnotationReplyEvent(AppQueueEvent):
@@ -310,8 +306,6 @@ class QueueNodeStartedEvent(AppQueueEvent):
    node_run_index: int = 1  # FIXME(-LAN-): may not used
    in_iteration_id: str | None = None
    in_loop_id: str | None = None
    in_mention_parent_id: str | None = None
    """parent node id if this is an extractor node event"""
    start_at: datetime
    agent_strategy: AgentNodeStrategyInit | None = None
@@ -334,8 +328,6 @@ class QueueNodeSucceededEvent(AppQueueEvent):
    """iteration id if node is in iteration"""
    in_loop_id: str | None = None
    """loop id if node is in loop"""
    in_mention_parent_id: str | None = None
    """parent node id if this is an extractor node event"""
    start_at: datetime
    inputs: Mapping[str, object] = Field(default_factory=dict)
@@ -391,8 +383,6 @@ class QueueNodeExceptionEvent(AppQueueEvent):
    """iteration id if node is in iteration"""
    in_loop_id: str | None = None
    """loop id if node is in loop"""
    in_mention_parent_id: str | None = None
    """parent node id if this is an extractor node event"""
    start_at: datetime
    inputs: Mapping[str, object] = Field(default_factory=dict)
@@ -417,8 +407,6 @@ class QueueNodeFailedEvent(AppQueueEvent):
    """iteration id if node is in iteration"""
    in_loop_id: str | None = None
    """loop id if node is in loop"""
    in_mention_parent_id: str | None = None
    """parent node id if this is an extractor node event"""
    start_at: datetime
    inputs: Mapping[str, object] = Field(default_factory=dict)


@@ -262,7 +262,6 @@ class NodeStartStreamResponse(StreamResponse):
        extras: dict[str, object] = Field(default_factory=dict)
        iteration_id: str | None = None
        loop_id: str | None = None
        mention_parent_id: str | None = None
        agent_strategy: AgentNodeStrategyInit | None = None

    event: StreamEvent = StreamEvent.NODE_STARTED
@@ -286,7 +285,6 @@ class NodeStartStreamResponse(StreamResponse):
                "extras": {},
                "iteration_id": self.data.iteration_id,
                "loop_id": self.data.loop_id,
                "mention_parent_id": self.data.mention_parent_id,
            },
        }
@@ -322,7 +320,6 @@ class NodeFinishStreamResponse(StreamResponse):
        files: Sequence[Mapping[str, Any]] | None = []
        iteration_id: str | None = None
        loop_id: str | None = None
        mention_parent_id: str | None = None

    event: StreamEvent = StreamEvent.NODE_FINISHED
    workflow_run_id: str
@@ -352,7 +349,6 @@ class NodeFinishStreamResponse(StreamResponse):
                "files": [],
                "iteration_id": self.data.iteration_id,
                "loop_id": self.data.loop_id,
                "mention_parent_id": self.data.mention_parent_id,
            },
        }
@@ -388,7 +384,6 @@ class NodeRetryStreamResponse(StreamResponse):
        files: Sequence[Mapping[str, Any]] | None = []
        iteration_id: str | None = None
        loop_id: str | None = None
        mention_parent_id: str | None = None
        retry_index: int = 0

    event: StreamEvent = StreamEvent.NODE_RETRY
@@ -419,7 +414,6 @@ class NodeRetryStreamResponse(StreamResponse):
                "files": [],
                "iteration_id": self.data.iteration_id,
                "loop_id": self.data.loop_id,
                "mention_parent_id": self.data.mention_parent_id,
                "retry_index": self.data.retry_index,
            },
        }


@@ -3,6 +3,7 @@ from pydantic import BaseModel, Field, field_validator
class PreviewDetail(BaseModel):
    content: str
    summary: str | None = None
    child_chunks: list[str] | None = None


@@ -311,14 +311,18 @@ class IndexingRunner:
        qa_preview_texts: list[QAPreviewDetail] = []
        total_segments = 0
        # doc_form represents the segmentation method (general, parent-child, QA)
        index_type = doc_form
        index_processor = IndexProcessorFactory(index_type).init_index_processor()
        # one extract_setting is one source document
        for extract_setting in extract_settings:
            # extract
            processing_rule = DatasetProcessRule(
                mode=tmp_processing_rule["mode"], rules=json.dumps(tmp_processing_rule["rules"])
            )
            # Extract document content
            text_docs = index_processor.extract(extract_setting, process_rule_mode=tmp_processing_rule["mode"])
            # Cleaning and segmentation
            documents = index_processor.transform(
                text_docs,
                current_user=None,
@@ -361,6 +365,12 @@
        if doc_form and doc_form == "qa_model":
            return IndexingEstimate(total_segments=total_segments * 20, qa_preview=qa_preview_texts, preview=[])
        # Generate summary preview
        summary_index_setting = tmp_processing_rule.get("summary_index_setting")
        if summary_index_setting and summary_index_setting.get("enable") and preview_texts:
            preview_texts = index_processor.generate_summary_preview(tenant_id, preview_texts, summary_index_setting)
        return IndexingEstimate(total_segments=total_segments, preview=preview_texts)

    def _extract(


@@ -1,8 +1,8 @@
import json
import logging
import re
from collections.abc import Mapping, Sequence
from typing import Any, Protocol, cast
from collections.abc import Sequence
from typing import Protocol, cast

import json_repair
@@ -71,7 +71,7 @@ class LLMGenerator:
        response: LLMResult = model_instance.invoke_llm(
            prompt_messages=list(prompts), model_parameters={"max_tokens": 500, "temperature": 1}, stream=False
        )
        answer = cast(str, response.message.content)
        answer = response.message.get_text_content()
        if answer is None:
            return ""
        try:
@@ -113,9 +113,11 @@ class LLMGenerator:
        output_parser = SuggestedQuestionsAfterAnswerOutputParser()
        format_instructions = output_parser.get_format_instructions()
        prompt_template = PromptTemplateParser(template="{{histories}}\n{{format_instructions}}\nquestions:\n")
        prompt_template = PromptTemplateParser(
            template="{{histories}}\n{{format_instructions}}\nquestions:\n")
        prompt = prompt_template.format({"histories": histories, "format_instructions": format_instructions})
        prompt = prompt_template.format(
            {"histories": histories, "format_instructions": format_instructions})
        try:
            model_manager = ModelManager()
@@ -141,11 +143,13 @@ class LLMGenerator:
            )
            text_content = response.message.get_text_content()
            questions = output_parser.parse(text_content) if text_content else []
            questions = output_parser.parse(
                text_content) if text_content else []
        except InvokeError:
            questions = []
        except Exception:
            logger.exception("Failed to generate suggested questions after answer")
            logger.exception(
                "Failed to generate suggested questions after answer")
            questions = []
        return questions
@@ -156,10 +160,12 @@ class LLMGenerator:
        error = ""
        error_step = ""
        rule_config = {"prompt": "", "variables": [], "opening_statement": "", "error": ""}
        rule_config = {"prompt": "", "variables": [],
                       "opening_statement": "", "error": ""}
        model_parameters = model_config.get("completion_params", {})
        if no_variable:
            prompt_template = PromptTemplateParser(WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE)
            prompt_template = PromptTemplateParser(
                WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE)
            prompt_generate = prompt_template.format(
                inputs={
@@ -184,13 +190,14 @@ class LLMGenerator:
                    prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
                )
                rule_config["prompt"] = cast(str, response.message.content)
                rule_config["prompt"] = response.message.get_text_content()
            except InvokeError as e:
                error = str(e)
                error_step = "generate rule config"
            except Exception as e:
                logger.exception("Failed to generate rule config, model: %s", model_config.get("name"))
                logger.exception(
                    "Failed to generate rule config, model: %s", model_config.get("name"))
                rule_config["error"] = str(e)
            rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
@@ -237,33 +244,34 @@ class LLMGenerator:
            return rule_config
        rule_config["prompt"] = cast(str, prompt_content.message.content)
        rule_config["prompt"] = prompt_content.message.get_text_content()
        if not isinstance(prompt_content.message.content, str):
            raise NotImplementedError("prompt content is not a string")
        parameter_generate_prompt = parameter_template.format(
            inputs={
                "INPUT_TEXT": prompt_content.message.content,
                "INPUT_TEXT": prompt_content.message.get_text_content(),
            },
            remove_template_variables=False,
        )
        parameter_messages = [UserPromptMessage(content=parameter_generate_prompt)]
        parameter_messages = [UserPromptMessage(
            content=parameter_generate_prompt)]
        # the second step to generate the task_parameter and task_statement
        statement_generate_prompt = statement_template.format(
            inputs={
                "TASK_DESCRIPTION": instruction,
                "INPUT_TEXT": prompt_content.message.content,
                "INPUT_TEXT": prompt_content.message.get_text_content(),
            },
            remove_template_variables=False,
        )
        statement_messages = [UserPromptMessage(content=statement_generate_prompt)]
        statement_messages = [UserPromptMessage(
            content=statement_generate_prompt)]
        try:
            parameter_content: LLMResult = model_instance.invoke_llm(
                prompt_messages=list(parameter_messages), model_parameters=model_parameters, stream=False
            )
            rule_config["variables"] = re.findall(r'"\s*([^"]+)\s*"', cast(str, parameter_content.message.content))
            rule_config["variables"] = re.findall(
                r'"\s*([^"]+)\s*"', parameter_content.message.get_text_content())
        except InvokeError as e:
            error = str(e)
            error_step = "generate variables"
@@ -272,13 +280,15 @@ class LLMGenerator:
            statement_content: LLMResult = model_instance.invoke_llm(
                prompt_messages=list(statement_messages), model_parameters=model_parameters, stream=False
            )
            rule_config["opening_statement"] = cast(str, statement_content.message.content)
            rule_config["opening_statement"] = statement_content.message.get_text_content(
            )
        except InvokeError as e:
            error = str(e)
            error_step = "generate conversation opener"
        except Exception as e:
            logger.exception("Failed to generate rule config, model: %s", model_config.get("name"))
            logger.exception(
                "Failed to generate rule config, model: %s", model_config.get("name"))
            rule_config["error"] = str(e)
        rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
@@ -288,9 +298,11 @@ class LLMGenerator:
    @classmethod
    def generate_code(cls, tenant_id: str, instruction: str, model_config: dict, code_language: str = "javascript"):
        if code_language == "python":
            prompt_template = PromptTemplateParser(PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE)
prompt_template = PromptTemplateParser(
PYTHON_CODE_GENERATOR_PROMPT_TEMPLATE)
else:
prompt_template = PromptTemplateParser(JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE)
prompt_template = PromptTemplateParser(
JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE)
prompt = prompt_template.format(
inputs={
@@ -315,7 +327,7 @@ class LLMGenerator:
prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
)
generated_code = cast(str, response.message.content)
generated_code = response.message.get_text_content()
return {"code": generated_code, "language": code_language, "error": ""}
except InvokeError as e:
@@ -323,7 +335,8 @@ class LLMGenerator:
return {"code": "", "language": code_language, "error": f"Failed to generate code. Error: {error}"}
except Exception as e:
logger.exception(
"Failed to invoke LLM model, model: %s, language: %s", model_config.get("name"), code_language
"Failed to invoke LLM model, model: %s, language: %s", model_config.get(
"name"), code_language
)
return {"code": "", "language": code_language, "error": f"An unexpected error occurred: {str(e)}"}
@@ -337,7 +350,8 @@ class LLMGenerator:
model_type=ModelType.LLM,
)
prompt_messages: list[PromptMessage] = [SystemPromptMessage(content=prompt), UserPromptMessage(content=query)]
prompt_messages: list[PromptMessage] = [SystemPromptMessage(
content=prompt), UserPromptMessage(content=query)]
# Explicitly use the non-streaming overload
result = model_instance.invoke_llm(
@@ -351,7 +365,7 @@ class LLMGenerator:
raise TypeError("Expected LLMResult when stream=False")
response = result
answer = cast(str, response.message.content)
answer = response.message.get_text_content()
return answer.strip()
@classmethod
@@ -375,10 +389,7 @@ class LLMGenerator:
prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
)
raw_content = response.message.content
if not isinstance(raw_content, str):
raise ValueError(f"LLM response content must be a string, got: {type(raw_content)}")
raw_content = response.message.get_text_content()
try:
parsed_content = json.loads(raw_content)
@@ -386,506 +397,28 @@ class LLMGenerator:
parsed_content = json_repair.loads(raw_content)
if not isinstance(parsed_content, dict | list):
raise ValueError(f"Failed to parse structured output from llm: {raw_content}")
raise ValueError(
f"Failed to parse structured output from llm: {raw_content}")
generated_json_schema = json.dumps(parsed_content, indent=2, ensure_ascii=False)
generated_json_schema = json.dumps(
parsed_content, indent=2, ensure_ascii=False)
return {"output": generated_json_schema, "error": ""}
except InvokeError as e:
error = str(e)
return {"output": "", "error": f"Failed to generate JSON Schema. Error: {error}"}
except Exception as e:
logger.exception("Failed to invoke LLM model, model: %s", model_config.get("name"))
logger.exception(
"Failed to invoke LLM model, model: %s", model_config.get("name"))
return {"output": "", "error": f"An unexpected error occurred: {str(e)}"}
@classmethod
def generate_with_context(
cls,
tenant_id: str,
workflow_id: str,
node_id: str,
parameter_name: str,
language: str,
prompt_messages: list[PromptMessage],
model_config: dict,
) -> dict:
"""
Generate an extractor code node based on the conversation context.
Args:
tenant_id: Tenant/workspace ID
workflow_id: Workflow ID
node_id: Current tool/llm node ID
parameter_name: Parameter name to generate code for
language: Code language (python3/javascript)
prompt_messages: Multi-turn conversation history (last message is instruction)
model_config: Model configuration (provider, name, completion_params)
Returns:
dict with CodeNodeData format:
- variables: Input variable selectors
- code_language: Code language
- code: Generated code
- outputs: Output definitions
- message: Explanation
- error: Error message if any
"""
from sqlalchemy import select
from sqlalchemy.orm import Session
from services.workflow_service import WorkflowService
# Get workflow
with Session(db.engine) as session:
stmt = select(App).where(App.id == workflow_id)
app = session.scalar(stmt)
if not app:
return cls._error_response(f"App {workflow_id} not found")
workflow = WorkflowService().get_draft_workflow(app_model=app)
if not workflow:
return cls._error_response(f"Workflow for app {workflow_id} not found")
# Get upstream nodes via edge backtracking
upstream_nodes = cls._get_upstream_nodes(workflow.graph_dict, node_id)
# Get current node info
current_node = cls._get_node_by_id(workflow.graph_dict, node_id)
if not current_node:
return cls._error_response(f"Node {node_id} not found")
# Get parameter info
parameter_info = cls._get_parameter_info(
tenant_id=tenant_id,
node_data=current_node.get("data", {}),
parameter_name=parameter_name,
)
# Build system prompt
system_prompt = cls._build_extractor_system_prompt(
upstream_nodes=upstream_nodes,
current_node=current_node,
parameter_info=parameter_info,
language=language,
)
# Construct complete prompt_messages with system prompt
complete_messages: list[PromptMessage] = [
SystemPromptMessage(content=system_prompt),
*prompt_messages,
]
from core.llm_generator.output_parser.structured_output import invoke_llm_with_structured_output
# Get model instance and schema
provider = model_config.get("provider", "")
model_name = model_config.get("name", "")
model_instance = ModelManager().get_model_instance(
tenant_id=tenant_id,
model_type=ModelType.LLM,
provider=provider,
model=model_name,
)
model_schema = model_instance.model_type_instance.get_model_schema(model_name, model_instance.credentials)
if not model_schema:
return cls._error_response(f"Model schema not found for {model_name}")
model_parameters = model_config.get("completion_params", {})
json_schema = cls._get_code_node_json_schema()
try:
response = invoke_llm_with_structured_output(
provider=provider,
model_schema=model_schema,
model_instance=model_instance,
prompt_messages=complete_messages,
json_schema=json_schema,
model_parameters=model_parameters,
stream=False,
tenant_id=tenant_id,
)
return cls._parse_code_node_output(
response.structured_output, language, parameter_info.get("type", "string")
)
except InvokeError as e:
return cls._error_response(str(e))
except Exception as e:
logger.exception("Failed to generate with context, model: %s", model_config.get("name"))
return cls._error_response(f"An unexpected error occurred: {str(e)}")
@classmethod
def _error_response(cls, error: str) -> dict:
"""Return error response in CodeNodeData format."""
return {
"variables": [],
"code_language": "python3",
"code": "",
"outputs": {},
"message": "",
"error": error,
}
@classmethod
def generate_suggested_questions(
cls,
tenant_id: str,
workflow_id: str,
node_id: str,
parameter_name: str,
language: str,
model_config: dict | None = None,
) -> dict:
"""
Generate suggested questions for context generation.
Returns dict with questions array and error field.
"""
from sqlalchemy import select
from sqlalchemy.orm import Session
from core.llm_generator.output_parser.structured_output import invoke_llm_with_structured_output
from services.workflow_service import WorkflowService
# Get workflow context (reuse existing logic)
with Session(db.engine) as session:
stmt = select(App).where(App.id == workflow_id)
app = session.scalar(stmt)
if not app:
return {"questions": [], "error": f"App {workflow_id} not found"}
workflow = WorkflowService().get_draft_workflow(app_model=app)
if not workflow:
return {"questions": [], "error": f"Workflow for app {workflow_id} not found"}
upstream_nodes = cls._get_upstream_nodes(workflow.graph_dict, node_id)
current_node = cls._get_node_by_id(workflow.graph_dict, node_id)
if not current_node:
return {"questions": [], "error": f"Node {node_id} not found"}
parameter_info = cls._get_parameter_info(
tenant_id=tenant_id,
node_data=current_node.get("data", {}),
parameter_name=parameter_name,
)
# Build prompt
system_prompt = cls._build_suggested_questions_prompt(
upstream_nodes=upstream_nodes,
current_node=current_node,
parameter_info=parameter_info,
language=language,
)
prompt_messages: list[PromptMessage] = [
SystemPromptMessage(content=system_prompt),
]
# Get model instance - use default if model_config not provided
model_manager = ModelManager()
if model_config:
provider = model_config.get("provider", "")
model_name = model_config.get("name", "")
model_instance = model_manager.get_model_instance(
tenant_id=tenant_id,
model_type=ModelType.LLM,
provider=provider,
model=model_name,
)
else:
model_instance = model_manager.get_default_model_instance(
tenant_id=tenant_id,
model_type=ModelType.LLM,
)
model_name = model_instance.model
model_schema = model_instance.model_type_instance.get_model_schema(model_name, model_instance.credentials)
if not model_schema:
return {"questions": [], "error": f"Model schema not found for {model_name}"}
completion_params = model_config.get("completion_params", {}) if model_config else {}
model_parameters = {**completion_params, "max_tokens": 256}
json_schema = cls._get_suggested_questions_json_schema()
try:
response = invoke_llm_with_structured_output(
provider=model_instance.provider,
model_schema=model_schema,
model_instance=model_instance,
prompt_messages=prompt_messages,
json_schema=json_schema,
model_parameters=model_parameters,
stream=False,
tenant_id=tenant_id,
)
questions = response.structured_output.get("questions", []) if response.structured_output else []
return {"questions": questions, "error": ""}
except InvokeError as e:
return {"questions": [], "error": str(e)}
except Exception as e:
logger.exception("Failed to generate suggested questions, model: %s", model_name)
return {"questions": [], "error": f"An unexpected error occurred: {str(e)}"}
@classmethod
def _build_suggested_questions_prompt(
cls,
upstream_nodes: list[dict],
current_node: dict,
parameter_info: dict,
language: str = "English",
) -> str:
"""Build minimal prompt for suggested questions generation."""
# Simplify upstream nodes to reduce tokens
sources = [f"{n['title']}({','.join(n.get('outputs', {}).keys())})" for n in upstream_nodes[:5]]
param_type = parameter_info.get("type", "string")
param_desc = parameter_info.get("description", "")[:100]
return f"""Suggest 3 code generation questions for extracting data.
Sources: {", ".join(sources)}
Target: {parameter_info.get("name")}({param_type}) - {param_desc}
Output 3 short, practical questions in {language}."""
@classmethod
def _get_suggested_questions_json_schema(cls) -> dict:
"""Return JSON Schema for suggested questions."""
return {
"type": "object",
"properties": {
"questions": {
"type": "array",
"items": {"type": "string"},
"minItems": 3,
"maxItems": 3,
"description": "3 suggested questions",
},
},
"required": ["questions"],
}
@classmethod
def _get_code_node_json_schema(cls) -> dict:
"""Return JSON Schema for structured output."""
return {
"type": "object",
"properties": {
"variables": {
"type": "array",
"items": {
"type": "object",
"properties": {
"variable": {"type": "string", "description": "Variable name in code"},
"value_selector": {
"type": "array",
"items": {"type": "string"},
"description": "Path like [node_id, output_name]",
},
},
"required": ["variable", "value_selector"],
},
},
"code": {"type": "string", "description": "Generated code with main function"},
"outputs": {
"type": "object",
"additionalProperties": {
"type": "object",
"properties": {"type": {"type": "string"}},
},
"description": "Output definitions, key is output name",
},
"explanation": {"type": "string", "description": "Brief explanation of the code"},
},
"required": ["variables", "code", "outputs", "explanation"],
}
@classmethod
def _get_upstream_nodes(cls, graph_dict: Mapping[str, Any], node_id: str) -> list[dict]:
"""
Get all upstream nodes via edge backtracking.
Traverses the graph backwards from node_id to collect all reachable nodes.
"""
from collections import defaultdict
nodes = {n["id"]: n for n in graph_dict.get("nodes", [])}
edges = graph_dict.get("edges", [])
# Build reverse adjacency list
reverse_adj: dict[str, list[str]] = defaultdict(list)
for edge in edges:
reverse_adj[edge["target"]].append(edge["source"])
# BFS to find all upstream nodes
visited: set[str] = set()
queue = [node_id]
upstream: list[dict] = []
while queue:
current = queue.pop(0)
for source in reverse_adj.get(current, []):
if source not in visited:
visited.add(source)
queue.append(source)
if source in nodes:
upstream.append(cls._extract_node_info(nodes[source]))
return upstream
@classmethod
def _get_node_by_id(cls, graph_dict: Mapping[str, Any], node_id: str) -> dict | None:
"""Get node by ID from graph."""
for node in graph_dict.get("nodes", []):
if node["id"] == node_id:
return node
return None
@classmethod
def _extract_node_info(cls, node: dict) -> dict:
"""Extract minimal node info with outputs based on node type."""
node_type = node["data"]["type"]
node_data = node.get("data", {})
# Build outputs based on node type (only type, no description to reduce tokens)
outputs: dict[str, str] = {}
match node_type:
case "start":
for var in node_data.get("variables", []):
name = var.get("variable", var.get("name", ""))
outputs[name] = var.get("type", "string")
case "llm":
outputs["text"] = "string"
case "code":
for name, output in node_data.get("outputs", {}).items():
outputs[name] = output.get("type", "string")
case "http-request":
outputs = {"body": "string", "status_code": "number", "headers": "object"}
case "knowledge-retrieval":
outputs["result"] = "array[object]"
case "tool":
outputs = {"text": "string", "json": "object"}
case _:
outputs["output"] = "string"
info: dict = {
"id": node["id"],
"title": node_data.get("title", node["id"]),
"outputs": outputs,
}
# Only include description if not empty
desc = node_data.get("desc", "")
if desc:
info["desc"] = desc
return info
@classmethod
def _get_parameter_info(cls, tenant_id: str, node_data: dict, parameter_name: str) -> dict:
"""Get parameter info from tool schema using ToolManager."""
default_info = {"name": parameter_name, "type": "string", "description": ""}
if node_data.get("type") != "tool":
return default_info
try:
from core.app.entities.app_invoke_entities import InvokeFrom
from core.tools.entities.tool_entities import ToolProviderType
from core.tools.tool_manager import ToolManager
provider_type_str = node_data.get("provider_type", "")
provider_type = ToolProviderType(provider_type_str) if provider_type_str else ToolProviderType.BUILT_IN
tool_runtime = ToolManager.get_tool_runtime(
provider_type=provider_type,
provider_id=node_data.get("provider_id", ""),
tool_name=node_data.get("tool_name", ""),
tenant_id=tenant_id,
invoke_from=InvokeFrom.DEBUGGER,
)
parameters = tool_runtime.get_merged_runtime_parameters()
for param in parameters:
if param.name == parameter_name:
return {
"name": param.name,
"type": param.type.value if hasattr(param.type, "value") else str(param.type),
"description": param.llm_description
or (param.human_description.en_US if param.human_description else ""),
"required": param.required,
}
except Exception as e:
logger.debug("Failed to get parameter info from ToolManager: %s", e)
return default_info
@classmethod
def _build_extractor_system_prompt(
cls,
upstream_nodes: list[dict],
current_node: dict,
parameter_info: dict,
language: str,
) -> str:
"""Build system prompt for extractor code generation."""
upstream_json = json.dumps(upstream_nodes, indent=2, ensure_ascii=False)
param_type = parameter_info.get("type", "string")
return f"""You are a code generator for workflow automation.
Generate {language} code to extract/transform upstream node outputs for the target parameter.
## Upstream Nodes
{upstream_json}
## Target
Node: {current_node["data"].get("title", current_node["id"])}
Parameter: {parameter_info.get("name")} ({param_type}) - {parameter_info.get("description", "")}
## Requirements
- Write a main function that returns type: {param_type}
- Use value_selector format: ["node_id", "output_name"]
"""
@classmethod
def _parse_code_node_output(cls, content: Mapping[str, Any] | None, language: str, parameter_type: str) -> dict:
"""
Parse structured output to CodeNodeData format.
Args:
content: Structured output dict from invoke_llm_with_structured_output
language: Code language
parameter_type: Expected parameter type
Returns dict with variables, code_language, code, outputs, message, error.
"""
if content is None:
return cls._error_response("Empty or invalid response from LLM")
# Validate and normalize variables
variables = [
{"variable": v.get("variable", ""), "value_selector": v.get("value_selector", [])}
for v in content.get("variables", [])
if isinstance(v, dict)
]
outputs = content.get("outputs", {"result": {"type": parameter_type}})
return {
"variables": variables,
"code_language": language,
"code": content.get("code", ""),
"outputs": outputs,
"message": content.get("explanation", ""),
"error": "",
}
@staticmethod
def instruction_modify_legacy(
tenant_id: str, flow_id: str, current: str, instruction: str, model_config: dict, ideal_output: str | None
):
last_run: Message | None = (
db.session.query(Message).where(Message.app_id == flow_id).order_by(Message.created_at.desc()).first()
db.session.query(Message).where(Message.app_id == flow_id).order_by(
Message.created_at.desc()).first()
)
if not last_run:
return LLMGenerator.__instruction_modify_common(
@@ -933,7 +466,8 @@ Parameter: {parameter_info.get("name")} ({param_type}) - {parameter_info.get("de
workflow = workflow_service.get_draft_workflow(app_model=app)
if not workflow:
raise ValueError("Workflow not found for the given app model.")
last_run = workflow_service.get_node_last_run(app_model=app, workflow=workflow, node_id=node_id)
last_run = workflow_service.get_node_last_run(
app_model=app, workflow=workflow, node_id=node_id)
try:
node_type = cast(WorkflowNodeExecutionModel, last_run).node_type
except Exception:
@@ -957,7 +491,8 @@ Parameter: {parameter_info.get("name")} ({param_type}) - {parameter_info.get("de
)
def agent_log_of(node_execution: WorkflowNodeExecutionModel) -> Sequence:
raw_agent_log = node_execution.execution_metadata_dict.get(WorkflowNodeExecutionMetadataKey.AGENT_LOG, [])
raw_agent_log = node_execution.execution_metadata_dict.get(
WorkflowNodeExecutionMetadataKey.AGENT_LOG, [])
if not raw_agent_log:
return []
@@ -1005,11 +540,14 @@ Parameter: {parameter_info.get("name")} ({param_type}) - {parameter_info.get("de
ERROR_MESSAGE = "{{#error_message#}}"
injected_instruction = instruction
if LAST_RUN in injected_instruction:
injected_instruction = injected_instruction.replace(LAST_RUN, json.dumps(last_run))
injected_instruction = injected_instruction.replace(
LAST_RUN, json.dumps(last_run))
if CURRENT in injected_instruction:
injected_instruction = injected_instruction.replace(CURRENT, current or "null")
injected_instruction = injected_instruction.replace(
CURRENT, current or "null")
if ERROR_MESSAGE in injected_instruction:
injected_instruction = injected_instruction.replace(ERROR_MESSAGE, error_message or "null")
injected_instruction = injected_instruction.replace(
ERROR_MESSAGE, error_message or "null")
model_instance = ModelManager().get_model_instance(
tenant_id=tenant_id,
model_type=ModelType.LLM,
@@ -1047,11 +585,13 @@ Parameter: {parameter_info.get("name")} ({param_type}) - {parameter_info.get("de
first_brace = generated_raw.find("{")
last_brace = generated_raw.rfind("}")
if first_brace == -1 or last_brace == -1 or last_brace < first_brace:
raise ValueError(f"Could not find a valid JSON object in response: {generated_raw}")
json_str = generated_raw[first_brace : last_brace + 1]
raise ValueError(
f"Could not find a valid JSON object in response: {generated_raw}")
json_str = generated_raw[first_brace: last_brace + 1]
data = json_repair.loads(json_str)
if not isinstance(data, dict):
raise TypeError(f"Expected a JSON object, but got {type(data).__name__}")
raise TypeError(
f"Expected a JSON object, but got {type(data).__name__}")
return data
except InvokeError as e:
error = str(e)

View File
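The recurring edit in the file above replaces `cast(str, response.message.content)` with `response.message.get_text_content()`. The cast only silences the type checker, while `content` can legally be a list of multimodal parts; the accessor extracts the text safely. A simplified toy model of the difference (not the real `PromptMessage` types):

```python
# Toy model of why get_text_content() beats cast(str, ...): assistant
# message content may be a list of parts, not a plain string.
from dataclasses import dataclass


@dataclass
class TextPart:
    data: str


@dataclass
class FakeAssistantMessage:
    content: str | list

    def get_text_content(self) -> str:
        if isinstance(self.content, str):
            return self.content
        # Keep only textual parts; images and other media are skipped.
        return "".join(p.data for p in self.content if isinstance(p, TextPart))


msg = FakeAssistantMessage(content=[TextPart("hello "), TextPart("world")])
assert msg.get_text_content() == "hello world"  # cast(str, msg.content) would lie
```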

@@ -1,188 +0,0 @@
"""
File reference detection and conversion for structured output.
This module provides utilities to:
1. Detect file reference fields in JSON Schema (format: "dify-file-ref")
2. Convert file ID strings to File objects after LLM returns
"""
import uuid
from collections.abc import Mapping
from typing import Any
from core.file import File
from core.variables.segments import ArrayFileSegment, FileSegment
from factories.file_factory import build_from_mapping
FILE_REF_FORMAT = "dify-file-ref"
def is_file_ref_property(schema: dict) -> bool:
"""Check if a schema property is a file reference."""
return schema.get("type") == "string" and schema.get("format") == FILE_REF_FORMAT
def detect_file_ref_fields(schema: Mapping[str, Any], path: str = "") -> list[str]:
"""
Recursively detect file reference fields in schema.
Args:
schema: JSON Schema to analyze
path: Current path in the schema (used for recursion)
Returns:
List of JSON paths containing file refs, e.g., ["image_id", "files[*]"]
"""
file_ref_paths: list[str] = []
schema_type = schema.get("type")
if schema_type == "object":
for prop_name, prop_schema in schema.get("properties", {}).items():
current_path = f"{path}.{prop_name}" if path else prop_name
if is_file_ref_property(prop_schema):
file_ref_paths.append(current_path)
elif isinstance(prop_schema, dict):
file_ref_paths.extend(detect_file_ref_fields(prop_schema, current_path))
elif schema_type == "array":
items_schema = schema.get("items", {})
array_path = f"{path}[*]" if path else "[*]"
if is_file_ref_property(items_schema):
file_ref_paths.append(array_path)
elif isinstance(items_schema, dict):
file_ref_paths.extend(detect_file_ref_fields(items_schema, array_path))
return file_ref_paths
def convert_file_refs_in_output(
output: Mapping[str, Any],
json_schema: Mapping[str, Any],
tenant_id: str,
) -> dict[str, Any]:
"""
Convert file ID strings to File objects based on schema.
Args:
output: The structured_output from LLM result
json_schema: The original JSON schema (to detect file ref fields)
tenant_id: Tenant ID for file lookup
Returns:
Output with file references converted to File objects
"""
file_ref_paths = detect_file_ref_fields(json_schema)
if not file_ref_paths:
return dict(output)
result = _deep_copy_dict(output)
for path in file_ref_paths:
_convert_path_in_place(result, path.split("."), tenant_id)
return result
def _deep_copy_dict(obj: Mapping[str, Any]) -> dict[str, Any]:
"""Deep copy a mapping to a mutable dict."""
result: dict[str, Any] = {}
for key, value in obj.items():
if isinstance(value, Mapping):
result[key] = _deep_copy_dict(value)
elif isinstance(value, list):
result[key] = [_deep_copy_dict(item) if isinstance(item, Mapping) else item for item in value]
else:
result[key] = value
return result
def _convert_path_in_place(obj: dict, path_parts: list[str], tenant_id: str) -> None:
"""Convert file refs at the given path in place, wrapping in Segment types."""
if not path_parts:
return
current = path_parts[0]
remaining = path_parts[1:]
# Handle array notation like "files[*]"
if current.endswith("[*]"):
key = current[:-3] if current != "[*]" else None
target = obj.get(key) if key else obj
if isinstance(target, list):
if remaining:
# Nested array with remaining path - recurse into each item
for item in target:
if isinstance(item, dict):
_convert_path_in_place(item, remaining, tenant_id)
else:
# Array of file IDs - convert all and wrap in ArrayFileSegment
files: list[File] = []
for item in target:
file = _convert_file_id(item, tenant_id)
if file is not None:
files.append(file)
# Replace the array with ArrayFileSegment
if key:
obj[key] = ArrayFileSegment(value=files)
return
if not remaining:
# Leaf node - convert the value and wrap in FileSegment
if current in obj:
file = _convert_file_id(obj[current], tenant_id)
if file is not None:
obj[current] = FileSegment(value=file)
else:
obj[current] = None
else:
# Recurse into nested object
if current in obj and isinstance(obj[current], dict):
_convert_path_in_place(obj[current], remaining, tenant_id)
def _convert_file_id(file_id: Any, tenant_id: str) -> File | None:
"""
Convert a file ID string to a File object.
Tries multiple file sources in order:
1. ToolFile (files generated by tools/workflows)
2. UploadFile (files uploaded by users)
"""
if not isinstance(file_id, str):
return None
# Validate UUID format
try:
uuid.UUID(file_id)
except ValueError:
return None
# Try ToolFile first (files generated by tools/workflows)
try:
return build_from_mapping(
mapping={
"transfer_method": "tool_file",
"tool_file_id": file_id,
},
tenant_id=tenant_id,
)
except ValueError:
pass
# Try UploadFile (files uploaded by users)
try:
return build_from_mapping(
mapping={
"transfer_method": "local_file",
"upload_file_id": file_id,
},
tenant_id=tenant_id,
)
except ValueError:
pass
# File not found in any source
return None

View File
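The deleted `file_ref` module above walked a JSON Schema for properties marked `format: "dify-file-ref"`. For readers tracing the removal, a trimmed, self-contained sketch of the detection walk (mirroring the deleted code, minus the conversion half):

```python
# Trimmed mirror of the deleted detector: collect JSON paths whose schema
# marks a string with format "dify-file-ref"; array items get a [*] suffix.
FILE_REF_FORMAT = "dify-file-ref"


def detect_file_ref_fields(schema: dict, path: str = "") -> list[str]:
    found: list[str] = []
    if schema.get("type") == "object":
        for name, prop in schema.get("properties", {}).items():
            child = f"{path}.{name}" if path else name
            if prop.get("type") == "string" and prop.get("format") == FILE_REF_FORMAT:
                found.append(child)
            elif isinstance(prop, dict):
                found.extend(detect_file_ref_fields(prop, child))
    elif schema.get("type") == "array":
        items = schema.get("items", {})
        child = f"{path}[*]" if path else "[*]"
        if items.get("type") == "string" and items.get("format") == FILE_REF_FORMAT:
            found.append(child)
        elif isinstance(items, dict):
            found.extend(detect_file_ref_fields(items, child))
    return found


schema = {
    "type": "object",
    "properties": {
        "image_id": {"type": "string", "format": FILE_REF_FORMAT},
        "files": {"type": "array", "items": {"type": "string", "format": FILE_REF_FORMAT}},
    },
}
assert detect_file_ref_fields(schema) == ["image_id", "files[*]"]
```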

@@ -8,7 +8,6 @@ import json_repair
from pydantic import TypeAdapter, ValidationError
from core.llm_generator.output_parser.errors import OutputParserError
from core.llm_generator.output_parser.file_ref import convert_file_refs_in_output
from core.llm_generator.prompts import STRUCTURED_OUTPUT_PROMPT
from core.model_manager import ModelInstance
from core.model_runtime.callbacks.base_callback import Callback
@@ -58,7 +57,6 @@ def invoke_llm_with_structured_output(
stream: Literal[True],
user: str | None = None,
callbacks: list[Callback] | None = None,
tenant_id: str | None = None,
) -> Generator[LLMResultChunkWithStructuredOutput, None, None]: ...
@overload
def invoke_llm_with_structured_output(
@@ -74,7 +72,6 @@ def invoke_llm_with_structured_output(
stream: Literal[False],
user: str | None = None,
callbacks: list[Callback] | None = None,
tenant_id: str | None = None,
) -> LLMResultWithStructuredOutput: ...
@overload
def invoke_llm_with_structured_output(
@@ -90,7 +87,6 @@ def invoke_llm_with_structured_output(
stream: bool = True,
user: str | None = None,
callbacks: list[Callback] | None = None,
tenant_id: str | None = None,
) -> LLMResultWithStructuredOutput | Generator[LLMResultChunkWithStructuredOutput, None, None]: ...
def invoke_llm_with_structured_output(
*,
@@ -105,28 +101,20 @@ def invoke_llm_with_structured_output(
stream: bool = True,
user: str | None = None,
callbacks: list[Callback] | None = None,
tenant_id: str | None = None,
) -> LLMResultWithStructuredOutput | Generator[LLMResultChunkWithStructuredOutput, None, None]:
"""
Invoke large language model with structured output.
Invoke large language model with structured output
1. This method invokes model_instance.invoke_llm with json_schema
2. Try to parse the result as structured output
This method invokes model_instance.invoke_llm with json_schema and parses
the result as structured output.
:param provider: model provider name
:param model_schema: model schema entity
:param model_instance: model instance to invoke
:param prompt_messages: prompt messages
:param json_schema: json schema for structured output
:param json_schema: json schema
:param model_parameters: model parameters
:param tools: tools for tool calling
:param stop: stop words
:param stream: is stream response
:param user: unique user id
:param callbacks: callbacks
:param tenant_id: tenant ID for file reference conversion. When provided and
json_schema contains file reference fields (format: "dify-file-ref"),
file IDs in the output will be automatically converted to File objects.
:return: full response or stream response chunk generator result
"""
@@ -165,18 +153,8 @@ def invoke_llm_with_structured_output(
f"Failed to parse structured output, LLM result is not a string: {llm_result.message.content}"
)
structured_output = _parse_structured_output(llm_result.message.content)
# Convert file references if tenant_id is provided
if tenant_id is not None:
structured_output = convert_file_refs_in_output(
output=structured_output,
json_schema=json_schema,
tenant_id=tenant_id,
)
return LLMResultWithStructuredOutput(
structured_output=structured_output,
structured_output=_parse_structured_output(llm_result.message.content),
model=llm_result.model,
message=llm_result.message,
usage=llm_result.usage,
@@ -208,18 +186,8 @@ def invoke_llm_with_structured_output(
delta=event.delta,
)
structured_output = _parse_structured_output(result_text)
# Convert file references if tenant_id is provided
if tenant_id is not None:
structured_output = convert_file_refs_in_output(
output=structured_output,
json_schema=json_schema,
tenant_id=tenant_id,
)
yield LLMResultChunkWithStructuredOutput(
structured_output=structured_output,
structured_output=_parse_structured_output(result_text),
model=model_schema.model,
prompt_messages=prompt_messages,
system_fingerprint=system_fingerprint,

View File
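With the `tenant_id` parameter and the file-reference post-processing removed, `invoke_llm_with_structured_output` is again a pure invoke-and-parse wrapper. A hedged sketch of a non-streaming call after this change (the model setup is assumed to exist elsewhere, as in LLMGenerator):

```python
# Hedged usage sketch: stream=False returns a single result whose
# structured_output holds the parsed dict; any file-reference handling
# now has to happen in the caller, not inside this helper.
from core.llm_generator.output_parser.structured_output import (
    invoke_llm_with_structured_output,
)


def ask_structured(provider, model_schema, model_instance, prompt_messages, json_schema):
    result = invoke_llm_with_structured_output(
        provider=provider,
        model_schema=model_schema,
        model_instance=model_instance,
        prompt_messages=prompt_messages,
        json_schema=json_schema,
        model_parameters={},
        stream=False,  # selects the non-streaming overload
    )
    return result.structured_output or {}
```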

@@ -434,3 +434,20 @@ INSTRUCTION_GENERATE_TEMPLATE_PROMPT = """The output of this prompt is not as ex
You should edit the prompt according to the IDEAL OUTPUT."""
INSTRUCTION_GENERATE_TEMPLATE_CODE = """Please fix the errors in the {{#error_message#}}."""
DEFAULT_GENERATOR_SUMMARY_PROMPT = (
"""Summarize the following content. Extract only the key information and main points. """
"""Remove redundant details.
Requirements:
1. Write a concise summary in plain text
2. Use the same language as the input content
3. Focus on important facts, concepts, and details
4. If images are included, describe their key information
5. Do not use words like "好的", "ok", "I understand", "This text discusses", "The content mentions"
6. Write directly without extra words
Output only the summary text. Start summarizing now:
"""
)

View File
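The call site for `DEFAULT_GENERATOR_SUMMARY_PROMPT` is not shown in this diff; presumably the summary-index processor sends it as the system message with the chunk as the user message. A hedged sketch of that assumed wiring (`build_summary_messages` is a hypothetical helper, not in the diff):

```python
# Assumed wiring (hypothetical helper): summary prompt as system message,
# chunk text as user message.
from core.model_runtime.entities.message_entities import (
    SystemPromptMessage,
    UserPromptMessage,
)


def build_summary_messages(chunk_text: str):
    return [
        SystemPromptMessage(content=DEFAULT_GENERATOR_SUMMARY_PROMPT),
        UserPromptMessage(content=chunk_text),
    ]
```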

@@ -1,45 +0,0 @@
"""Utility functions for LLM generator."""
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessage,
PromptMessageRole,
SystemPromptMessage,
ToolPromptMessage,
UserPromptMessage,
)
def deserialize_prompt_messages(messages: list[dict]) -> list[PromptMessage]:
"""
Deserialize list of dicts to list[PromptMessage].
Expected format:
[
{"role": "user", "content": "..."},
{"role": "assistant", "content": "..."},
]
"""
result: list[PromptMessage] = []
for msg in messages:
role = PromptMessageRole.value_of(msg["role"])
content = msg.get("content", "")
match role:
case PromptMessageRole.USER:
result.append(UserPromptMessage(content=content))
case PromptMessageRole.ASSISTANT:
result.append(AssistantPromptMessage(content=content))
case PromptMessageRole.SYSTEM:
result.append(SystemPromptMessage(content=content))
case PromptMessageRole.TOOL:
result.append(ToolPromptMessage(content=content, tool_call_id=msg.get("tool_call_id", "")))
return result
def serialize_prompt_messages(messages: list[PromptMessage]) -> list[dict]:
"""
Serialize list[PromptMessage] to list of dicts.
"""
return [{"role": msg.role.value, "content": msg.content} for msg in messages]

View File
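The two deleted helpers above were symmetric for plain-text messages, so a serialize/deserialize round trip preserved the input. A quick sketch of that property (runnable while the helpers existed; tool messages would additionally carry `tool_call_id`):

```python
# Round-trip property of the deleted helpers for plain-text messages.
raw = [
    {"role": "user", "content": "What is RAG?"},
    {"role": "assistant", "content": "Retrieval-augmented generation."},
]
messages = deserialize_prompt_messages(raw)
assert serialize_prompt_messages(messages) == raw
```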

@@ -1,434 +0,0 @@
# Memory Module
This module provides memory management for LLM conversations, enabling context retention across dialogue turns.
## Overview
The memory module contains two types of memory implementations:
1. **TokenBufferMemory** - Conversation-level memory (existing)
2. **NodeTokenBufferMemory** - Node-level memory (to be implemented, **Chatflow only**)
> **Note**: `NodeTokenBufferMemory` is only available in **Chatflow** (advanced-chat mode).
> This is because it requires both `conversation_id` and `node_id`, which are only present in Chatflow.
> Standard Workflow mode does not have `conversation_id` and therefore cannot use node-level memory.
```
┌─────────────────────────────────────────────────────────────────────────────┐
│ Memory Architecture │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────────────────────────────────────────────────────────-┐ │
│ │ TokenBufferMemory │ │
│ │ Scope: Conversation │ │
│ │ Storage: Database (Message table) │ │
│ │ Key: conversation_id │ │
│ └─────────────────────────────────────────────────────────────────────-┘ │
│ │
│ ┌─────────────────────────────────────────────────────────────────────-┐ │
│ │ NodeTokenBufferMemory │ │
│ │ Scope: Node within Conversation │ │
│ │ Storage: Object Storage (JSON file) │ │
│ │ Key: (app_id, conversation_id, node_id) │ │
│ └─────────────────────────────────────────────────────────────────────-┘ │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
```
---
## TokenBufferMemory (Existing)
### Purpose
`TokenBufferMemory` retrieves conversation history from the `Message` table and converts it to `PromptMessage` objects for LLM context.
### Key Features
- **Conversation-scoped**: All messages within a conversation are candidates
- **Thread-aware**: Uses `parent_message_id` to extract only the current thread (supports regeneration scenarios)
- **Token-limited**: Truncates history to fit within `max_token_limit`
- **File support**: Handles `MessageFile` attachments (images, documents, etc.)
### Data Flow
```
Message Table TokenBufferMemory LLM
│ │ │
│ SELECT * FROM messages │ │
│ WHERE conversation_id = ? │ │
│ ORDER BY created_at DESC │ │
├─────────────────────────────────▶│ │
│ │ │
│ extract_thread_messages() │
│ │ │
│ build_prompt_message_with_files() │
│ │ │
│ truncate by max_token_limit │
│ │ │
│ │ Sequence[PromptMessage]
│ ├───────────────────────▶│
│ │ │
```
### Thread Extraction
When a user regenerates a response, a new thread is created:
```
Message A (user)
└── Message A' (assistant)
└── Message B (user)
└── Message B' (assistant)
└── Message A'' (assistant, regenerated) ← New thread
└── Message C (user)
└── Message C' (assistant)
```
`extract_thread_messages()` traces back from the latest message using `parent_message_id` to get only the current thread: `[A, A'', C, C']`.
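A minimal sketch of that backtracking over simplified records (dicts instead of the real `Message` model):

```python
# Minimal sketch: follow parent_message_id pointers from the newest
# message, then reverse to chronological order.
def extract_thread(messages_newest_first: list[dict]) -> list[dict]:
    by_id = {m["id"]: m for m in messages_newest_first}
    thread: list[dict] = []
    current = messages_newest_first[0] if messages_newest_first else None
    while current is not None:
        thread.append(current)
        parent_id = current.get("parent_message_id")
        current = by_id.get(parent_id) if parent_id else None
    return list(reversed(thread))
```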
### Usage
```python
from core.memory.token_buffer_memory import TokenBufferMemory
memory = TokenBufferMemory(conversation=conversation, model_instance=model_instance)
history = memory.get_history_prompt_messages(max_token_limit=2000, message_limit=100)
```
---
## NodeTokenBufferMemory (To Be Implemented)
### Purpose
`NodeTokenBufferMemory` provides **node-scoped memory** within a conversation. Each LLM node in a workflow can maintain its own independent conversation history.
### Use Cases
1. **Multi-LLM Workflows**: Different LLM nodes need separate context
2. **Iterative Processing**: An LLM node in a loop needs to accumulate context across iterations
3. **Specialized Agents**: Each agent node maintains its own dialogue history
### Design Decisions
#### Storage: Object Storage for Messages (No New Database Table)
| Aspect | Database | Object Storage |
| ------------------------- | -------------------- | ------------------ |
| Cost | High | Low |
| Query Flexibility | High | Low |
| Schema Changes | Migration required | None |
| Consistency with existing | ConversationVariable | File uploads, logs |
**Decision**: Store message data in object storage, but still use existing database tables for file metadata.
**What is stored in Object Storage:**
- Message content (text)
- Message metadata (role, token_count, created_at)
- File references (upload_file_id, tool_file_id, etc.)
- Thread relationships (message_id, parent_message_id)
**What still requires Database queries:**
- File reconstruction: When reading node memory, file references are used to query
`UploadFile` / `ToolFile` tables via `file_factory.build_from_mapping()` to rebuild
complete `File` objects with storage_key, mime_type, etc.
**Why this hybrid approach:**
- No database migration required (no new tables)
- Message data may be large, and object storage is cost-effective
- File metadata is already in the database, so there is no need to duplicate it
- Aligns with existing storage patterns (file uploads, logs)
#### Storage Key Format
```
node_memory/{app_id}/{conversation_id}/{node_id}.json
```
#### Data Structure
```json
{
"version": 1,
"messages": [
{
"message_id": "msg-001",
"parent_message_id": null,
"role": "user",
"content": "Analyze this image",
"files": [
{
"type": "image",
"transfer_method": "local_file",
"upload_file_id": "file-uuid-123",
"belongs_to": "user"
}
],
"token_count": 15,
"created_at": "2026-01-07T10:00:00Z"
},
{
"message_id": "msg-002",
"parent_message_id": "msg-001",
"role": "assistant",
"content": "This is a landscape image...",
"files": [],
"token_count": 50,
"created_at": "2026-01-07T10:00:01Z"
}
]
}
```
### Thread Support
Node memory also supports thread extraction (for regeneration scenarios):
```python
def _extract_thread(
self,
messages: list[NodeMemoryMessage],
current_message_id: str
) -> list[NodeMemoryMessage]:
"""
Extract messages belonging to the thread of current_message_id.
Similar to extract_thread_messages() in TokenBufferMemory.
"""
...
```
### File Handling
Files are stored as references (not full metadata):
```python
class NodeMemoryFile(BaseModel):
type: str # image, audio, video, document, custom
transfer_method: str # local_file, remote_url, tool_file
upload_file_id: str | None # for local_file
tool_file_id: str | None # for tool_file
url: str | None # for remote_url
belongs_to: str # user / assistant
```
When reading, files are rebuilt using `file_factory.build_from_mapping()`.
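A hedged sketch of that rebuild step (the mapping keys mirror `NodeMemoryFile`; `build_from_mapping` is the existing factory):

```python
# Sketch: turn stored file references back into File objects on read.
from factories.file_factory import build_from_mapping


def rebuild_files(memory_files: list[NodeMemoryFile], tenant_id: str) -> list:
    files = []
    for mf in memory_files:
        mapping = {"type": mf.type, "transfer_method": mf.transfer_method}
        if mf.upload_file_id:
            mapping["upload_file_id"] = mf.upload_file_id
        if mf.tool_file_id:
            mapping["tool_file_id"] = mf.tool_file_id
        if mf.url:
            mapping["url"] = mf.url
        files.append(build_from_mapping(mapping=mapping, tenant_id=tenant_id))
    return files
```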
### API Design
```python
class NodeTokenBufferMemory:
def __init__(
self,
app_id: str,
conversation_id: str,
node_id: str,
model_instance: ModelInstance,
):
"""
Initialize node-level memory.
:param app_id: Application ID
:param conversation_id: Conversation ID
:param node_id: Node ID in the workflow
:param model_instance: Model instance for token counting
"""
...
def add_messages(
self,
message_id: str,
parent_message_id: str | None,
user_content: str,
user_files: Sequence[File],
assistant_content: str,
assistant_files: Sequence[File],
) -> None:
"""
Append a dialogue turn (user + assistant) to node memory.
Call this after LLM node execution completes.
:param message_id: Current message ID (from Message table)
:param parent_message_id: Parent message ID (for thread tracking)
:param user_content: User's text input
:param user_files: Files attached by user
:param assistant_content: Assistant's text response
:param assistant_files: Files generated by assistant
"""
...
def get_history_prompt_messages(
self,
current_message_id: str,
tenant_id: str,
max_token_limit: int = 2000,
file_upload_config: FileUploadConfig | None = None,
) -> Sequence[PromptMessage]:
"""
Retrieve history as PromptMessage sequence.
:param current_message_id: Current message ID (for thread extraction)
:param tenant_id: Tenant ID (for file reconstruction)
:param max_token_limit: Maximum tokens for history
:param file_upload_config: File upload configuration
:return: Sequence of PromptMessage for LLM context
"""
...
def flush(self) -> None:
"""
Persist buffered changes to object storage.
Call this at the end of node execution.
"""
...
def clear(self) -> None:
"""
Clear all messages in this node's memory.
"""
...
```
### Data Flow
```
Object Storage NodeTokenBufferMemory LLM Node
│ │ │
│ │◀── get_history_prompt_messages()
│ storage.load(key) │ │
│◀─────────────────────────────────┤ │
│ │ │
│ JSON data │ │
├─────────────────────────────────▶│ │
│ │ │
│ _extract_thread() │
│ │ │
│ _rebuild_files() via file_factory │
│ │ │
│ _build_prompt_messages() │
│ │ │
│ _truncate_by_tokens() │
│ │ │
│ │ Sequence[PromptMessage] │
│ ├──────────────────────────▶│
│ │ │
│ │◀── LLM execution complete │
│ │ │
│ │◀── add_messages() │
│ │ │
│ storage.save(key, data) │ │
│◀─────────────────────────────────┤ │
│ │ │
```
### Integration with LLM Node
```python
# In LLM Node execution
# 1. Fetch memory based on mode
if node_data.memory and node_data.memory.mode == MemoryMode.NODE:
# Node-level memory (Chatflow only)
memory = fetch_node_memory(
variable_pool=variable_pool,
app_id=app_id,
node_id=self.node_id,
node_data_memory=node_data.memory,
model_instance=model_instance,
)
elif node_data.memory and node_data.memory.mode == MemoryMode.CONVERSATION:
# Conversation-level memory (existing behavior)
memory = fetch_memory(
variable_pool=variable_pool,
app_id=app_id,
node_data_memory=node_data.memory,
model_instance=model_instance,
)
else:
memory = None
# 2. Get history for context
if memory:
if isinstance(memory, NodeTokenBufferMemory):
history = memory.get_history_prompt_messages(
current_message_id=current_message_id,
tenant_id=tenant_id,
max_token_limit=max_token_limit,
)
else: # TokenBufferMemory
history = memory.get_history_prompt_messages(
max_token_limit=max_token_limit,
)
prompt_messages = [*history, *current_messages]
else:
prompt_messages = current_messages
# 3. Call LLM
response = model_instance.invoke(prompt_messages)
# 4. Append to node memory (only for NodeTokenBufferMemory)
if isinstance(memory, NodeTokenBufferMemory):
memory.add_messages(
message_id=message_id,
parent_message_id=parent_message_id,
user_content=user_input,
user_files=user_files,
assistant_content=response.content,
assistant_files=response_files,
)
memory.flush()
```
### Configuration
Add to `MemoryConfig` in `core/workflow/nodes/llm/entities.py`:
```python
class MemoryMode(StrEnum):
CONVERSATION = "conversation" # Use TokenBufferMemory (default, existing behavior)
NODE = "node" # Use NodeTokenBufferMemory (new, Chatflow only)
class MemoryConfig(BaseModel):
# Existing fields
role_prefix: RolePrefix | None = None
window: MemoryWindowConfig | None = None
query_prompt_template: str | None = None
# Memory mode (new)
mode: MemoryMode = MemoryMode.CONVERSATION
```
**Mode Behavior:**
| Mode | Memory Class | Scope | Availability |
| -------------- | --------------------- | ------------------------ | ------------- |
| `conversation` | TokenBufferMemory | Entire conversation | All app modes |
| `node` | NodeTokenBufferMemory | Per-node in conversation | Chatflow only |
> When `mode=node` is used in a non-Chatflow context (no conversation_id), it should
> fall back to no memory or raise a configuration error.
---
## Comparison
| Feature | TokenBufferMemory | NodeTokenBufferMemory |
| -------------- | ------------------------ | ------------------------- |
| Scope | Conversation | Node within Conversation |
| Storage | Database (Message table) | Object Storage (JSON) |
| Thread Support | Yes | Yes |
| File Support | Yes (via MessageFile) | Yes (via file references) |
| Token Limit | Yes | Yes |
| Use Case | Standard chat apps | Complex workflows |
---
## Future Considerations
1. **Cleanup Task**: Add a Celery task to clean up old node memory files
2. **Concurrency**: Consider Redis lock for concurrent node executions
3. **Compression**: Compress large memory files to reduce storage costs
4. **Extension**: Other nodes (Agent, Tool) may also benefit from node-level memory

View File

@@ -1,15 +0,0 @@
from core.memory.base import BaseMemory
from core.memory.node_token_buffer_memory import (
NodeMemoryData,
NodeMemoryFile,
NodeTokenBufferMemory,
)
from core.memory.token_buffer_memory import TokenBufferMemory
__all__ = [
"BaseMemory",
"NodeMemoryData",
"NodeMemoryFile",
"NodeTokenBufferMemory",
"TokenBufferMemory",
]

View File

@@ -1,83 +0,0 @@
"""
Base memory interfaces and types.
This module defines the common protocol for memory implementations.
"""
from abc import ABC, abstractmethod
from collections.abc import Sequence
from core.model_runtime.entities import ImagePromptMessageContent, PromptMessage
class BaseMemory(ABC):
"""
Abstract base class for memory implementations.
Provides a common interface for both conversation-level and node-level memory.
"""
@abstractmethod
def get_history_prompt_messages(
self,
*,
max_token_limit: int = 2000,
message_limit: int | None = None,
) -> Sequence[PromptMessage]:
"""
Get history prompt messages.
:param max_token_limit: Maximum tokens for history
:param message_limit: Maximum number of messages
:return: Sequence of PromptMessage for LLM context
"""
pass
def get_history_prompt_text(
self,
human_prefix: str = "Human",
ai_prefix: str = "Assistant",
max_token_limit: int = 2000,
message_limit: int | None = None,
) -> str:
"""
Get history prompt as formatted text.
:param human_prefix: Prefix for human messages
:param ai_prefix: Prefix for assistant messages
:param max_token_limit: Maximum tokens for history
:param message_limit: Maximum number of messages
:return: Formatted history text
"""
from core.model_runtime.entities import (
PromptMessageRole,
TextPromptMessageContent,
)
prompt_messages = self.get_history_prompt_messages(
max_token_limit=max_token_limit,
message_limit=message_limit,
)
string_messages = []
for m in prompt_messages:
if m.role == PromptMessageRole.USER:
role = human_prefix
elif m.role == PromptMessageRole.ASSISTANT:
role = ai_prefix
else:
continue
if isinstance(m.content, list):
inner_msg = ""
for content in m.content:
if isinstance(content, TextPromptMessageContent):
inner_msg += f"{content.data}\n"
elif isinstance(content, ImagePromptMessageContent):
inner_msg += "[image]\n"
string_messages.append(f"{role}: {inner_msg.strip()}")
else:
message = f"{role}: {m.content}"
string_messages.append(message)
return "\n".join(string_messages)

View File
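For reference, a toy implementation of the deleted interface above (illustrative only; it ignores token counting entirely):

```python
# Toy BaseMemory subclass: serves a fixed message list and ignores the
# token limit, which is enough to exercise get_history_prompt_text().
from collections.abc import Sequence

from core.memory.base import BaseMemory
from core.model_runtime.entities import PromptMessage, UserPromptMessage


class StaticMemory(BaseMemory):
    def __init__(self, texts: list[str]):
        self._messages = [UserPromptMessage(content=t) for t in texts]

    def get_history_prompt_messages(
        self, *, max_token_limit: int = 2000, message_limit: int | None = None
    ) -> Sequence[PromptMessage]:
        limit = message_limit if message_limit is not None else len(self._messages)
        return self._messages[:limit]
```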

@@ -1,353 +0,0 @@
"""
Node-level Token Buffer Memory for Chatflow.
This module provides node-scoped memory within a conversation.
Each LLM node in a workflow can maintain its own independent conversation history.
Note: This is only available in Chatflow (advanced-chat mode) because it requires
both conversation_id and node_id.
Design:
- Storage is indexed by workflow_run_id (each execution stores one turn)
- Thread tracking leverages Message table's parent_message_id structure
- On read: query Message table for current thread, then filter Node Memory by workflow_run_ids
"""
import logging
from collections.abc import Sequence
from pydantic import BaseModel
from sqlalchemy import select
from core.file import File, FileTransferMethod
from core.memory.base import BaseMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities import (
AssistantPromptMessage,
ImagePromptMessageContent,
PromptMessage,
TextPromptMessageContent,
UserPromptMessage,
)
from core.prompt.utils.extract_thread_messages import extract_thread_messages
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.model import Message
logger = logging.getLogger(__name__)
class NodeMemoryFile(BaseModel):
"""File reference stored in node memory."""
type: str # image, audio, video, document, custom
transfer_method: str # local_file, remote_url, tool_file
upload_file_id: str | None = None
tool_file_id: str | None = None
url: str | None = None
class NodeMemoryTurn(BaseModel):
"""A single dialogue turn (user + assistant) in node memory."""
user_content: str = ""
user_files: list[NodeMemoryFile] = []
assistant_content: str = ""
assistant_files: list[NodeMemoryFile] = []
class NodeMemoryData(BaseModel):
"""Root data structure for node memory storage."""
version: int = 1
# Key: workflow_run_id, Value: dialogue turn
turns: dict[str, NodeMemoryTurn] = {}
class NodeTokenBufferMemory(BaseMemory):
"""
Node-level Token Buffer Memory.
Provides node-scoped memory within a conversation. Each LLM node can maintain
its own independent conversation history, stored in object storage.
Key design: Thread tracking is delegated to Message table's parent_message_id.
Storage is indexed by workflow_run_id for easy filtering.
Storage key format: node_memory/{app_id}/{conversation_id}/{node_id}.json
"""
def __init__(
self,
app_id: str,
conversation_id: str,
node_id: str,
tenant_id: str,
model_instance: ModelInstance,
):
"""
Initialize node-level memory.
:param app_id: Application ID
:param conversation_id: Conversation ID
:param node_id: Node ID in the workflow
:param tenant_id: Tenant ID for file reconstruction
:param model_instance: Model instance for token counting
"""
self.app_id = app_id
self.conversation_id = conversation_id
self.node_id = node_id
self.tenant_id = tenant_id
self.model_instance = model_instance
self._storage_key = f"node_memory/{app_id}/{conversation_id}/{node_id}.json"
self._data: NodeMemoryData | None = None
self._dirty = False
def _load(self) -> NodeMemoryData:
"""Load data from object storage."""
if self._data is not None:
return self._data
try:
raw = storage.load_once(self._storage_key)
self._data = NodeMemoryData.model_validate_json(raw)
except Exception:
# File not found or parse error, start fresh
self._data = NodeMemoryData()
return self._data
def _save(self) -> None:
"""Save data to object storage."""
if self._data is not None:
storage.save(self._storage_key, self._data.model_dump_json().encode("utf-8"))
self._dirty = False
def _file_to_memory_file(self, file: File) -> NodeMemoryFile:
"""Convert File object to NodeMemoryFile reference."""
return NodeMemoryFile(
type=file.type.value if hasattr(file.type, "value") else str(file.type),
transfer_method=(
file.transfer_method.value if hasattr(file.transfer_method, "value") else str(file.transfer_method)
),
upload_file_id=file.related_id if file.transfer_method == FileTransferMethod.LOCAL_FILE else None,
tool_file_id=file.related_id if file.transfer_method == FileTransferMethod.TOOL_FILE else None,
url=file.remote_url if file.transfer_method == FileTransferMethod.REMOTE_URL else None,
)
def _memory_file_to_mapping(self, memory_file: NodeMemoryFile) -> dict:
"""Convert NodeMemoryFile to mapping for file_factory."""
mapping: dict = {
"type": memory_file.type,
"transfer_method": memory_file.transfer_method,
}
if memory_file.upload_file_id:
mapping["upload_file_id"] = memory_file.upload_file_id
if memory_file.tool_file_id:
mapping["tool_file_id"] = memory_file.tool_file_id
if memory_file.url:
mapping["url"] = memory_file.url
return mapping
def _rebuild_files(self, memory_files: list[NodeMemoryFile]) -> list[File]:
"""Rebuild File objects from NodeMemoryFile references."""
if not memory_files:
return []
from factories import file_factory
files = []
for mf in memory_files:
try:
mapping = self._memory_file_to_mapping(mf)
file = file_factory.build_from_mapping(mapping=mapping, tenant_id=self.tenant_id)
files.append(file)
except Exception as e:
logger.warning("Failed to rebuild file from memory: %s", e)
continue
return files
def _build_prompt_message(
self,
role: str,
content: str,
files: list[File],
detail: ImagePromptMessageContent.DETAIL = ImagePromptMessageContent.DETAIL.HIGH,
) -> PromptMessage:
"""Build PromptMessage from content and files."""
from core.file import file_manager
if not files:
if role == "user":
return UserPromptMessage(content=content)
else:
return AssistantPromptMessage(content=content)
# Build multimodal content
prompt_contents: list = []
for file in files:
try:
prompt_content = file_manager.to_prompt_message_content(file, image_detail_config=detail)
prompt_contents.append(prompt_content)
except Exception as e:
logger.warning("Failed to convert file to prompt content: %s", e)
continue
prompt_contents.append(TextPromptMessageContent(data=content))
if role == "user":
return UserPromptMessage(content=prompt_contents)
else:
return AssistantPromptMessage(content=prompt_contents)
def _get_thread_workflow_run_ids(self) -> list[str]:
"""
Get workflow_run_ids for the current thread by querying Message table.
Returns workflow_run_ids in chronological order (oldest first).
"""
# Query messages for this conversation
stmt = (
select(Message).where(Message.conversation_id == self.conversation_id).order_by(Message.created_at.desc())
)
messages = db.session.scalars(stmt.limit(500)).all()
if not messages:
return []
# Extract thread messages using existing logic
thread_messages = extract_thread_messages(messages)
# For a newly created message the answer is still empty, so skip it
if thread_messages and not thread_messages[0].answer and thread_messages[0].answer_tokens == 0:
thread_messages.pop(0)
# Reverse to get chronological order, extract workflow_run_ids
workflow_run_ids = []
for msg in reversed(thread_messages):
if msg.workflow_run_id:
workflow_run_ids.append(msg.workflow_run_id)
return workflow_run_ids
def add_messages(
self,
workflow_run_id: str,
user_content: str,
user_files: Sequence[File] | None = None,
assistant_content: str = "",
assistant_files: Sequence[File] | None = None,
) -> None:
"""
Add a dialogue turn to node memory.
Call this after LLM node execution completes.
:param workflow_run_id: Current workflow execution ID
:param user_content: User's text input
:param user_files: Files attached by user
:param assistant_content: Assistant's text response
:param assistant_files: Files generated by assistant
"""
data = self._load()
# Convert files to memory file references
user_memory_files = [self._file_to_memory_file(f) for f in (user_files or [])]
assistant_memory_files = [self._file_to_memory_file(f) for f in (assistant_files or [])]
# Store the turn indexed by workflow_run_id
data.turns[workflow_run_id] = NodeMemoryTurn(
user_content=user_content,
user_files=user_memory_files,
assistant_content=assistant_content,
assistant_files=assistant_memory_files,
)
self._dirty = True

    def get_history_prompt_messages(
        self,
        *,
        max_token_limit: int = 2000,
        message_limit: int | None = None,
    ) -> Sequence[PromptMessage]:
        """
        Retrieve history as a PromptMessage sequence.

        Thread tracking is handled by querying the Message table's parent_message_id structure.

        :param max_token_limit: Maximum tokens for history
        :param message_limit: unused, for interface compatibility
        :return: Sequence of PromptMessage for LLM context
        """
        # message_limit is unused in NodeTokenBufferMemory (uses token limit instead)
        _ = message_limit
        detail = ImagePromptMessageContent.DETAIL.HIGH

        data = self._load()
        if not data.turns:
            return []

        # Get workflow_run_ids for current thread from Message table
        thread_workflow_run_ids = self._get_thread_workflow_run_ids()
        if not thread_workflow_run_ids:
            return []

        # Build prompt messages in thread order
        prompt_messages: list[PromptMessage] = []
        for wf_run_id in thread_workflow_run_ids:
            turn = data.turns.get(wf_run_id)
            if not turn:
                # This workflow execution didn't have node memory stored
                continue

            # Build user message
            user_files = self._rebuild_files(turn.user_files) if turn.user_files else []
            user_msg = self._build_prompt_message(
                role="user",
                content=turn.user_content,
                files=user_files,
                detail=detail,
            )
            prompt_messages.append(user_msg)

            # Build assistant message
            assistant_files = self._rebuild_files(turn.assistant_files) if turn.assistant_files else []
            assistant_msg = self._build_prompt_message(
                role="assistant",
                content=turn.assistant_content,
                files=assistant_files,
                detail=detail,
            )
            prompt_messages.append(assistant_msg)

        if not prompt_messages:
            return []

        # Truncate by token limit, dropping the oldest messages first
        try:
            current_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)
            while current_tokens > max_token_limit and len(prompt_messages) > 1:
                prompt_messages.pop(0)
                current_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)
        except Exception as e:
            logger.warning("Failed to count tokens for truncation: %s", e)

        return prompt_messages

    def flush(self) -> None:
        """
        Persist buffered changes to object storage.

        Call this at the end of node execution.
        """
        if self._dirty:
            self._save()

    def clear(self) -> None:
        """Clear all messages in this node's memory."""
        self._data = NodeMemoryData()
        self._save()

    def exists(self) -> bool:
        """Check if node memory exists in storage."""
        return storage.exists(self._storage_key)
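
For orientation, the methods above form a simple write-then-read lifecycle. Below is a minimal usage sketch; the constructor signature is hypothetical (inferred from the attributes the class reads: conversation_id, tenant_id, model_instance), and node_id is an assumption.

# A minimal sketch, not the exact API: constructor arguments are
# assumptions inferred from the attributes this class reads.
memory = NodeTokenBufferMemory(
    conversation_id=conversation_id,
    tenant_id=tenant_id,
    node_id=node_id,  # hypothetical: node memory is scoped per LLM node
    model_instance=model_instance,
)

# After the node finishes one dialogue turn, record it under the run ID...
memory.add_messages(
    workflow_run_id=workflow_run_id,
    user_content="Summarize the attached report.",
    assistant_content="The report covers Q3 revenue growth of 12%.",
)
# ...and persist the buffered turn once, at the end of node execution.
memory.flush()

# On a later run, rebuild the thread's history within a token budget;
# the oldest messages are popped first when the limit is exceeded.
history = memory.get_history_prompt_messages(max_token_limit=2000)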

View File

@@ -5,12 +5,12 @@ from sqlalchemy.orm import sessionmaker
from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
from core.file import file_manager
from core.memory.base import BaseMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities import (
AssistantPromptMessage,
ImagePromptMessageContent,
PromptMessage,
PromptMessageRole,
TextPromptMessageContent,
UserPromptMessage,
)
@@ -24,7 +24,7 @@ from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.factory import DifyAPIRepositoryFactory
class TokenBufferMemory(BaseMemory):
class TokenBufferMemory:
def __init__(
self,
conversation: Conversation,
@@ -115,14 +115,10 @@ class TokenBufferMemory(BaseMemory):
return AssistantPromptMessage(content=prompt_message_contents)
def get_history_prompt_messages(
self,
*,
max_token_limit: int = 2000,
message_limit: int | None = None,
self, max_token_limit: int = 2000, message_limit: int | None = None
) -> Sequence[PromptMessage]:
"""
Get history prompt messages.
:param max_token_limit: max token limit
:param message_limit: message limit
"""
@@ -204,3 +200,44 @@ class TokenBufferMemory(BaseMemory):
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)
return prompt_messages
    def get_history_prompt_text(
        self,
        human_prefix: str = "Human",
        ai_prefix: str = "Assistant",
        max_token_limit: int = 2000,
        message_limit: int | None = None,
    ) -> str:
        """
        Get history prompt text.

        :param human_prefix: human prefix
        :param ai_prefix: ai prefix
        :param max_token_limit: max token limit
        :param message_limit: message limit
        :return:
        """
        prompt_messages = self.get_history_prompt_messages(max_token_limit=max_token_limit, message_limit=message_limit)

        string_messages = []
        for m in prompt_messages:
            if m.role == PromptMessageRole.USER:
                role = human_prefix
            elif m.role == PromptMessageRole.ASSISTANT:
                role = ai_prefix
            else:
                continue

            if isinstance(m.content, list):
                inner_msg = ""
                for content in m.content:
                    if isinstance(content, TextPromptMessageContent):
                        inner_msg += f"{content.data}\n"
                    elif isinstance(content, ImagePromptMessageContent):
                        inner_msg += "[image]\n"
                string_messages.append(f"{role}: {inner_msg.strip()}")
            else:
                message = f"{role}: {m.content}"
                string_messages.append(message)

        return "\n".join(string_messages)
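
As a quick illustration of the flattening logic, here is a hedged sketch of the output, assuming `memory` is a TokenBufferMemory holding a hypothetical two-turn history:

# A sketch only; the history contents below are invented for illustration.
text = memory.get_history_prompt_text(human_prefix="Human", ai_prefix="Assistant")
# For a history whose second user message is multimodal, `text` looks like:
#
#   Human: What does the chart show?
#   Assistant: Revenue grew 12% quarter over quarter.
#   Human: [image]
#   And this one?
#   Assistant: That panel shows margins, not revenue.
#
# Image parts render as "[image]", roles other than user/assistant are
# skipped, and messages are joined with single newlines.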

View File

@@ -5,7 +5,7 @@ from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEnti
from core.file import file_manager
from core.file.models import File
from core.helper.code_executor.jinja2.jinja2_formatter import Jinja2Formatter
from core.memory.base import BaseMemory
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_runtime.entities import (
AssistantPromptMessage,
PromptMessage,
@@ -43,7 +43,7 @@ class AdvancedPromptTransform(PromptTransform):
files: Sequence[File],
context: str | None,
memory_config: MemoryConfig | None,
memory: BaseMemory | None,
memory: TokenBufferMemory | None,
model_config: ModelConfigWithCredentialsEntity,
image_detail_config: ImagePromptMessageContent.DETAIL | None = None,
) -> list[PromptMessage]:
@@ -84,7 +84,7 @@ class AdvancedPromptTransform(PromptTransform):
files: Sequence[File],
context: str | None,
memory_config: MemoryConfig | None,
memory: BaseMemory | None,
memory: TokenBufferMemory | None,
model_config: ModelConfigWithCredentialsEntity,
image_detail_config: ImagePromptMessageContent.DETAIL | None = None,
) -> list[PromptMessage]:
@@ -145,7 +145,7 @@ class AdvancedPromptTransform(PromptTransform):
files: Sequence[File],
context: str | None,
memory_config: MemoryConfig | None,
memory: BaseMemory | None,
memory: TokenBufferMemory | None,
model_config: ModelConfigWithCredentialsEntity,
image_detail_config: ImagePromptMessageContent.DETAIL | None = None,
) -> list[PromptMessage]:
@@ -270,7 +270,7 @@ class AdvancedPromptTransform(PromptTransform):
def _set_histories_variable(
self,
memory: BaseMemory,
memory: TokenBufferMemory,
memory_config: MemoryConfig,
raw_prompt: str,
role_prefix: MemoryConfig.RolePrefix,

View File

@@ -1,4 +1,3 @@
from enum import StrEnum
from typing import Literal
from pydantic import BaseModel
@@ -6,13 +5,6 @@ from pydantic import BaseModel
from core.model_runtime.entities.message_entities import PromptMessageRole
class MemoryMode(StrEnum):
"""Memory mode for LLM nodes."""
CONVERSATION = "conversation" # Use TokenBufferMemory (default, existing behavior)
NODE = "node" # Use NodeTokenBufferMemory (Chatflow only)
class ChatModelMessage(BaseModel):
"""
Chat Message.
@@ -56,4 +48,3 @@ class MemoryConfig(BaseModel):
role_prefix: RolePrefix | None = None
window: WindowConfig
query_prompt_template: str | None = None
mode: MemoryMode = MemoryMode.CONVERSATION
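
With the mode field removed, a MemoryConfig is built purely from window and prefix settings. A hedged construction sketch follows; the RolePrefix and WindowConfig field names (user/assistant, enabled/size) are assumptions not shown in this hunk.

# A sketch under stated assumptions: the nested models' field names
# (user/assistant, enabled/size) are assumed, not taken from this diff.
memory_config = MemoryConfig(
    role_prefix=MemoryConfig.RolePrefix(user="Human", assistant="Assistant"),
    window=MemoryConfig.WindowConfig(enabled=True, size=20),
    query_prompt_template=None,
)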

View File

@@ -1,7 +1,7 @@
from typing import Any
from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity
from core.memory.base import BaseMemory
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.message_entities import PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
@@ -11,7 +11,7 @@ from core.prompt.entities.advanced_prompt_entities import MemoryConfig
class PromptTransform:
def _append_chat_histories(
self,
memory: BaseMemory,
memory: TokenBufferMemory,
memory_config: MemoryConfig,
prompt_messages: list[PromptMessage],
model_config: ModelConfigWithCredentialsEntity,
@@ -52,7 +52,7 @@ class PromptTransform:
def _get_history_messages_from_memory(
self,
memory: BaseMemory,
memory: TokenBufferMemory,
memory_config: MemoryConfig,
max_token_limit: int,
human_prefix: str | None = None,
@@ -73,7 +73,7 @@ class PromptTransform:
return memory.get_history_prompt_text(**kwargs)
def _get_history_messages_list_from_memory(
self, memory: BaseMemory, memory_config: MemoryConfig, max_token_limit: int
self, memory: TokenBufferMemory, memory_config: MemoryConfig, max_token_limit: int
) -> list[PromptMessage]:
"""Get memory messages."""
return list(
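
For context on how these helpers compose, a hedged sketch of a typical call site follows; `rest_tokens` and the branching are assumptions that mirror the helper signatures above, not code from this diff.

# A sketch only: completion-style prompts splice history in as flattened
# text, while chat-style prompts append structured messages in place.
if memory and memory_config:
    if memory_config.role_prefix:
        history_text = self._get_history_messages_from_memory(
            memory=memory,
            memory_config=memory_config,
            max_token_limit=rest_tokens,  # hypothetical remaining budget
            human_prefix=memory_config.role_prefix.user,
            ai_prefix=memory_config.role_prefix.assistant,
        )
    else:
        self._append_chat_histories(memory, memory_config, prompt_messages, model_config)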

View File

@@ -389,15 +389,14 @@ class RetrievalService:
.all()
}
records = []
include_segment_ids = set()
segment_child_map = {}
valid_dataset_documents = {}
image_doc_ids: list[Any] = []
child_index_node_ids = []
index_node_ids = []
doc_to_document_map = {}
summary_segment_ids = set() # Track segments retrieved via summary
# First pass: collect all document IDs and identify summary documents
for document in documents:
document_id = document.metadata.get("document_id")
if document_id not in dataset_documents:
@@ -408,16 +407,24 @@ class RetrievalService:
continue
valid_dataset_documents[document_id] = dataset_document
doc_id = document.metadata.get("doc_id") or ""
doc_to_document_map[doc_id] = document
# Check if this is a summary document
is_summary = document.metadata.get("is_summary", False)
if is_summary:
# For summary documents, find the original chunk via original_chunk_id
original_chunk_id = document.metadata.get("original_chunk_id")
if original_chunk_id:
summary_segment_ids.add(original_chunk_id)
continue # Skip adding to other lists for summary documents
if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
doc_id = document.metadata.get("doc_id") or ""
doc_to_document_map[doc_id] = document
if document.metadata.get("doc_type") == DocType.IMAGE:
image_doc_ids.append(doc_id)
else:
child_index_node_ids.append(doc_id)
else:
doc_id = document.metadata.get("doc_id") or ""
doc_to_document_map[doc_id] = document
if document.metadata.get("doc_type") == DocType.IMAGE:
image_doc_ids.append(doc_id)
else:
@@ -433,6 +440,7 @@ class RetrievalService:
attachment_map: dict[str, list[dict[str, Any]]] = {}
child_chunk_map: dict[str, list[ChildChunk]] = {}
doc_segment_map: dict[str, list[str]] = {}
segment_summary_map: dict[str, str] = {} # Map segment_id to summary content
with session_factory.create_session() as session:
attachments = cls.get_segment_attachment_infos(image_doc_ids, session)
@@ -447,6 +455,7 @@ class RetrievalService:
doc_segment_map[attachment["segment_id"]].append(attachment["attachment_id"])
else:
doc_segment_map[attachment["segment_id"]] = [attachment["attachment_id"]]
child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id.in_(child_index_node_ids))
child_index_nodes = session.execute(child_chunk_stmt).scalars().all()
@@ -470,6 +479,7 @@ class RetrievalService:
index_node_segments = session.execute(document_segment_stmt).scalars().all() # type: ignore
for index_node_segment in index_node_segments:
doc_segment_map[index_node_segment.id] = [index_node_segment.index_node_id]
if segment_ids:
document_segment_stmt = select(DocumentSegment).where(
DocumentSegment.enabled == True,
@@ -481,6 +491,42 @@ class RetrievalService:
if index_node_segments:
segments.extend(index_node_segments)
# Handle summary documents: query segments by original_chunk_id
if summary_segment_ids:
summary_segment_ids_list = list(summary_segment_ids)
summary_segment_stmt = select(DocumentSegment).where(
DocumentSegment.enabled == True,
DocumentSegment.status == "completed",
DocumentSegment.id.in_(summary_segment_ids_list),
)
summary_segments = session.execute(summary_segment_stmt).scalars().all() # type: ignore
segments.extend(summary_segments)
# Add summary segment IDs to segment_ids for summary query
for seg in summary_segments:
if seg.id not in segment_ids:
segment_ids.append(seg.id)
# Batch query summaries for segments retrieved via summary (only enabled summaries)
if summary_segment_ids:
from models.dataset import DocumentSegmentSummary
summaries = (
session.query(DocumentSegmentSummary)
.filter(
DocumentSegmentSummary.chunk_id.in_(list(summary_segment_ids)),
DocumentSegmentSummary.status == "completed",
DocumentSegmentSummary.enabled == True, # Only retrieve enabled summaries
)
.all()
)
for summary in summaries:
if summary.summary_content:
segment_summary_map[summary.chunk_id] = summary.summary_content
include_segment_ids = set()
segment_child_map: dict[str, dict[str, Any]] = {}
records: list[dict[str, Any]] = []
for segment in segments:
child_chunks: list[ChildChunk] = child_chunk_map.get(segment.id, [])
attachment_infos: list[dict[str, Any]] = attachment_map.get(segment.id, [])
@@ -493,7 +539,7 @@ class RetrievalService:
child_chunk_details = []
max_score = 0.0
for child_chunk in child_chunks:
document = doc_to_document_map[child_chunk.index_node_id]
document = doc_to_document_map.get(child_chunk.index_node_id)
child_chunk_detail = {
"id": child_chunk.id,
"content": child_chunk.content,
@@ -503,7 +549,7 @@ class RetrievalService:
child_chunk_details.append(child_chunk_detail)
max_score = max(max_score, document.metadata.get("score", 0.0) if document else 0.0)
for attachment_info in attachment_infos:
file_document = doc_to_document_map[attachment_info["id"]]
file_document = doc_to_document_map.get(attachment_info["id"])
max_score = max(
max_score, file_document.metadata.get("score", 0.0) if file_document else 0.0
)
@@ -576,9 +622,16 @@ class RetrievalService:
else None
)
# Extract summary if this segment was retrieved via summary
summary_content = segment_summary_map.get(segment.id)
# Create RetrievalSegments object
retrieval_segment = RetrievalSegments(
segment=segment, child_chunks=child_chunks_list, score=score, files=files
segment=segment,
child_chunks=child_chunks_list,
score=score,
files=files,
summary=summary_content,
)
result.append(retrieval_segment)
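
To make the summary handling above easier to follow, here is a simplified sketch of the mapping with toy data; it mirrors the flow but is not the service's exact code.

# Summary hits carry is_summary + original_chunk_id in their metadata;
# the service resolves them back to the original DocumentSegment rows.
summary_segment_ids: set[str] = set()
for doc in documents:  # vector-store hits
    if doc.metadata.get("is_summary") and doc.metadata.get("original_chunk_id"):
        summary_segment_ids.add(doc.metadata["original_chunk_id"])

# After loading the enabled, completed DocumentSegmentSummary rows:
segment_summary_map = {
    s.chunk_id: s.summary_content
    for s in summaries
    if s.summary_content
}
# Each record then exposes the summary alongside the original chunk:
#   RetrievalSegments(segment=segment, score=score,
#                     summary=segment_summary_map.get(segment.id))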

View File

@@ -20,3 +20,4 @@ class RetrievalSegments(BaseModel):
child_chunks: list[RetrievalChildChunk] | None = None
score: float | None = None
files: list[dict[str, str | int]] | None = None
summary: str | None = None # Summary content if retrieved via summary index
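
A toy construction of the extended record, assuming `segment` is a loaded DocumentSegment; `summary` stays None for hits that did not come through the summary index.

record = RetrievalSegments(
    segment=segment,
    child_chunks=None,
    score=0.87,  # hypothetical relevance score
    files=None,
    summary="Q3 revenue grew 12% quarter over quarter.",
)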

Some files were not shown because too many files have changed in this diff.