Temp_MSSPLASHPage/SQL/Scanning/02_OPTIMIZE_CAGG_DAILY_PHASE2.sql
-- =====================================================================
-- PHASE 2: Daily Continuous Aggregate Optimization
-- Purpose: pre-compute timezone fields to avoid costly conversions
-- Estimated time: 2-3 hours (including the initial refresh)
-- Expected improvement: an additional 30-40%
-- =====================================================================
-- IMPORTANT: this script recreates the CAGG with additional columns
-- Requires:
-- 1. A backup of the current data
-- 2. A maintenance window (the initial refresh can take a while)
-- 3. Verifying that cagg_scanning_report_hourly exists
SET statement_timeout = 0;
SET lock_timeout = 0;
-- =====================================================================
-- Step 1: Verify that the hourly CAGG exists and is working
-- =====================================================================
SELECT
view_name,
materialized_only,
compression_enabled
FROM timescaledb_information.continuous_aggregates
WHERE view_name = 'cagg_scanning_report_hourly';
-- If it does not exist, STOP HERE and create it first
-- (see script 02a_CREATE_CAGG_HOURLY.sql)
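-- The daily CAGG defined below also expects a specific set of columns in the
-- hourly CAGG. A quick existence check along these lines may help (a sketch
-- using the standard information_schema; adjust the schema filter if needed):
SELECT column_name
FROM information_schema.columns
WHERE table_name = 'cagg_scanning_report_hourly'
AND column_name IN ('CreationTime', 'ClientMac', 'NetworkId', 'NearestApMac',
    'Manufacturer', 'OS', 'SSID', 'MinimumRssi', 'MaximumRssi', 'RssiSum',
    'DetectionsCount', 'FirstDetection', 'LastDetection',
    'LocalFirstDetection', 'LocalLastDetection');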
-- =====================================================================
-- Step 2: Create a backup table from the current CAGG (structure only)
-- =====================================================================
-- This keeps a reference to the current structure in case we need to revert
CREATE TABLE IF NOT EXISTS backup_cagg_scanning_report_daily AS
SELECT * FROM cagg_scanning_report_daily
LIMIT 0; -- Structure only, no data (for reference); drop the LIMIT to also copy the rows
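-- Before dropping the CAGG, it is also worth capturing its current definition
-- so the ROLLBACK PLAN at the end of this file has something concrete to
-- restore (view_definition is a column of timescaledb_information.continuous_aggregates):
SELECT view_definition
FROM timescaledb_information.continuous_aggregates
WHERE view_name = 'cagg_scanning_report_daily';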
-- =====================================================================
-- Step 3: Remove the refresh policy from the current CAGG
-- =====================================================================
SELECT remove_continuous_aggregate_policy('cagg_scanning_report_daily', if_not_exists => true);
-- =====================================================================
-- Step 4: Drop the current CAGG
-- =====================================================================
DROP MATERIALIZED VIEW IF EXISTS cagg_scanning_report_daily CASCADE;
-- =====================================================================
-- Step 5: Create the new optimized CAGG with pre-computed fields
-- =====================================================================
CREATE MATERIALIZED VIEW cagg_scanning_report_daily
WITH (timescaledb.continuous) AS
SELECT
-- ✅ Daily bucket
time_bucket('1 day', "CreationTime") AS "CreationDate",
-- ✅ Pre-computed local date
DATE("CreationTime" AT TIME ZONE 'America/Mexico_City') AS "LocalDate",
-- ✅ Grouping keys
"ClientMac",
"NetworkId",
"NearestApMac",
-- ✅ Device data (take the first value seen that day)
FIRST("Manufacturer", "CreationTime") AS "Manufacturer",
FIRST("OS", "CreationTime") AS "OS",
FIRST("SSID", "CreationTime") AS "SSID",
-- ✅ FIX: metrics aggregated correctly
MIN("MinimumRssi") AS "MinimumRssi",
MAX("MaximumRssi") AS "MaximumRssi",
-- ✅ CRITICAL: properly weighted average
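-- Summing RssiSum and DetectionsCount from the hourly rows weights each hour
-- by how many detections it contains. Illustrative numbers (not real data):
-- an hour with RssiSum = -500 over 10 detections plus an hour with
-- RssiSum = -90 over 1 detection give (-500 + -90) / 11 ≈ -53.6, whereas
-- averaging the two hourly means (-50 and -90) would report -70.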
SUM("RssiSum") / NULLIF(SUM("DetectionsCount"), 0) AS "AverageRssi",
-- ✅ CRITICAL: total duration for the day
EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 AS "DurationInMinutes",
-- ✅ Min/max timestamps
MIN("FirstDetection") AS "FirstDetection",
MAX("LastDetection") AS "LastDetection",
MIN("LocalFirstDetection") AS "LocalFirstDetection",
MAX("LocalLastDetection") AS "LocalLastDetection",
-- ✅ Counts
SUM("DetectionsCount") AS "DetectionsCount",
COUNT(*) AS "HourlyPeriodsDetected",
-- ✅ NEW FIELDS: pre-computed timezone conversions
EXTRACT(dow FROM MIN("LocalFirstDetection")) AS "DayOfWeek",
EXTRACT(day FROM MIN("LocalFirstDetection")) AS "DayNumber",
EXTRACT(week FROM MIN("LocalFirstDetection")) AS "WeekNumber",
EXTRACT(month FROM MIN("LocalFirstDetection")) AS "MonthNumber",
EXTRACT(year FROM MIN("LocalFirstDetection")) AS "Year",
to_char(MIN("LocalFirstDetection"), 'Day') AS "DayName",
to_char(MIN("LocalFirstDetection"), 'Month') AS "MonthName",
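-- Note: EXTRACT(dow ...) returns 0 (Sunday) through 6 (Saturday); use isodow
-- if the reporting layer expects 1 (Monday) through 7 (Sunday). to_char with
-- 'Day'/'Month' blank-pads the name to 9 characters; 'FMDay'/'FMMonth' would
-- return it trimmed.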
-- ✅ CRITICAL: classification based on the corrected metrics
CASE
WHEN SUM("RssiSum") / NULLIF(SUM("DetectionsCount"), 0) > -60
AND EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 >= 5 THEN 'Visitor'
WHEN SUM("RssiSum") / NULLIF(SUM("DetectionsCount"), 0) <= -60 THEN 'PasserBy'
ELSE 'Unclassified'
END AS "PersonType",
-- ✅ Presence category
CASE
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 < 1 THEN 'Less than 1 minute'
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 1 AND 5 THEN '1-5 minutes'
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 5 AND 15 THEN '5-15 minutes'
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 15 AND 30 THEN '15-30 minutes'
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 30 AND 60 THEN '30-60 minutes'
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 60 AND 120 THEN '1-2 hours'
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 > 120 THEN 'More than 2 hours'
ELSE NULL
END AS "PresenceCategory"
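-- The BETWEEN bounds above touch at 5, 15, 30, 60 and 120 minutes; because a
-- CASE returns the first matching branch, a duration of exactly 5 minutes
-- falls into '1-5 minutes', exactly 15 into '5-15 minutes', and so on.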
FROM cagg_scanning_report_hourly
WHERE "CreationTime" < date_trunc('day', now())
GROUP BY
time_bucket('1 day', "CreationTime"),
DATE("CreationTime" AT TIME ZONE 'America/Mexico_City'),
"ClientMac",
"NetworkId",
"NearestApMac"
ORDER BY 1 DESC
WITH NO DATA;
-- Created WITH NO DATA so the (potentially long) initial materialization runs
-- explicitly in Step 8 instead of during CREATE.
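-- Quick check that the new CAGG was registered (a sketch; the "finalized"
-- column is available on TimescaleDB 2.7+):
SELECT view_name, materialized_only, finalized
FROM timescaledb_information.continuous_aggregates
WHERE view_name = 'cagg_scanning_report_daily';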
-- =====================================================================
-- Step 6: Create indexes on the optimized CAGG
-- =====================================================================
-- These indexes speed up queries that hit the CAGG directly
CREATE INDEX CONCURRENTLY idx_cagg_daily_creation_date
ON cagg_scanning_report_daily ("CreationDate" DESC);
CREATE INDEX CONCURRENTLY idx_cagg_daily_local_date
ON cagg_scanning_report_daily ("LocalDate" DESC);
CREATE INDEX CONCURRENTLY idx_cagg_daily_network_date
ON cagg_scanning_report_daily ("NetworkId", "LocalDate" DESC);
CREATE INDEX CONCURRENTLY idx_cagg_daily_client_mac
ON cagg_scanning_report_daily ("ClientMac");
CREATE INDEX CONCURRENTLY idx_cagg_daily_person_type
ON cagg_scanning_report_daily ("PersonType")
WHERE "PersonType" IS NOT NULL;
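-- These indexes should end up on the CAGG's materialization hypertable rather
-- than on the view itself; a sketch for locating them (assumes the
-- materialization_hypertable_* columns exposed by recent TimescaleDB versions):
SELECT i.indexname, i.indexdef
FROM timescaledb_information.continuous_aggregates ca
JOIN pg_indexes i
ON i.schemaname = ca.materialization_hypertable_schema
AND i.tablename = ca.materialization_hypertable_name
WHERE ca.view_name = 'cagg_scanning_report_daily';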
-- =====================================================================
-- Step 7: Apply the optimized refresh policy
-- =====================================================================
-- Refresh every 15 minutes, covering the window from 3 days ago to 1 day ago
SELECT add_continuous_aggregate_policy('cagg_scanning_report_daily',
start_offset => INTERVAL '3 day',
end_offset => INTERVAL '1 day',
schedule_interval => INTERVAL '15 minutes'
);
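-- Optional check that the policy was registered; proc_name below is the
-- internal name TimescaleDB uses for CAGG refresh policy jobs:
SELECT job_id, application_name, schedule_interval, config
FROM timescaledb_information.jobs
WHERE proc_name = 'policy_refresh_continuous_aggregate';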
-- =====================================================================
-- Step 8: Trigger the initial refresh manually
-- =====================================================================
-- WARNING: this can take a long time (10-60 minutes)
-- Scheduled runs can be monitored with: SELECT * FROM timescaledb_information.job_stats;
-- this manual CALL can be watched from another session via pg_stat_activity
CALL refresh_continuous_aggregate('cagg_scanning_report_daily', NULL, NULL);
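-- If a single full refresh does not fit the maintenance window, it can be run
-- in smaller slices instead (the date ranges below are placeholders):
-- CALL refresh_continuous_aggregate('cagg_scanning_report_daily', '2025-01-01', '2025-06-01');
-- CALL refresh_continuous_aggregate('cagg_scanning_report_daily', '2025-06-01', NULL);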
-- =====================================================================
-- Step 9: Update planner statistics
-- =====================================================================
ANALYZE cagg_scanning_report_daily;
-- =====================================================================
-- Step 10: Verify the data in the new CAGG
-- =====================================================================
SELECT
'Total rows' AS metric,
COUNT(*) AS value
FROM cagg_scanning_report_daily
UNION ALL
SELECT
'Date range' AS metric,
MIN("LocalDate")::text || ' to ' || MAX("LocalDate")::text AS value
FROM cagg_scanning_report_daily
UNION ALL
SELECT
'Unique networks' AS metric,
COUNT(DISTINCT "NetworkId")::text AS value
FROM cagg_scanning_report_daily
UNION ALL
SELECT
'Person types' AS metric,
string_agg(DISTINCT "PersonType", ', ') AS value
FROM cagg_scanning_report_daily;
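-- Optional spot check: detections in the daily CAGG should match the hourly
-- CAGG for any fully materialized local date (the date below is a placeholder):
-- SELECT
--     (SELECT SUM("DetectionsCount") FROM cagg_scanning_report_daily
--      WHERE "LocalDate" = DATE '2025-10-01') AS daily_total,
--     (SELECT SUM("DetectionsCount") FROM cagg_scanning_report_hourly
--      WHERE DATE("CreationTime" AT TIME ZONE 'America/Mexico_City') = DATE '2025-10-01') AS hourly_total;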
-- =====================================================================
-- Step 11: Compare performance (optional)
-- =====================================================================
-- Example query for measuring the improvement
EXPLAIN ANALYZE
SELECT
"NetworkId",
"LocalDate",
COUNT(*) AS detections,
SUM("DetectionsCount") AS total_count,
AVG("DurationInMinutes") AS avg_duration
FROM cagg_scanning_report_daily
WHERE "LocalDate" BETWEEN CURRENT_DATE - INTERVAL '30 days' AND CURRENT_DATE
AND "NetworkId" IN (1, 2, 3, 4, 5)
AND "PersonType" = 'Visitor'
GROUP BY "NetworkId", "LocalDate"
ORDER BY "LocalDate" DESC;
-- =====================================================================
-- Step 12: Monitor TimescaleDB background jobs
-- =====================================================================
SELECT
j.job_id,
j.application_name,
j.schedule_interval,
js.last_run_status,
js.last_run_started_at,
js.last_successful_finish,
js.next_start
FROM timescaledb_information.jobs j
JOIN timescaledb_information.job_stats js USING (job_id)
WHERE j.proc_name = 'policy_refresh_continuous_aggregate';
-- Note: application_name and schedule_interval come from timescaledb_information.jobs
-- (job_stats does not expose them), hence the join. This lists every CAGG refresh
-- policy; the daily CAGG's job can be identified through its config / hypertable.
-- =====================================================================
-- ROLLBACK PLAN (if something goes wrong)
-- =====================================================================
/*
-- 1. Drop the new CAGG
DROP MATERIALIZED VIEW IF EXISTS cagg_scanning_report_daily CASCADE;
-- 2. Restore the original CAGG
CREATE MATERIALIZED VIEW cagg_scanning_report_daily
WITH (timescaledb.continuous) AS
... (use the original definition from cagg_scanning_report_daily.sql)
-- 3. Re-apply the policy
SELECT add_continuous_aggregate_policy('cagg_scanning_report_daily',
start_offset => INTERVAL '3 day',
end_offset => INTERVAL '1 day',
schedule_interval => INTERVAL '15 minutes'
);
-- 4. Manual refresh
CALL refresh_continuous_aggregate('cagg_scanning_report_daily', NULL, NULL);
*/
-- =====================================================================
-- NEXT STEP: Update the scanning_report_daily_full view
-- =====================================================================
-- See script: 03_OPTIMIZE_FULL_VIEW_PHASE2.sql
-- That script updates the view to use the new pre-computed fields
-- =====================================================================
-- END OF PHASE 2 - PART 1 (CAGG)
-- =====================================================================