269 lines
10 KiB
SQL
269 lines
10 KiB
SQL
-- =====================================================================
|
|
-- FASE 2: Optimización de Continuous Aggregate Daily
|
|
-- Propósito: Pre-calcular campos de timezone para evitar conversiones costosas
|
|
-- Tiempo estimado: 2-3 horas (incluye refresh inicial)
|
|
-- Mejora esperada: 30-40% adicional
|
|
-- =====================================================================
|
|
|
|
-- IMPORTANTE: Este script recrea el CAGG con campos adicionales
|
|
-- Requiere:
|
|
-- 1. Backup de datos actual
|
|
-- 2. Ventana de mantenimiento (el refresh inicial puede tardar)
|
|
-- 3. Verificar que existe cagg_scanning_report_hourly
|
|
|
|
SET statement_timeout = 0;
|
|
SET lock_timeout = 0;
|
|
|
|
-- =====================================================================
|
|
-- Paso 1: Verificar CAGG hourly existe y está funcionando
|
|
-- =====================================================================
|
|
SELECT
|
|
view_name,
|
|
materialized_only,
|
|
compression_enabled
|
|
FROM timescaledb_information.continuous_aggregates
|
|
WHERE view_name = 'cagg_scanning_report_hourly';
|
|
|
|
-- Si no existe, DETENER AQUÍ y crearlo primero
|
|
-- (Ver script 02a_CREATE_CAGG_HOURLY.sql)
|
|
|
|
-- =====================================================================
|
|
-- Paso 2: Crear política de retención para el CAGG actual (backup)
|
|
-- =====================================================================
|
|
-- Esto asegura que podamos revertir si algo sale mal
|
|
|
|
CREATE TABLE IF NOT EXISTS backup_cagg_scanning_report_daily AS
|
|
SELECT * FROM cagg_scanning_report_daily
|
|
LIMIT 0; -- Solo estructura, sin datos (para referencia)
|
|
|
|
-- =====================================================================
|
|
-- Paso 3: Eliminar política de refresh del CAGG actual
|
|
-- =====================================================================
|
|
SELECT remove_continuous_aggregate_policy('cagg_scanning_report_daily', if_not_exists => true);
|
|
|
|
-- =====================================================================
|
|
-- Paso 4: Eliminar CAGG actual
|
|
-- =====================================================================
|
|
DROP MATERIALIZED VIEW IF EXISTS cagg_scanning_report_daily CASCADE;
|
|
|
|
-- =====================================================================
|
|
-- Paso 5: Crear nuevo CAGG optimizado con campos pre-calculados
|
|
-- =====================================================================
|
|
CREATE MATERIALIZED VIEW cagg_scanning_report_daily
|
|
WITH (timescaledb.continuous) AS
|
|
SELECT
|
|
-- ✅ Bucket diario
|
|
time_bucket('1 day', "CreationTime") AS "CreationDate",
|
|
|
|
-- ✅ Pre-calcular fecha local
|
|
DATE("CreationTime" AT TIME ZONE 'America/Mexico_City') AS "LocalDate",
|
|
|
|
-- ✅ Agrupación
|
|
"ClientMac",
|
|
"NetworkId",
|
|
"NearestApMac",
|
|
|
|
-- ✅ Datos del dispositivo (tomar el primer valor del día)
|
|
FIRST("Manufacturer", "CreationTime") AS "Manufacturer",
|
|
FIRST("OS", "CreationTime") AS "OS",
|
|
FIRST("SSID", "CreationTime") AS "SSID",
|
|
|
|
-- ✅ CORRECCIÓN: Métricas agregadas correctamente
|
|
MIN("MinimumRssi") AS "MinimumRssi",
|
|
MAX("MaximumRssi") AS "MaximumRssi",
|
|
|
|
-- ✅ CRÍTICO: Promedio ponderado correcto
|
|
SUM("RssiSum") / NULLIF(SUM("DetectionsCount"), 0) AS "AverageRssi",
|
|
|
|
-- ✅ CRÍTICO: Duración total del día
|
|
EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 AS "DurationInMinutes",
|
|
|
|
-- ✅ Fechas min/max
|
|
MIN("FirstDetection") AS "FirstDetection",
|
|
MAX("LastDetection") AS "LastDetection",
|
|
MIN("LocalFirstDetection") AS "LocalFirstDetection",
|
|
MAX("LocalLastDetection") AS "LocalLastDetection",
|
|
|
|
-- ✅ Conteos
|
|
SUM("DetectionsCount") AS "DetectionsCount",
|
|
COUNT(*) AS "HourlyPeriodsDetected",
|
|
|
|
-- ✅ NUEVOS CAMPOS: Pre-calcular timezone conversions
|
|
EXTRACT(dow FROM MIN("LocalFirstDetection")) AS "DayOfWeek",
|
|
EXTRACT(day FROM MIN("LocalFirstDetection")) AS "DayNumber",
|
|
EXTRACT(week FROM MIN("LocalFirstDetection")) AS "WeekNumber",
|
|
EXTRACT(month FROM MIN("LocalFirstDetection")) AS "MonthNumber",
|
|
EXTRACT(year FROM MIN("LocalFirstDetection")) AS "Year",
|
|
to_char(MIN("LocalFirstDetection"), 'Day') AS "DayName",
|
|
to_char(MIN("LocalFirstDetection"), 'Month') AS "MonthName",
|
|
|
|
-- ✅ CRÍTICO: Clasificación basada en métricas correctas
|
|
CASE
|
|
WHEN SUM("RssiSum") / NULLIF(SUM("DetectionsCount"), 0) > -60
|
|
AND EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 >= 5 THEN 'Visitor'
|
|
WHEN SUM("RssiSum") / NULLIF(SUM("DetectionsCount"), 0) <= -60 THEN 'PasserBy'
|
|
ELSE 'Unclassified'
|
|
END AS "PersonType",
|
|
|
|
-- ✅ Categoría de presencia
|
|
CASE
|
|
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 < 1 THEN 'Less than 1 minute'
|
|
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 1 AND 5 THEN '1-5 minutes'
|
|
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 5 AND 15 THEN '5-15 minutes'
|
|
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 15 AND 30 THEN '15-30 minutes'
|
|
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 30 AND 60 THEN '30-60 minutes'
|
|
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 BETWEEN 60 AND 120 THEN '1-2 hours'
|
|
WHEN EXTRACT(EPOCH FROM (MAX("LastDetection") - MIN("FirstDetection"))) / 60 > 120 THEN 'More than 2 hours'
|
|
ELSE NULL
|
|
END AS "PresenceCategory"
|
|
|
|
FROM cagg_scanning_report_hourly
|
|
WHERE "CreationTime" < date_trunc('day', now())
|
|
GROUP BY
|
|
time_bucket('1 day', "CreationTime"),
|
|
DATE("CreationTime" AT TIME ZONE 'America/Mexico_City'),
|
|
"ClientMac",
|
|
"NetworkId",
|
|
"NearestApMac"
|
|
ORDER BY 1 DESC;
|
|
|
|
-- =====================================================================
|
|
-- Paso 6: Crear índices en el CAGG optimizado
|
|
-- =====================================================================
|
|
-- Estos índices mejoran las queries contra el CAGG directamente
|
|
|
|
CREATE INDEX CONCURRENTLY idx_cagg_daily_creation_date
|
|
ON cagg_scanning_report_daily ("CreationDate" DESC);
|
|
|
|
CREATE INDEX CONCURRENTLY idx_cagg_daily_local_date
|
|
ON cagg_scanning_report_daily ("LocalDate" DESC);
|
|
|
|
CREATE INDEX CONCURRENTLY idx_cagg_daily_network_date
|
|
ON cagg_scanning_report_daily ("NetworkId", "LocalDate" DESC);
|
|
|
|
CREATE INDEX CONCURRENTLY idx_cagg_daily_client_mac
|
|
ON cagg_scanning_report_daily ("ClientMac");
|
|
|
|
CREATE INDEX CONCURRENTLY idx_cagg_daily_person_type
|
|
ON cagg_scanning_report_daily ("PersonType")
|
|
WHERE "PersonType" IS NOT NULL;
|
|
|
|
-- =====================================================================
|
|
-- Paso 7: Aplicar política de refresh optimizada
|
|
-- =====================================================================
|
|
-- Refresh cada 15 minutos, procesando últimos 3 días
|
|
|
|
SELECT add_continuous_aggregate_policy('cagg_scanning_report_daily',
|
|
start_offset => INTERVAL '3 day',
|
|
end_offset => INTERVAL '1 day',
|
|
schedule_interval => INTERVAL '15 minutes'
|
|
);
|
|
|
|
-- =====================================================================
|
|
-- Paso 8: Trigger manual del refresh inicial
|
|
-- =====================================================================
|
|
-- ADVERTENCIA: Esto puede tardar mucho tiempo (10-60 minutos)
|
|
-- Monitorear con: SELECT * FROM timescaledb_information.job_stats;
|
|
|
|
CALL refresh_continuous_aggregate('cagg_scanning_report_daily', NULL, NULL);
|
|
|
|
-- =====================================================================
|
|
-- Paso 9: Actualizar estadísticas
|
|
-- =====================================================================
|
|
ANALYZE cagg_scanning_report_daily;
|
|
|
|
-- =====================================================================
|
|
-- Paso 10: Verificar datos del nuevo CAGG
|
|
-- =====================================================================
|
|
SELECT
|
|
'Total registros' AS metric,
|
|
COUNT(*) AS value
|
|
FROM cagg_scanning_report_daily
|
|
|
|
UNION ALL
|
|
|
|
SELECT
|
|
'Rango de fechas' AS metric,
|
|
MIN("LocalDate")::text || ' a ' || MAX("LocalDate")::text AS value
|
|
FROM cagg_scanning_report_daily
|
|
|
|
UNION ALL
|
|
|
|
SELECT
|
|
'Redes únicas' AS metric,
|
|
COUNT(DISTINCT "NetworkId")::text AS value
|
|
FROM cagg_scanning_report_daily
|
|
|
|
UNION ALL
|
|
|
|
SELECT
|
|
'Tipos de persona' AS metric,
|
|
string_agg(DISTINCT "PersonType", ', ') AS value
|
|
FROM cagg_scanning_report_daily;
|
|
|
|
-- =====================================================================
|
|
-- Paso 11: Comparar rendimiento (opcional)
|
|
-- =====================================================================
|
|
-- Query de ejemplo para medir mejora
|
|
|
|
EXPLAIN ANALYZE
|
|
SELECT
|
|
"NetworkId",
|
|
"LocalDate",
|
|
COUNT(*) AS detections,
|
|
SUM("DetectionsCount") AS total_count,
|
|
AVG("DurationInMinutes") AS avg_duration
|
|
FROM cagg_scanning_report_daily
|
|
WHERE "LocalDate" BETWEEN CURRENT_DATE - INTERVAL '30 days' AND CURRENT_DATE
|
|
AND "NetworkId" IN (1, 2, 3, 4, 5)
|
|
AND "PersonType" = 'Visitor'
|
|
GROUP BY "NetworkId", "LocalDate"
|
|
ORDER BY "LocalDate" DESC;
|
|
|
|
-- =====================================================================
|
|
-- Paso 12: Monitoreo de jobs de TimescaleDB
|
|
-- =====================================================================
|
|
SELECT
|
|
job_id,
|
|
application_name,
|
|
schedule_interval,
|
|
last_run_status,
|
|
last_run_started_at,
|
|
last_successful_finish,
|
|
next_start
|
|
FROM timescaledb_information.job_stats
|
|
WHERE application_name LIKE '%cagg_scanning_report_daily%';
|
|
|
|
-- =====================================================================
|
|
-- ROLLBACK PLAN (si algo sale mal)
|
|
-- =====================================================================
|
|
/*
|
|
-- 1. Eliminar nuevo CAGG
|
|
DROP MATERIALIZED VIEW IF EXISTS cagg_scanning_report_daily CASCADE;
|
|
|
|
-- 2. Restaurar CAGG original
|
|
CREATE MATERIALIZED VIEW cagg_scanning_report_daily
|
|
WITH (timescaledb.continuous) AS
|
|
... (usar definición original de cagg_scanning_report_daily.sql)
|
|
|
|
-- 3. Re-aplicar política
|
|
SELECT add_continuous_aggregate_policy('cagg_scanning_report_daily',
|
|
start_offset => INTERVAL '3 day',
|
|
end_offset => INTERVAL '1 day',
|
|
schedule_interval => INTERVAL '15 minutes'
|
|
);
|
|
|
|
-- 4. Refresh manual
|
|
CALL refresh_continuous_aggregate('cagg_scanning_report_daily', NULL, NULL);
|
|
*/
|
|
|
|
-- =====================================================================
|
|
-- PRÓXIMO PASO: Actualizar vista scanning_report_daily_full
|
|
-- =====================================================================
|
|
-- Ver script: 03_OPTIMIZE_FULL_VIEW_PHASE2.sql
|
|
-- Ese script actualizará la vista para usar los nuevos campos pre-calculados
|
|
|
|
-- =====================================================================
|
|
-- FIN DE FASE 2 - PARTE 1 (CAGG)
|
|
-- =====================================================================
|