Añadir kit_eda

This commit is contained in:
Neo And 2025-08-23 01:09:18 -06:00
parent ad396d79c8
commit dd433d5ab2

140
kit_eda Normal file
View File

@ -0,0 +1,140 @@
/* ==========================================================
KIT EDA POSTGRESQL 15 QUERIES CLAVE
Autor: NeoAnd
📌 Instrucciones rápidas:
-- Ejecuta:
SELECT run_eda_auto_export('public','clientes');
-- Luego en DBeaver:
-- Busca en el navegador las tablas temporales que empiezan con "tmp_"
-- Ej: tmp_total_rows, tmp_nulls, tmp_top10_micolumna, tmp_stats_micolumna, tmp_time_midate, tmp_corr
Ejecuta por bloque (Cmd+Enter en Mac, Ctrl+Enter en Windows).
========================================================== */
/* ==========================================================
Función EDA Autoadaptativa con Export
Autor: Anderson + Copilot
Uso: SELECT run_eda_auto_export('schema','tabla');
========================================================== */
CREATE OR REPLACE FUNCTION run_eda_auto_export(p_schema TEXT, p_table TEXT)
RETURNS void AS
DECLARE
r RECORD;
col_num TEXT[];
col_cat TEXT[];
col_date TEXT[];
sql_cmd TEXT;
BEGIN
-- 🔍 Clasificación automática de columnas
SELECT array_agg(column_name) FILTER (WHERE data_type LIKE '%int%' OR data_type LIKE '%numeric%' OR data_type LIKE '%double%')
INTO col_num
FROM information_schema.columns
WHERE table_schema = p_schema AND table_name = p_table;
SELECT array_agg(column_name) FILTER (WHERE data_type IN ('character varying','text','uuid'))
INTO col_cat
FROM information_schema.columns
WHERE table_schema = p_schema AND table_name = p_table;
SELECT array_agg(column_name) FILTER (WHERE data_type LIKE '%date%' OR data_type LIKE '%timestamp%')
INTO col_date
FROM information_schema.columns
WHERE table_schema = p_schema AND table_name = p_table;
-- 📊 1. Conteo total
EXECUTE format('DROP TABLE IF EXISTS tmp_total_rows;
CREATE TEMP TABLE tmp_total_rows AS
SELECT COUNT(*) AS total FROM %I.%I',
p_schema, p_table);
-- 📊 2. Nulos por columna
sql_cmd := '';
FOR r IN
SELECT column_name
FROM information_schema.columns
WHERE table_schema = p_schema AND table_name = p_table
LOOP
sql_cmd := sql_cmd || format(
'SELECT %L AS columna, COUNT(*) AS nulls FROM %I.%I WHERE %I IS NULL UNION ALL ',
r.column_name, p_schema, p_table, r.column_name
);
END LOOP;
sql_cmd := left(sql_cmd, length(sql_cmd)-11); -- quitar último UNION ALL
EXECUTE format('DROP TABLE IF EXISTS tmp_nulls;
CREATE TEMP TABLE tmp_nulls AS %s', sql_cmd);
-- 📊 3. Top 10 valores más frecuentes por columna categórica
IF col_cat IS NOT NULL THEN
FOREACH r IN ARRAY col_cat LOOP
EXECUTE format(
'DROP TABLE IF EXISTS tmp_top10_%I;
CREATE TEMP TABLE tmp_top10_%I AS
SELECT %I, COUNT(*) AS freq
FROM %I.%I
GROUP BY %I
ORDER BY freq DESC
LIMIT 10',
r, r, r, p_schema, p_table, r
);
END LOOP;
END IF;
-- 📊 4. Estadísticas descriptivas de columnas numéricas
IF col_num IS NOT NULL THEN
FOREACH r IN ARRAY col_num LOOP
EXECUTE format(
'DROP TABLE IF EXISTS tmp_stats_%I;
CREATE TEMP TABLE tmp_stats_%I AS
SELECT MIN(%I) AS minimo,
MAX(%I) AS maximo,
AVG(%I) AS media,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY %I) AS mediana
FROM %I.%I',
r, r, r, r, r, r, r, p_schema, p_table
);
END LOOP;
END IF;
-- 📊 5. Distribución temporal por columnas de fecha
IF col_date IS NOT NULL THEN
FOREACH r IN ARRAY col_date LOOP
EXECUTE format(
'DROP TABLE IF EXISTS tmp_time_%I;
CREATE TEMP TABLE tmp_time_%I AS
SELECT date_trunc(''day'', %I) AS dia, COUNT(*)
FROM %I.%I
GROUP BY dia
ORDER BY dia',
r, r, r, p_schema, p_table
);
END LOOP;
END IF;
-- 📊 6. Correlaciones numéricas
IF array_length(col_num,1) >= 2 THEN
EXECUTE 'DROP TABLE IF EXISTS tmp_corr;
CREATE TEMP TABLE tmp_corr (col_x TEXT, col_y TEXT, correlacion NUMERIC)';
FOR i IN 1..array_length(col_num,1)-1 LOOP
FOR j IN i+1..array_length(col_num,1) LOOP
EXECUTE format(
'INSERT INTO tmp_corr
SELECT %L, %L, corr(%I,%I)
FROM %I.%I',
col_num[i], col_num[j], col_num[i], col_num[j], p_schema, p_table
);
END LOOP;
END LOOP;
END IF;
RAISE NOTICE '✅ Tablas temporales creadas. Revísalas en el navegador de DBeaver (prefijo tmp_)';
END;
LANGUAGE plpgsql;