Añadir kit_eda
This commit is contained in:
parent
ad396d79c8
commit
dd433d5ab2
140
kit_eda
Normal file
140
kit_eda
Normal file
@ -0,0 +1,140 @@
|
||||
/* ==========================================================
|
||||
KIT EDA POSTGRESQL – 15 QUERIES CLAVE
|
||||
Autor: NeoAnd
|
||||
|
||||
📌 Instrucciones rápidas:
|
||||
|
||||
-- Ejecuta:
|
||||
SELECT run_eda_auto_export('public','clientes');
|
||||
|
||||
-- Luego en DBeaver:
|
||||
-- Busca en el navegador las tablas temporales que empiezan con "tmp_"
|
||||
-- Ej: tmp_total_rows, tmp_nulls, tmp_top10_micolumna, tmp_stats_micolumna, tmp_time_midate, tmp_corr
|
||||
|
||||
Ejecuta por bloque (Cmd+Enter en Mac, Ctrl+Enter en Windows).
|
||||
========================================================== */
|
||||
|
||||
|
||||
/* ==========================================================
|
||||
Función EDA Autoadaptativa con Export
|
||||
Autor: Anderson + Copilot
|
||||
Uso: SELECT run_eda_auto_export('schema','tabla');
|
||||
========================================================== */
|
||||
|
||||
CREATE OR REPLACE FUNCTION run_eda_auto_export(p_schema TEXT, p_table TEXT)
|
||||
RETURNS void AS
|
||||
|
||||
|
||||
DECLARE
|
||||
r RECORD;
|
||||
col_num TEXT[];
|
||||
col_cat TEXT[];
|
||||
col_date TEXT[];
|
||||
sql_cmd TEXT;
|
||||
BEGIN
|
||||
-- 🔍 Clasificación automática de columnas
|
||||
SELECT array_agg(column_name) FILTER (WHERE data_type LIKE '%int%' OR data_type LIKE '%numeric%' OR data_type LIKE '%double%')
|
||||
INTO col_num
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = p_schema AND table_name = p_table;
|
||||
|
||||
SELECT array_agg(column_name) FILTER (WHERE data_type IN ('character varying','text','uuid'))
|
||||
INTO col_cat
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = p_schema AND table_name = p_table;
|
||||
|
||||
SELECT array_agg(column_name) FILTER (WHERE data_type LIKE '%date%' OR data_type LIKE '%timestamp%')
|
||||
INTO col_date
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = p_schema AND table_name = p_table;
|
||||
|
||||
-- 📊 1. Conteo total
|
||||
EXECUTE format('DROP TABLE IF EXISTS tmp_total_rows;
|
||||
CREATE TEMP TABLE tmp_total_rows AS
|
||||
SELECT COUNT(*) AS total FROM %I.%I',
|
||||
p_schema, p_table);
|
||||
|
||||
-- 📊 2. Nulos por columna
|
||||
sql_cmd := '';
|
||||
FOR r IN
|
||||
SELECT column_name
|
||||
FROM information_schema.columns
|
||||
WHERE table_schema = p_schema AND table_name = p_table
|
||||
LOOP
|
||||
sql_cmd := sql_cmd || format(
|
||||
'SELECT %L AS columna, COUNT(*) AS nulls FROM %I.%I WHERE %I IS NULL UNION ALL ',
|
||||
r.column_name, p_schema, p_table, r.column_name
|
||||
);
|
||||
END LOOP;
|
||||
sql_cmd := left(sql_cmd, length(sql_cmd)-11); -- quitar último UNION ALL
|
||||
EXECUTE format('DROP TABLE IF EXISTS tmp_nulls;
|
||||
CREATE TEMP TABLE tmp_nulls AS %s', sql_cmd);
|
||||
|
||||
-- 📊 3. Top 10 valores más frecuentes por columna categórica
|
||||
IF col_cat IS NOT NULL THEN
|
||||
FOREACH r IN ARRAY col_cat LOOP
|
||||
EXECUTE format(
|
||||
'DROP TABLE IF EXISTS tmp_top10_%I;
|
||||
CREATE TEMP TABLE tmp_top10_%I AS
|
||||
SELECT %I, COUNT(*) AS freq
|
||||
FROM %I.%I
|
||||
GROUP BY %I
|
||||
ORDER BY freq DESC
|
||||
LIMIT 10',
|
||||
r, r, r, p_schema, p_table, r
|
||||
);
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
-- 📊 4. Estadísticas descriptivas de columnas numéricas
|
||||
IF col_num IS NOT NULL THEN
|
||||
FOREACH r IN ARRAY col_num LOOP
|
||||
EXECUTE format(
|
||||
'DROP TABLE IF EXISTS tmp_stats_%I;
|
||||
CREATE TEMP TABLE tmp_stats_%I AS
|
||||
SELECT MIN(%I) AS minimo,
|
||||
MAX(%I) AS maximo,
|
||||
AVG(%I) AS media,
|
||||
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY %I) AS mediana
|
||||
FROM %I.%I',
|
||||
r, r, r, r, r, r, r, p_schema, p_table
|
||||
);
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
-- 📊 5. Distribución temporal por columnas de fecha
|
||||
IF col_date IS NOT NULL THEN
|
||||
FOREACH r IN ARRAY col_date LOOP
|
||||
EXECUTE format(
|
||||
'DROP TABLE IF EXISTS tmp_time_%I;
|
||||
CREATE TEMP TABLE tmp_time_%I AS
|
||||
SELECT date_trunc(''day'', %I) AS dia, COUNT(*)
|
||||
FROM %I.%I
|
||||
GROUP BY dia
|
||||
ORDER BY dia',
|
||||
r, r, r, p_schema, p_table
|
||||
);
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
-- 📊 6. Correlaciones numéricas
|
||||
IF array_length(col_num,1) >= 2 THEN
|
||||
EXECUTE 'DROP TABLE IF EXISTS tmp_corr;
|
||||
CREATE TEMP TABLE tmp_corr (col_x TEXT, col_y TEXT, correlacion NUMERIC)';
|
||||
FOR i IN 1..array_length(col_num,1)-1 LOOP
|
||||
FOR j IN i+1..array_length(col_num,1) LOOP
|
||||
EXECUTE format(
|
||||
'INSERT INTO tmp_corr
|
||||
SELECT %L, %L, corr(%I,%I)
|
||||
FROM %I.%I',
|
||||
col_num[i], col_num[j], col_num[i], col_num[j], p_schema, p_table
|
||||
);
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
RAISE NOTICE '✅ Tablas temporales creadas. Revísalas en el navegador de DBeaver (prefijo tmp_)';
|
||||
END;
|
||||
|
||||
|
||||
LANGUAGE plpgsql;
|
||||
Loading…
Reference in New Issue
Block a user