/* ========================================================== KIT EDA POSTGRESQL – 15 QUERIES CLAVE Autor: NeoAnd 📌 Instrucciones rápidas: -- Ejecuta: SELECT run_eda_auto_export('public','clientes'); -- Luego en DBeaver: -- Busca en el navegador las tablas temporales que empiezan con "tmp_" -- Ej: tmp_total_rows, tmp_nulls, tmp_top10_micolumna, tmp_stats_micolumna, tmp_time_midate, tmp_corr Ejecuta por bloque (Cmd+Enter en Mac, Ctrl+Enter en Windows). ========================================================== */ CREATE OR REPLACE FUNCTION run_eda_auto_export(p_schema TEXT, p_table TEXT) RETURNS void AS DECLARE r RECORD; col_num TEXT[]; col_cat TEXT[]; col_date TEXT[]; sql_cmd TEXT; BEGIN -- 🔍 Clasificación automática de columnas SELECT array_agg(column_name) FILTER (WHERE data_type LIKE '%int%' OR data_type LIKE '%numeric%' OR data_type LIKE '%double%') INTO col_num FROM information_schema.columns WHERE table_schema = p_schema AND table_name = p_table; SELECT array_agg(column_name) FILTER (WHERE data_type IN ('character varying','text','uuid')) INTO col_cat FROM information_schema.columns WHERE table_schema = p_schema AND table_name = p_table; SELECT array_agg(column_name) FILTER (WHERE data_type LIKE '%date%' OR data_type LIKE '%timestamp%') INTO col_date FROM information_schema.columns WHERE table_schema = p_schema AND table_name = p_table; -- 📊 1. Conteo total EXECUTE format('DROP TABLE IF EXISTS tmp_total_rows; CREATE TEMP TABLE tmp_total_rows AS SELECT COUNT(*) AS total FROM %I.%I', p_schema, p_table); -- 📊 2. Nulos por columna sql_cmd := ''; FOR r IN SELECT column_name FROM information_schema.columns WHERE table_schema = p_schema AND table_name = p_table LOOP sql_cmd := sql_cmd || format( 'SELECT %L AS columna, COUNT(*) AS nulls FROM %I.%I WHERE %I IS NULL UNION ALL ', r.column_name, p_schema, p_table, r.column_name ); END LOOP; sql_cmd := left(sql_cmd, length(sql_cmd)-11); -- quitar último UNION ALL EXECUTE format('DROP TABLE IF EXISTS tmp_nulls; CREATE TEMP TABLE tmp_nulls AS %s', sql_cmd); -- 📊 3. Top 10 valores más frecuentes por columna categórica IF col_cat IS NOT NULL THEN FOREACH r IN ARRAY col_cat LOOP EXECUTE format( 'DROP TABLE IF EXISTS tmp_top10_%I; CREATE TEMP TABLE tmp_top10_%I AS SELECT %I, COUNT(*) AS freq FROM %I.%I GROUP BY %I ORDER BY freq DESC LIMIT 10', r, r, r, p_schema, p_table, r ); END LOOP; END IF; -- 📊 4. Estadísticas descriptivas de columnas numéricas IF col_num IS NOT NULL THEN FOREACH r IN ARRAY col_num LOOP EXECUTE format( 'DROP TABLE IF EXISTS tmp_stats_%I; CREATE TEMP TABLE tmp_stats_%I AS SELECT MIN(%I) AS minimo, MAX(%I) AS maximo, AVG(%I) AS media, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY %I) AS mediana FROM %I.%I', r, r, r, r, r, r, r, p_schema, p_table ); END LOOP; END IF; -- 📊 5. Distribución temporal por columnas de fecha IF col_date IS NOT NULL THEN FOREACH r IN ARRAY col_date LOOP EXECUTE format( 'DROP TABLE IF EXISTS tmp_time_%I; CREATE TEMP TABLE tmp_time_%I AS SELECT date_trunc(''day'', %I) AS dia, COUNT(*) FROM %I.%I GROUP BY dia ORDER BY dia', r, r, r, p_schema, p_table ); END LOOP; END IF; -- 📊 6. Correlaciones numéricas IF array_length(col_num,1) >= 2 THEN EXECUTE 'DROP TABLE IF EXISTS tmp_corr; CREATE TEMP TABLE tmp_corr (col_x TEXT, col_y TEXT, correlacion NUMERIC)'; FOR i IN 1..array_length(col_num,1)-1 LOOP FOR j IN i+1..array_length(col_num,1) LOOP EXECUTE format( 'INSERT INTO tmp_corr SELECT %L, %L, corr(%I,%I) FROM %I.%I', col_num[i], col_num[j], col_num[i], col_num[j], p_schema, p_table ); END LOOP; END LOOP; END IF; RAISE NOTICE '✅ Tablas temporales creadas. Revísalas en el navegador de DBeaver (prefijo tmp_)'; END; LANGUAGE plpgsql;