From dd433d5ab21b91607c9d653f864efb45495b5f9f Mon Sep 17 00:00:00 2001 From: Neo And Date: Sat, 23 Aug 2025 01:09:18 -0600 Subject: [PATCH] =?UTF-8?q?A=C3=B1adir=20kit=5Feda?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- kit_eda | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 kit_eda diff --git a/kit_eda b/kit_eda new file mode 100644 index 0000000..4801ba7 --- /dev/null +++ b/kit_eda @@ -0,0 +1,140 @@ +/* ========================================================== + KIT EDA POSTGRESQL – 15 QUERIES CLAVE + Autor: NeoAnd + + 📌 Instrucciones rápidas: + +-- Ejecuta: +SELECT run_eda_auto_export('public','clientes'); + +-- Luego en DBeaver: +-- Busca en el navegador las tablas temporales que empiezan con "tmp_" +-- Ej: tmp_total_rows, tmp_nulls, tmp_top10_micolumna, tmp_stats_micolumna, tmp_time_midate, tmp_corr + +Ejecuta por bloque (Cmd+Enter en Mac, Ctrl+Enter en Windows). + ========================================================== */ + + +/* ========================================================== + Función EDA Autoadaptativa con Export + Autor: Anderson + Copilot + Uso: SELECT run_eda_auto_export('schema','tabla'); + ========================================================== */ + +CREATE OR REPLACE FUNCTION run_eda_auto_export(p_schema TEXT, p_table TEXT) +RETURNS void AS + + +DECLARE + r RECORD; + col_num TEXT[]; + col_cat TEXT[]; + col_date TEXT[]; + sql_cmd TEXT; +BEGIN + -- 🔍 Clasificación automática de columnas + SELECT array_agg(column_name) FILTER (WHERE data_type LIKE '%int%' OR data_type LIKE '%numeric%' OR data_type LIKE '%double%') + INTO col_num + FROM information_schema.columns + WHERE table_schema = p_schema AND table_name = p_table; + + SELECT array_agg(column_name) FILTER (WHERE data_type IN ('character varying','text','uuid')) + INTO col_cat + FROM information_schema.columns + WHERE table_schema = p_schema AND table_name = p_table; + + SELECT array_agg(column_name) FILTER (WHERE data_type LIKE '%date%' OR data_type LIKE '%timestamp%') + INTO col_date + FROM information_schema.columns + WHERE table_schema = p_schema AND table_name = p_table; + + -- 📊 1. Conteo total + EXECUTE format('DROP TABLE IF EXISTS tmp_total_rows; + CREATE TEMP TABLE tmp_total_rows AS + SELECT COUNT(*) AS total FROM %I.%I', + p_schema, p_table); + + -- 📊 2. Nulos por columna + sql_cmd := ''; + FOR r IN + SELECT column_name + FROM information_schema.columns + WHERE table_schema = p_schema AND table_name = p_table + LOOP + sql_cmd := sql_cmd || format( + 'SELECT %L AS columna, COUNT(*) AS nulls FROM %I.%I WHERE %I IS NULL UNION ALL ', + r.column_name, p_schema, p_table, r.column_name + ); + END LOOP; + sql_cmd := left(sql_cmd, length(sql_cmd)-11); -- quitar último UNION ALL + EXECUTE format('DROP TABLE IF EXISTS tmp_nulls; + CREATE TEMP TABLE tmp_nulls AS %s', sql_cmd); + + -- 📊 3. Top 10 valores más frecuentes por columna categórica + IF col_cat IS NOT NULL THEN + FOREACH r IN ARRAY col_cat LOOP + EXECUTE format( + 'DROP TABLE IF EXISTS tmp_top10_%I; + CREATE TEMP TABLE tmp_top10_%I AS + SELECT %I, COUNT(*) AS freq + FROM %I.%I + GROUP BY %I + ORDER BY freq DESC + LIMIT 10', + r, r, r, p_schema, p_table, r + ); + END LOOP; + END IF; + + -- 📊 4. Estadísticas descriptivas de columnas numéricas + IF col_num IS NOT NULL THEN + FOREACH r IN ARRAY col_num LOOP + EXECUTE format( + 'DROP TABLE IF EXISTS tmp_stats_%I; + CREATE TEMP TABLE tmp_stats_%I AS + SELECT MIN(%I) AS minimo, + MAX(%I) AS maximo, + AVG(%I) AS media, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY %I) AS mediana + FROM %I.%I', + r, r, r, r, r, r, r, p_schema, p_table + ); + END LOOP; + END IF; + + -- 📊 5. Distribución temporal por columnas de fecha + IF col_date IS NOT NULL THEN + FOREACH r IN ARRAY col_date LOOP + EXECUTE format( + 'DROP TABLE IF EXISTS tmp_time_%I; + CREATE TEMP TABLE tmp_time_%I AS + SELECT date_trunc(''day'', %I) AS dia, COUNT(*) + FROM %I.%I + GROUP BY dia + ORDER BY dia', + r, r, r, p_schema, p_table + ); + END LOOP; + END IF; + + -- 📊 6. Correlaciones numéricas + IF array_length(col_num,1) >= 2 THEN + EXECUTE 'DROP TABLE IF EXISTS tmp_corr; + CREATE TEMP TABLE tmp_corr (col_x TEXT, col_y TEXT, correlacion NUMERIC)'; + FOR i IN 1..array_length(col_num,1)-1 LOOP + FOR j IN i+1..array_length(col_num,1) LOOP + EXECUTE format( + 'INSERT INTO tmp_corr + SELECT %L, %L, corr(%I,%I) + FROM %I.%I', + col_num[i], col_num[j], col_num[i], col_num[j], p_schema, p_table + ); + END LOOP; + END LOOP; + END IF; + + RAISE NOTICE '✅ Tablas temporales creadas. Revísalas en el navegador de DBeaver (prefijo tmp_)'; +END; + + +LANGUAGE plpgsql; \ No newline at end of file