"""Rebuild the SNC district -> SIGEM entity mapping as a SQL script.

Reads the pipe-delimited SIGEM entity list and the SNC district JSON
(a feature collection), pairs every district with an entity id by
normalized name (exact match first, then a small Levenshtein tolerance)
and writes a DELETE + INSERT script for ``public.snc_catalog_mapping``.
"""
import json
import unicodedata

ENTIDADES_FILE = "/yvyape/proyectos/sigem-gis/sigem_entidades.txt"
JSON_FILE = "/yvyape/proyectos/sigem-gis/snc_ly_dist.json"
OUTPUT_FILE = "/yvyape/proyectos/sigem-gis/reconstruccion_maestra_268.sql"

# Common terms stripped from names before comparing. They are removed in
# this order and wherever they occur in the (upper-cased) text.
_COMMON_TERMS = (
    'MUNICIPALIDAD DE ',
    'CIUDAD ',
    'VILLA ',
    'SAN ',
    'SANTA ',
    'DOCTOR ',
    'DR. ',
)


def normalize(text) -> str:
    """Return a comparison key for a district/entity name.

    The text is upper-cased, stripped of accents (combining marks are
    dropped after NFD decomposition), cleaned of the common terms listed
    in ``_COMMON_TERMS`` and trimmed. Falsy input (``None``, ``""``)
    yields ``""``.
    """
    if not text:
        return ""
    text = text.upper()
    # Remove accents: decompose, then drop the non-spacing marks ('Mn').
    text = ''.join(
        c for c in unicodedata.normalize('NFD', text)
        if unicodedata.category(c) != 'Mn'
    )
    # Strip common terms.
    for word in _COMMON_TERMS:
        text = text.replace(word, '')
    return text.strip()


def levenshtein(s1, s2) -> int:
    """Return the Levenshtein edit distance between ``s1`` and ``s2``."""
    # Keep the shorter string as s2 so each row is as short as possible.
    if len(s1) < len(s2):
        s1, s2 = s2, s1
    if not s2:
        return len(s1)

    previous_row = list(range(len(s2) + 1))
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    return previous_row[-1]


def load_entidades(path) -> dict:
    """Load SIGEM entities from a ``id|name|...`` text file.

    Returns a dict mapping the normalized name to the entity id. Lines
    with fewer than two fields are ignored; when two entities normalize
    to the same name, the later line wins.
    """
    entidades = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            if len(parts) >= 2:
                entidades[normalize(parts[1])] = parts[0]
    return entidades


def match_entidad(nombre, entidades, max_distance=3):
    """Return the entity id matching ``nombre``, or ``None``.

    Args:
        nombre: Already-normalized district name.
        entidades: Mapping of normalized entity name -> entity id.
        max_distance: Exclusive upper bound on the Levenshtein distance
            accepted by the fuzzy pass (a candidate needs a distance
            strictly below it).

    The exact normalized name is tried first. Otherwise the closest
    entity within the tolerance is used; on ties the first one in the
    mapping's iteration order wins.
    """
    # An empty name carries no information: without this guard it would
    # fuzzy-match any entity whose normalized name is 1-2 characters long.
    if not nombre:
        return None

    # Attempt 1: exact match on the normalized name.
    exact = entidades.get(nombre)
    if exact:
        return exact

    # Attempt 2: fuzzy match (Levenshtein).
    best_id = None
    best_score = max_distance
    for sigem_name, sigem_id in entidades.items():
        # The distance is never below the length difference, so such a
        # candidate cannot improve on the current best; skip the DP.
        if abs(len(nombre) - len(sigem_name)) >= best_score:
            continue
        score = levenshtein(nombre, sigem_name)
        if score < best_score:
            best_score = score
            best_id = sigem_id
    return best_id


def sql_quote(value) -> str:
    """Return ``value`` as a single-quoted SQL string literal.

    Embedded single quotes are doubled so the literal cannot terminate
    early and break (or alter) the generated statement.
    """
    return "'" + str(value).replace("'", "''") + "'"


def build_rows(features, entidades) -> list:
    """Build one ``(entidad_id, dpto_snc, dist_snc)`` VALUES tuple per feature.

    Each feature must have a ``'properties'`` dict; ``cod_dpto``,
    ``cod_dist`` and ``nom_dist`` are read from it. Districts without a
    matching entity get the synthetic id ``99<cod_dpto><cod_dist>``.
    """
    rows = []
    for feature in features:
        props = feature['properties']
        dpto = props.get('cod_dpto')
        dist = props.get('cod_dist')
        nombre = normalize(props.get('nom_dist', ''))

        match_id = match_entidad(nombre, entidades) or f"99{dpto}{dist}"

        # NOTE(review): cod_dist is emitted unquoted, so it is assumed to
        # be numeric, and a missing code is rendered as the text "None" -
        # confirm the SNC export always provides both codes.
        rows.append(f"({sql_quote(match_id)}, {sql_quote(dpto)}, {dist})")
    return rows


def write_sql(path, rows) -> None:
    """Write the script that replaces the mapping table content with ``rows``.

    With no rows only the DELETE is written, because an INSERT with an
    empty VALUES list is not valid SQL.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.write("DELETE FROM public.snc_catalog_mapping;\n")
        if not rows:
            return
        f.write("INSERT INTO public.snc_catalog_mapping (entidad_id, dpto_snc, dist_snc) VALUES \n")
        f.write(",\n".join(rows))
        f.write(";\n")


def main() -> None:
    """Run the full rebuild using the module-level file paths."""
    # Load the SIGEM entities.
    entidades = load_entidades(ENTIDADES_FILE)

    # Process the SNC JSON.
    with open(JSON_FILE, 'r', encoding='utf-8') as f:
        data = json.load(f)
    sql_lines = build_rows(data['features'], entidades)

    # Write the SQL script.
    write_sql(OUTPUT_FILE, sql_lines)

    print(f"Reconstrucción finalizada: {len(sql_lines)} registros con lógica Fuzzy Match.")


if __name__ == "__main__":
    main()