primera parte del insertar

[z.facultad/75.06/emufs.git] / emufs / indice_b.c
diff --git a/emufs/indice_b.c b/emufs/indice_b.c

index 30d0658222b34fbfeac53d353d9af6d2936e0766..9a5118dabc05a223b8d952cf6b38b8e4ddd83b2b 100644 (file)
--- a/emufs/indice_b.c
+++ b/emufs/indice_b.c
@@ -56,6 +56,9 @@ static void b_fundir_nodo(char *, int, char *, int, char *, int, int);
                         
  static EMUFS_REG_ID b_insertar_dup_en_pos(INDICE *idx, INDICE_DATO pos, INDICE_DATO nuevo);
  
+static void abreviar_claves(INDICE *idx, B_NodoEntry *array, B_NodoHeader *header);
+static void desabreviar_claves(INDICE *idx, B_NodoEntry *array, B_NodoHeader *header);
+
  void emufs_indice_b_crear(INDICE *idx)
  {
         FILE *fp;
@@ -68,8 +71,6 @@ void emufs_indice_b_crear(INDICE *idx)
         header.hijo_izquierdo = -1;
  
         fp = fopen(idx->filename, "w");
-       PERR("Creando indice");
-       fprintf(stderr, "Archivo = (%s)\n", idx->filename);
         if (fp == NULL) {
                 PERR("Error al crear el archivo");
                 return;
@@ -117,7 +118,6 @@ int emufs_indice_b_insertar(INDICE *idx, CLAVE clave, INDICE_DATO dato)
                 
                         if (idx->tipo_dato == IDX_STRING) {
                                 /* Tengo que sacar el texto repetido del archivo de textos */
-                               PERR("Eliminando string duplicado");
                                 idx->emu_string->borrar_registro(idx->emu_string, clave);
                         }
                         return 1;
@@ -142,7 +142,6 @@ int emufs_indice_b_insertar(INDICE *idx, CLAVE clave, INDICE_DATO dato)
                  */
                 dummy.id = -1;
                 dato.id = b_insertar_dup_en_pos(idx, dummy, dato);
-               fprintf(stderr, "Agrege un coso duplicado por primera vez en id=%d\n", dato.id);
         }
  
         b_insertar_en_nodo(idx, clave, dato, nodo_id, nodo, -1, -1);
@@ -156,9 +155,11 @@ INDICE_DATO emufs_indice_b_buscar(INDICE *idx, CLAVE clave)
         B_NodoHeader header;
         B_NodoEntry *claves;
         char *nodo, *tmp;
+       int nodo_id;
         
         /* Leo la raiz */
         nodo = b_leer_nodo(idx, 0);
+       nodo_id = 0;
         while (nodo) {
                 b_leer_header(nodo, &header);
                 claves = b_leer_claves(nodo, &header);
@@ -166,15 +167,18 @@ INDICE_DATO emufs_indice_b_buscar(INDICE *idx, CLAVE clave)
                 while ((i<header.cant) && (emufs_indice_es_menor(idx, claves[i].clave, clave))) i++;
                 if ((i<header.cant) && (emufs_indice_es_igual(idx, claves[i].clave, clave))) {
                                 ret = claves[i].dato;
+                               b_grabar_nodo(idx, nodo_id, nodo);
                                 free(nodo);
-                               PERR("CLAVE ENCONTRADA");
                                 return ret;
                 } else {
                         tmp = nodo;
+                       b_grabar_nodo(idx, nodo_id, nodo);
                         if (i == 0) {
                                 nodo = b_leer_nodo(idx, header.hijo_izquierdo);
+                               nodo_id = header.hijo_izquierdo;
                         } else {
                                 nodo = b_leer_nodo(idx, claves[i-1].hijo_derecho);
+                               nodo_id = claves[i-1].hijo_derecho;
                         }
                         free(tmp);
                 }
@@ -196,7 +200,6 @@ int emufs_indice_b_borrar(INDICE *idx, CLAVE k)
  
         nodo_id = 0; /* Tomo la raiz */
         nodo = b_leer_nodo(idx, nodo_id);
-       PERR("Buscando clave a borrar");
         while (nodo && !encontrado) {
                 /* Obtengo los datos del nodo */
                 b_leer_header(nodo, &header);
@@ -266,6 +269,8 @@ static char *b_leer_nodo(INDICE *idx, int id)
  {
         FILE *fp;
         char *out;
+       B_NodoHeader header;
+       B_NodoEntry *claves;
  
         if (id < 0) return NULL;
  
@@ -287,6 +292,12 @@ static char *b_leer_nodo(INDICE *idx, int id)
                 return NULL;
         }
  
+       /* Si estoy manejando string tengo que sacar las abreviaturas */
+       if (idx->tipo_dato == IDX_STRING) {
+               b_leer_header(out, &header);
+               claves = b_leer_claves(out, &header);
+               desabreviar_claves(idx, claves, &header);
+       }
         fclose(fp);
         return out;
  }
@@ -294,24 +305,15 @@ static char *b_leer_nodo(INDICE *idx, int id)
  static void b_grabar_nodo(INDICE *idx, int id, char *data)
  {
         FILE *fp;
+       B_NodoHeader header;
+       B_NodoEntry *claves;
  
-/*     if (id > b_ultimo_id()) {
-               printf("AGREGANDO AL FINAL\n");
-               fp = fopen(FILENAME, "a");
-               if (fp == NULL) {
-               _("No se pudo abrir archivo\n");
-                       return;
-               }
-       } else {
-               fp = fopen(FILENAME, "w");
-               if (fp == NULL) {
-               _("No se pudo abrir archivo\n");
-                       return;
-               }
-               fseek(fp, id*BLOCK_SIZE, SEEK_SET);
-               printf("SOLO GUARDO DATA\n");
-       }*/
-
+       /* Si las claves son de tipo string debo abreviar antes de guardar */
+       if (idx->tipo_dato == IDX_STRING) {
+               b_leer_header(data, &header);
+               claves = b_leer_claves(data, &header);
+               abreviar_claves(idx, claves, &header);
+       }
         fp = fopen(idx->filename, "r+");
         fseek(fp, id*idx->tam_bloque, SEEK_SET);
         fwrite(data, 1, idx->tam_bloque, fp);
@@ -368,15 +370,25 @@ static void b_insertar_en_nodo(INDICE *idx, CLAVE clave, INDICE_DATO dato, int n
                         i=0;
                         /* Creo una lista ordenada de los nodos a partir */
                         tmp_claves = (B_NodoEntry *)malloc(sizeof(B_NodoEntry)*(nodo_header.cant+1));
-                       total = nodo_header.cant;
+                       total = nodo_header.cant+1;
                         while ((i<nodo_header.cant) && (emufs_indice_es_menor(idx, claves[i].clave, clave))) {
                                 tmp_claves[i] = claves[i];
                                 i++;
                         }
                         tmp_claves[i].clave = clave;
                         tmp_claves[i].dato = dato;
-                       tmp_claves[i].hijo_derecho = hijo1;
-                       tmp_claves[i+1].hijo_derecho = hijo2;
+                       /*tmp_claves[i].hijo_derecho = hijo1;*/
+                       if (i==0) {
+                               nodo_header.hijo_izquierdo = hijo1;
+                               tmp_claves[i].hijo_derecho = hijo2;
+                       } else {
+                               tmp_claves[i-1].hijo_derecho = hijo1;
+                               tmp_claves[i].hijo_derecho = hijo2;
+                       }
+/*                     if (i == 0)
+                               nodo_header.hijo_izquierdo = hijo2;
+                       else
+                               tmp_claves[i+1].hijo_derecho = hijo2;*/
                         while (i < nodo_header.cant) {
                                 tmp_claves[i+1] = claves[i];
                                 i++;
@@ -386,8 +398,8 @@ static void b_insertar_en_nodo(INDICE *idx, CLAVE clave, INDICE_DATO dato, int n
                         b_leer_header(nuevo, &nuevo_header);
  
                         nuevo_header.nivel = nodo_header.nivel;
-                       nodo_header.cant = total/2;
-                       nuevo_header.cant = total - nodo_header.cant;
+                       nodo_header.cant = total/2-1;
+                       nuevo_header.cant = (total-1) - nodo_header.cant;
  
                         memset(claves, '*', idx->tam_bloque-sizeof(B_NodoHeader));
                         for(j=0; j<nodo_header.cant; j++)
@@ -396,7 +408,7 @@ static void b_insertar_en_nodo(INDICE *idx, CLAVE clave, INDICE_DATO dato, int n
                         claves_nuevo = b_leer_claves(nuevo, &nuevo_header);
                         memset(claves_nuevo, '*', idx->tam_bloque-sizeof(B_NodoHeader));
                         for(j=0; j<nuevo_header.cant; j++)
-                               claves_nuevo[j] = tmp_claves[j+total/2+1];
+                               claves_nuevo[j] = tmp_claves[j+total/2];
  
                         b_actualizar_header(nodo, &nodo_header);
                         b_actualizar_header(nuevo, &nuevo_header);
@@ -477,8 +489,14 @@ void b_insertar_en_nodo_con_lugar(INDICE *idx, CLAVE clave, INDICE_DATO dato, in
         nodo_header.cant++;
         claves[i].clave = clave;
         claves[i].dato = dato;
-       claves[i].hijo_derecho = hijo2;
-       nodo_header.hijo_izquierdo = b_elegir_izquierdo(idx, nodo_header.hijo_izquierdo, hijo1);
+       if (i==0) {
+               nodo_header.hijo_izquierdo = hijo1;
+               claves[i].hijo_derecho = hijo2;
+       } else {
+               claves[i-1].hijo_derecho = hijo1;
+               claves[i].hijo_derecho = hijo2;
+       }
+       /*b_elegir_izquierdo(idx, nodo_header.hijo_izquierdo, hijo1);*/
  
         b_actualizar_header(nodo, &nodo_header);
         b_grabar_nodo(idx, nodo_id, nodo);
@@ -553,7 +571,6 @@ INDICE_DATO *emufs_indice_b_buscar_muchos(INDICE *idx, CLAVE clave, int *cant)
         }
  
         /* Busco la clave en el arbol */
-       PERR("Buscando clave");
         dato = emufs_indice_b_buscar(idx, clave);
  
         if (dato.id == -1) {
@@ -562,7 +579,6 @@ INDICE_DATO *emufs_indice_b_buscar_muchos(INDICE *idx, CLAVE clave, int *cant)
  
         /* Leo el contenido actual */
         k.i_clave = dato.id;
-       fprintf(stderr, "TENGO DATOS EN ID=%d\n", dato.id);
         error = 0;
         leido = (char *)idx->emu_mult->leer_registro(idx->emu_mult, k, &tam, &error);
  
@@ -575,7 +591,6 @@ INDICE_DATO *emufs_indice_b_buscar_muchos(INDICE *idx, CLAVE clave, int *cant)
         ret = malloc(sizeof(INDICE_DATO)*(*cant));
         memcpy(ret, leido+sizeof(int), (*cant)*sizeof(INDICE_DATO));
         free(leido);
-       fprintf(stderr, "TENGO QUE ESTA CLAVE TIENE %d ITEMS\n", *cant);
         return ret;
  }
  
@@ -839,25 +854,105 @@ static EMUFS_REG_ID b_insertar_dup_en_pos(INDICE *idx, INDICE_DATO pos, INDICE_D
         if (k.i_clave == -1) {
                 /* Creo uno nuevo */
                 error = 0;
-               PERR("GRABADO REGISTRO NUEVO");
                 k.i_clave = idx->emu_mult->grabar_registro(idx->emu_mult,
-                                                                       leido,
-                                                                       cant*sizeof(INDICE_DATO)+sizeof(int),
-                                                                       &error
-                                                               );
+                       leido,
+                       cant*sizeof(INDICE_DATO)+sizeof(int),
+                       &error
+               );
+               if (k.i_clave == -1) PERR("ALGO NO GRABO BIEN!!");
         } else {
                 /* Modifico el que ya existia! */
-               PERR("MODIFICANDO REGISTRO EXISTENTE");
                 error = 0;
                 idx->emu_mult->modificar_registro(idx->emu_mult,
-                                                                       k.i_clave,
-                                                                       leido,
-                                                                       cant*sizeof(INDICE_DATO)+sizeof(int),
-                                                                       &error
-                                                               );
+                       k.i_clave,
+                       leido,
+                       cant*sizeof(INDICE_DATO)+sizeof(int),
+                       &error
+               );
         }
         /* Clean up! */
         free(leido);
         return k.i_clave;
  }
  
+/* Counts the leading characters that `primera` and `actual` have in common,
+ * stores that count in *iguales and returns `actual` advanced past them. */
+char *abreviar(char *primera, char *actual, int *iguales)
+{
+       int comunes = 0;
+
+       /* Walk both strings in step until one ends or they stop matching */
+       while (*primera != '\0' && *actual != '\0' && *primera == *actual) {
+               primera++;
+               actual++;
+               comunes++;
+       }
+       *iguales = comunes;
+
+       return actual;
+}
+
+/* Front-codes a node's string keys before saving: a key sharing 2+ leading chars
+ * with the reference key is stored as "<shared>|<suffix>"; '*' keys are skipped. */
+static void abreviar_claves(INDICE *idx, B_NodoEntry *array, B_NodoHeader *header)
+{
+       char *primera, *actual, *resto, salvar[100];
+       EMUFS_REG_SIZE size;
+       int error = 0, i, iguales;
+
+       if (header->cant < 1) return; /* empty node: there is no reference key */
+       primera = (char *)idx->emu_string->leer_registro(idx->emu_string, array[0].clave, &size, &error);
+       if (primera == NULL) return; /* assumes NULL means the read failed */
+       for(i=1; i<header->cant; i++) {
+               actual = (char *)idx->emu_string->leer_registro(idx->emu_string, array[i].clave, &size, &error);
+               if (actual == NULL || *actual == '*') {
+                       free(actual); /* free(NULL) is a no-op */
+                       continue;
+               }
+               resto = abreviar(primera, actual, &iguales);
+               /* Needs 2+ shared chars to pay off. "N|rest" is never longer than the
+                * key itself, so a key that fits in salvar keeps sprintf in bounds. */
+               if (iguales > 1 && strlen(actual) < sizeof(salvar)) {
+                       sprintf(salvar, "%d|%s", iguales, resto);
+                       free(actual);
+                       error = 0;
+                       idx->emu_string->modificar_registro(idx->emu_string, array[i].clave.i_clave, salvar, strlen(salvar)+1, &error);
+               } else {
+                       free(primera); /* key kept whole: it is the new reference */
+                       primera = actual;
+               }
+       }
+       free(primera);
+}
+
+/* Undoes abreviar_claves after a node is read: a key stored as "<n>|<suffix>"
+ * gets back the first n chars of the reference key; '*' keys are skipped. */
+static void desabreviar_claves(INDICE *idx, B_NodoEntry *array, B_NodoHeader *header)
+{
+       char *primera, *actual, *resto, salvar[100];
+       EMUFS_REG_SIZE size;
+       int error = 0, i;
+       long iguales;
+       if (header->cant < 1) return; /* empty node: there is no reference key */
+       primera = (char *)idx->emu_string->leer_registro(idx->emu_string, array[0].clave, &size, &error);
+       if (primera == NULL) return; /* assumes NULL means the read failed */
+       for(i=1; i<header->cant; i++) {
+               actual = (char *)idx->emu_string->leer_registro(idx->emu_string, array[i].clave, &size, &error);
+               if (actual == NULL || *actual == '*') {
+                       free(actual); /* free(NULL) is a no-op */
+                       continue;
+               }
+               iguales = strtol(actual, &resto, 10);
+               /* n > 1 mirrors abreviar_claves; the length test keeps sprintf inside salvar */
+               if (iguales > 1 && *resto == '|' && (size_t)iguales + strlen(resto+1) < sizeof(salvar)) {
+                       sprintf(salvar, "%.*s%s", (int)iguales, primera, resto+1); /* +1 skips the '|' */
+                       idx->emu_string->modificar_registro(idx->emu_string, array[i].clave.i_clave, salvar, strlen(salvar)+1, &error);
+                       free(actual);
+               } else {
+                       free(primera); /* key stored whole: it is the new reference */
+                       primera = actual;
+               }
+       }
+       free(primera);
+}
+