Selaa lähdekoodia

Replace fuzzy search with plain string search

Fuzzy search was crashing with UTF-8 strings and simple string search works well
enough for now.
Matthias Vogelgesang 7 vuotta sitten
vanhempi
commit
e855fa61e2
4 muutettua tiedostoa jossa 13 lisäystä ja 600 poistoa
  1. 0 523
      src/fuzzy.c
  2. 0 53
      src/fuzzy.h
  3. 13 23
      src/iridium-note.c
  4. 0 1
      src/meson.build

+ 0 - 523
src/fuzzy.c

@@ -1,523 +0,0 @@
-/* fuzzy.c
- *
- * Copyright (C) 2013 Christian Hergert <christian@hergert.me>
- *
- * This file is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- * 
- * This file is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- * License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
-#include <ctype.h>
-#include <string.h>
-
-#include "fuzzy.h"
-
-
-#ifndef FUZZY_GROW_HEAP_BY
-#define FUZZY_GROW_HEAP_BY 4096
-#endif
-
-
-/**
- * SECTION:fuzzy
- * @title: Fuzzy Matching
- * @short_description: Fuzzy matching for GLib based programs.
- *
- * TODO:
- *
- * It is a programming error to modify #Fuzzy while holding onto an array
- * of #FuzzyMatch elements. The position of strings within the FuzzyMatch
- * may no longer be valid.
- */
-
-
-typedef struct _FuzzyItem   FuzzyItem;
-typedef struct _FuzzyLookup FuzzyLookup;
-
-
-struct _Fuzzy
-{
-   gchar          *heap;
-   gsize           heap_length;
-   gsize           heap_offset;
-   GArray         *id_to_text_offset;
-   GPtrArray      *id_to_value;
-   GPtrArray      *char_tables;
-   gboolean        in_bulk_insert;
-   gboolean        case_sensitive;
-};
-
-
-struct _FuzzyItem
-{
-   guint id  : 20;
-   guint pos : 12;
-};
-
-
-G_STATIC_ASSERT(sizeof(FuzzyItem) == 4);
-
-
-struct _FuzzyLookup
-{
-   Fuzzy        *fuzzy;
-   GArray      **tables;
-   gint         *state;
-   guint         n_tables;
-   gsize         max_matches;
-   const gchar  *needle;
-   GHashTable   *matches;
-};
-
-
-static gint
-fuzzy_item_compare (gconstpointer a,
-                    gconstpointer b)
-{
-   gint ret;
-
-   const FuzzyItem *fa = a;
-   const FuzzyItem *fb = b;
-
-   if ((ret = fa->id - fb->id) == 0) {
-      ret = fa->pos - fb->pos;
-   }
-
-   return ret;
-}
-
-
-static gint
-fuzzy_match_compare (gconstpointer a,
-                     gconstpointer b)
-{
-   const FuzzyMatch *ma = a;
-   const FuzzyMatch *mb = b;
-
-   if (ma->score < mb->score) {
-      return 1;
-   } else if (ma->score > mb->score) {
-      return -1;
-   }
-
-   return g_strcmp0(ma->key, mb->key);
-}
-
-
-/**
- * fuzzy_new:
- * @case_sensitive: %TRUE if case should be preserved.
- *
- * Create a new #Fuzzy for fuzzy matching strings.
- *
- * Returns: A newly allocated #Fuzzy that should be freed with fuzzy_free().
- */
-Fuzzy *
-fuzzy_new (gboolean case_sensitive)
-{
-   GArray *table;
-   Fuzzy *fuzzy;
-   gint i;
-
-   fuzzy = g_new0(Fuzzy, 1);
-   fuzzy->heap_length = FUZZY_GROW_HEAP_BY;
-   fuzzy->heap = g_malloc(fuzzy->heap_length);
-   fuzzy->heap_offset = 0;
-   fuzzy->id_to_value = g_ptr_array_new();
-   fuzzy->id_to_text_offset = g_array_new(FALSE, FALSE, sizeof(gsize));
-   fuzzy->char_tables = g_ptr_array_new();
-   fuzzy->case_sensitive = case_sensitive;
-   g_ptr_array_set_free_func(fuzzy->char_tables,
-                             (GDestroyNotify)g_array_unref);
-
-   for (i = 0; i < 256; i++) {
-      table = g_array_new(FALSE, FALSE, sizeof(FuzzyItem));
-      g_ptr_array_add(fuzzy->char_tables, table);
-   }
-
-   return fuzzy;
-}
-
-
-Fuzzy *
-fuzzy_new_with_free_func (gboolean       case_sensitive,
-                          GDestroyNotify free_func)
-{
-   Fuzzy *fuzzy;
-
-   fuzzy = fuzzy_new(case_sensitive);
-   fuzzy_set_free_func(fuzzy, free_func);
-
-   return fuzzy;
-}
-
-
-void
-fuzzy_set_free_func (Fuzzy          *fuzzy,
-                     GDestroyNotify  free_func)
-{
-   g_return_if_fail(fuzzy);
-
-   g_ptr_array_set_free_func(fuzzy->id_to_value, free_func);
-}
-
-
-static gsize
-fuzzy_heap_insert (Fuzzy       *fuzzy,
-                   const gchar *text)
-{
-   gsize offset;
-   gsize req_bytes;
-   gsize len;
-
-   g_assert(fuzzy);
-   g_assert(text);
-
-   len = strlen(text) + 1;
-   req_bytes = fuzzy->heap_offset + len;
-
-   if (req_bytes > fuzzy->heap_length) {
-      fuzzy->heap_length = (((req_bytes / FUZZY_GROW_HEAP_BY) + 1) *
-                            FUZZY_GROW_HEAP_BY);
-      fuzzy->heap = g_realloc(fuzzy->heap, fuzzy->heap_length);
-   }
-
-   offset = fuzzy->heap_offset;
-   memcpy(fuzzy->heap + offset, text, len);
-   fuzzy->heap_offset += len;
-
-   return offset;
-}
-
-
-/**
- * fuzzy_begin_bulk_insert:
- * @fuzzy: (in): A #Fuzzy.
- *
- * Start a bulk insertion. @fuzzy is not ready for searching until
- * fuzzy_end_bulk_insert() has been called.
- *
- * This allows for inserting large numbers of strings and deferring
- * the final sort until fuzzy_end_bulk_insert().
- */
-void
-fuzzy_begin_bulk_insert (Fuzzy *fuzzy)
-{
-   g_return_if_fail(fuzzy);
-   g_return_if_fail(!fuzzy->in_bulk_insert);
-
-   fuzzy->in_bulk_insert = TRUE;
-}
-
-
-/**
- * fuzzy_end_bulk_insert:
- * @fuzzy: (in): A #Fuzzy.
- *
- * Complete a bulk insert and resort the index.
- */
-void
-fuzzy_end_bulk_insert (Fuzzy *fuzzy)
-{
-   GArray *table;
-   gint i;
-
-   g_return_if_fail(fuzzy);
-   g_return_if_fail(fuzzy->in_bulk_insert);
-
-   fuzzy->in_bulk_insert = FALSE;
-
-   for (i = 0; i < fuzzy->char_tables->len; i++) {
-      table = g_ptr_array_index(fuzzy->char_tables, i);
-      g_array_sort(table, fuzzy_item_compare);
-   }
-}
-
-
-/**
- * fuzzy_insert:
- * @fuzzy: (in): A #Fuzzy.
- * @key: (in): An ASCII string.
- * @value: (in): A value to associate with key.
- *
- * Inserts a string into the fuzzy matcher.
- *
- * Note that @key MUST be an ascii string. UTF-8 is not supported.
- */
-void
-fuzzy_insert (Fuzzy       *fuzzy,
-              const gchar *key,
-              gpointer     value)
-{
-   FuzzyItem item;
-   GArray *table;
-   gchar *downcase;
-   gsize offset;
-   guint idx;
-   gint id;
-   gint i;
-
-   g_return_if_fail(fuzzy);
-   g_return_if_fail(key);
-   g_return_if_fail(fuzzy->id_to_text_offset->len < ((1 << 20) - 1));
-
-   if (!*key) {
-      return;
-   }
-
-   if (!fuzzy->case_sensitive) {
-      downcase = g_ascii_strdown(key, -1);
-   }
-
-   /*
-    * Insert the string into our heap.
-    * Track the offset within the heap since the heap could realloc.
-    */
-   offset = fuzzy_heap_insert(fuzzy, key);
-   g_array_append_val(fuzzy->id_to_text_offset, offset);
-   g_ptr_array_add(fuzzy->id_to_value, value);
-   g_assert_cmpint(fuzzy->id_to_value->len, ==, fuzzy->id_to_text_offset->len);
-
-   id = fuzzy->id_to_text_offset->len - 1;
-
-   if (!fuzzy->case_sensitive) {
-      key = downcase;
-   }
-
-   for (i = 0; key[i]; i++) {
-      idx = key[i];
-      table = g_ptr_array_index(fuzzy->char_tables, idx);
-
-      item.id = id;
-      item.pos = i;
-      g_array_append_val(table, item);
-
-      if (!fuzzy->in_bulk_insert) {
-         g_array_sort(table, fuzzy_item_compare);
-      }
-   }
-
-   if (!fuzzy->case_sensitive) {
-      g_free(downcase);
-   }
-}
-
-
-/**
- * fuzzy_free:
- * @fuzzy: (allow-none): A #Fuzzy.
- *
- * Frees resources associated with #Fuzzy. @fuzzy must not be used
- * after calling this.
- */
-void
-fuzzy_free (Fuzzy *fuzzy)
-{
-   if (fuzzy) {
-      g_free(fuzzy->heap);
-      fuzzy->heap = 0;
-      fuzzy->heap_offset = 0;
-      fuzzy->heap_length = 0;
-
-      g_array_unref(fuzzy->id_to_text_offset);
-      fuzzy->id_to_text_offset = NULL;
-
-      g_ptr_array_unref(fuzzy->id_to_value);
-      fuzzy->id_to_value = NULL;
-
-      g_ptr_array_unref(fuzzy->char_tables);
-      fuzzy->char_tables = NULL;
-
-      g_free(fuzzy);
-   }
-}
-
-
-static gboolean
-fuzzy_do_match (FuzzyLookup *lookup,
-                FuzzyItem   *item,
-                gint         table_index,
-                gint         score)
-{
-   FuzzyItem *iter;
-   gpointer key;
-   GArray *table;
-   gint *state;
-   gint iter_score;
-
-   g_assert(lookup);
-   g_assert(item);
-   g_assert(table_index);
-
-   table = lookup->tables[table_index];
-   state = &lookup->state[table_index];
-
-   for (; state[0] < table->len; state[0]++) {
-      iter = &g_array_index(table, FuzzyItem, state[0]);
-
-      if ((iter->id < item->id) ||
-          ((iter->id == item->id) && (iter->pos <= item->pos))) {
-         continue;
-      } else if (iter->id > item->id) {
-         break;
-      }
-
-      iter_score = score + (iter->pos - item->pos);
-
-      if ((table_index + 1) < lookup->n_tables) {
-         if (fuzzy_do_match(lookup, iter, table_index + 1, iter_score)) {
-            return TRUE;
-         }
-         continue;
-      }
-
-      key = GINT_TO_POINTER(iter->id);
-
-      if (!g_hash_table_contains(lookup->matches, key) ||
-          (iter_score < GPOINTER_TO_INT(g_hash_table_lookup(lookup->matches, key)))) {
-         g_hash_table_insert(lookup->matches, key, GINT_TO_POINTER(iter_score));
-      }
-
-      return TRUE;
-   }
-
-   return FALSE;
-}
-
-
-static const gchar *
-fuzzy_get_string (Fuzzy *fuzzy,
-                  gint   id)
-{
-   gsize offset;
-
-   g_assert(fuzzy);
-   g_assert(id >= 0);
-
-   offset = g_array_index(fuzzy->id_to_text_offset, gsize, id);
-   return fuzzy->heap + offset;
-}
-
-
-/**
- * fuzzy_match:
- * @fuzzy: (in): A #Fuzzy.
- * @needle: (in): The needle to fuzzy search for.
- * @max_matches: (in): The max number of matches to return.
- *
- * Fuzzy searches within @fuzzy for strings that fuzzy match @needle.
- * Only up to @max_matches will be returned.
- *
- * @needle MUST be an ascii string.
- *
- * TODO: max_matches is not yet respected.
- *
- * Returns: (transfer full) (element-type FuzzyMatch): A newly allocated
- *   #GArray containing #FuzzyMatch elements. This should be freed when
- *   the caller is done with it using g_array_unref().
- *   It is a programming error to keep the structure around longer than
- *   the @fuzzy instance.
- */
-GArray *
-fuzzy_match (Fuzzy       *fuzzy,
-             const gchar *needle,
-             gsize        max_matches)
-{
-   FuzzyLookup lookup = { 0 };
-   FuzzyMatch match;
-   FuzzyItem *item;
-   GArray *matches = NULL;
-   GArray *root;
-   gchar *downcase = NULL;
-   gint i;
-
-   g_return_val_if_fail(fuzzy, NULL);
-   g_return_val_if_fail(!fuzzy->in_bulk_insert, NULL);
-   g_return_val_if_fail(needle, NULL);
-
-   matches = g_array_new(FALSE, FALSE, sizeof(FuzzyMatch));
-
-   if (!*needle) {
-      return matches;
-   }
-
-   if (!fuzzy->case_sensitive) {
-      downcase = g_ascii_strdown(needle, -1);
-      needle = downcase;
-   }
-
-   lookup.fuzzy = fuzzy;
-   lookup.n_tables = strlen(needle);
-   lookup.state = g_new0(gint, lookup.n_tables);
-   lookup.tables = g_new0(GArray*, lookup.n_tables);
-   lookup.needle = needle;
-   lookup.max_matches = max_matches;
-   lookup.matches = g_hash_table_new(NULL, NULL);
-
-   for (i = 0; needle[i]; i++) {
-      lookup.tables[i] = g_ptr_array_index(fuzzy->char_tables,
-                                           (guint)needle[i]);
-   }
-
-   root = g_ptr_array_index(fuzzy->char_tables, (guint)needle[0]);
-
-   if (G_LIKELY(lookup.n_tables > 1)) {
-      for (i = 0; i < root->len; i++) {
-         item = &g_array_index(root, FuzzyItem, i);
-         fuzzy_do_match(&lookup, item, 1, 0);
-      }
-   } else {
-      for (i = 0; i < root->len; i++) {
-         item = &g_array_index(root, FuzzyItem, i);
-         match.key = fuzzy_get_string(fuzzy, item->id);
-         match.value = g_ptr_array_index(fuzzy->id_to_value, item->id);
-         match.score = 0;
-         g_array_append_val(matches, match);
-      }
-      g_free(downcase);
-      return matches;
-   }
-
-   {
-      GHashTableIter iter;
-      gpointer key;
-      gpointer value;
-
-      g_hash_table_iter_init(&iter, lookup.matches);
-      while (g_hash_table_iter_next(&iter, &key, &value)) {
-         match.key = fuzzy_get_string(fuzzy, GPOINTER_TO_INT(key));
-         match.score = 1.0 / (strlen(match.key) + GPOINTER_TO_INT(value));
-         match.value = g_ptr_array_index(fuzzy->id_to_value,
-                                         GPOINTER_TO_INT(key));
-         g_array_append_val(matches, match);
-      }
-
-      g_array_sort(matches, fuzzy_match_compare);
-
-      /*
-       * TODO: We could be more clever here when inserting into the array
-       *       only if it is a lower score than the end or < max items.
-       */
-
-      if (max_matches && (matches->len > max_matches)) {
-         g_array_set_size(matches, max_matches);
-      }
-   }
-
-   g_free(downcase);
-   g_free(lookup.state);
-   g_free(lookup.tables);
-   g_hash_table_unref(lookup.matches);
-
-   return matches;
-}

+ 0 - 53
src/fuzzy.h

@@ -1,53 +0,0 @@
-/* fuzzy.h
- *
- * Copyright (C) 2013 Christian Hergert <christian@hergert.me>
- *
- * This file is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- * the Free Software Foundation; either version 2.1 of the License, or (at
- * your option) any later version.
- * 
- * This file is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- * License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef FUZZY_H
-#define FUZZY_H
-
-#include <glib.h>
-
-G_BEGIN_DECLS
-
-typedef struct _Fuzzy      Fuzzy;
-typedef struct _FuzzyMatch FuzzyMatch;
-
-struct _FuzzyMatch
-{
-   const gchar *key;
-   gpointer     value;
-   gfloat       score;
-};
-
-Fuzzy     *fuzzy_new                (gboolean        case_sensitive);
-Fuzzy     *fuzzy_new_with_free_func (gboolean        case_sensitive,
-                                     GDestroyNotify  free_func);
-void       fuzzy_set_free_func      (Fuzzy          *fuzzy,
-                                     GDestroyNotify  free_func);
-void       fuzzy_begin_bulk_insert  (Fuzzy          *fuzzy);
-void       fuzzy_end_bulk_insert    (Fuzzy          *fuzzy);
-void       fuzzy_insert             (Fuzzy          *fuzzy,
-                                     const gchar    *key,
-                                     gpointer        value);
-GArray    *fuzzy_match              (Fuzzy          *fuzzy,
-                                     const gchar    *needle,
-                                     gsize           max_matches);
-void       fuzzy_free               (Fuzzy          *fuzzy);
-
-G_END_DECLS
-
-#endif /* FUZZY_H */

+ 13 - 23
src/iridium-note.c

@@ -18,7 +18,6 @@
 
 #include "iridium-config.h"
 #include "iridium-note.h"
-#include "fuzzy.h"
 
 #define IRIDIUM_NOTE_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), IRIDIUM_TYPE_NOTE, IridiumNotePrivate))
 
@@ -102,35 +101,26 @@ gboolean
 iridium_note_matches_fuzzy (IridiumNote *note,
                             const gchar *needle)
 {
-  Fuzzy *fuzzy;
-  GArray *matches;
   IridiumNotePrivate *priv;
+  gchar *l_content;
+  gchar *l_title;
+  gchar *l_needle;
   gboolean result = FALSE;
 
   priv = IRIDIUM_NOTE_GET_PRIVATE (note);
-  fuzzy = fuzzy_new (FALSE);
 
-  if (priv->content)
-    fuzzy_insert (fuzzy, priv->content, priv->content);
+  if (priv->content == NULL || priv->title == NULL)
+    return FALSE;
 
-  if (priv->title)
-    fuzzy_insert (fuzzy, priv->title, priv->title);
+  l_content = g_utf8_strdown (priv->content, -1);
+  l_title = g_utf8_strdown (priv->title, -1);
+  l_needle = g_utf8_strdown (needle, -1);
+  result = g_strstr_len (l_content, -1, l_needle) != NULL ||
+           g_strstr_len (l_title, -1, l_needle) != NULL;
 
-  matches = fuzzy_match (fuzzy, needle, 0);
-
-  for (guint i = 0; i < matches->len; i++) {
-    FuzzyMatch *match;
-
-    match = &g_array_index (matches, FuzzyMatch, i);
-
-    if (match->score > 0.1) {
-      result = TRUE;
-      break;
-    }
-  }
-
-  g_array_unref (matches);
-  fuzzy_free (fuzzy);
+  g_free (l_content);
+  g_free (l_title);
+  g_free (l_needle);
   return result;
 }
 

+ 0 - 1
src/meson.build

@@ -2,7 +2,6 @@ cc = meson.get_compiler('c')
 
 iridium_sources = [
   'main.c',
-  'fuzzy.c',
   'iridium-note.c',
   'iridium-note-row.c',
   'iridium-tag.c',