Преглед изворни кода

Add search based on Christian Hergert's code

Matthias Vogelgesang пре 7 година
родитељ
комит
edae8064d4
6 измењених фајлова са 650 додато и 4 уклоњено
  1. 523 0
      src/fuzzy.c
  2. 53 0
      src/fuzzy.h
  3. 44 1
      src/iridium-note.c
  4. 4 1
      src/iridium-note.h
  5. 25 2
      src/iridium-window.c
  6. 1 0
      src/meson.build

+ 523 - 0
src/fuzzy.c

@@ -0,0 +1,523 @@
+/* fuzzy.c
+ *
+ * Copyright (C) 2013 Christian Hergert <christian@hergert.me>
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ * 
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <ctype.h>
+#include <string.h>
+
+#include "fuzzy.h"
+
+
+#ifndef FUZZY_GROW_HEAP_BY
+#define FUZZY_GROW_HEAP_BY 4096
+#endif
+
+
+/**
+ * SECTION:fuzzy
+ * @title: Fuzzy Matching
+ * @short_description: Fuzzy matching for GLib based programs.
+ *
+ * TODO:
+ *
+ * It is a programming error to modify #Fuzzy while holding onto an array
+ * of #FuzzyMatch elements. The position of strings within the FuzzyMatch
+ * may no longer be valid.
+ */
+
+
+typedef struct _FuzzyItem   FuzzyItem;
+typedef struct _FuzzyLookup FuzzyLookup;
+
+
+struct _Fuzzy                         /* State of one fuzzy index instance. */
+{
+   gchar          *heap;              /* Buffer holding a NUL-terminated copy of every inserted key. */
+   gsize           heap_length;       /* Allocated size of heap, in bytes. */
+   gsize           heap_offset;       /* Bytes of heap in use; the next key is copied here. */
+   GArray         *id_to_text_offset; /* gsize offset into heap for each key, indexed by item id. */
+   GPtrArray      *id_to_value;       /* Caller-supplied value for each key, indexed by item id. */
+   GPtrArray      *char_tables;       /* 256 GArrays of FuzzyItem, indexed by character value. */
+   gboolean        in_bulk_insert;    /* TRUE between begin/end bulk insert; sorting is deferred. */
+   gboolean        case_sensitive;    /* When FALSE, keys and needles are ASCII-lowercased first. */
+};
+
+
+struct _FuzzyItem   /* One occurrence of a character inside one inserted key. */
+{
+   guint id  : 20;  /* Index of the key in id_to_text_offset / id_to_value. */
+   guint pos : 12;  /* Position of the character within the key. */
+};
+
+
+G_STATIC_ASSERT(sizeof(FuzzyItem) == 4);
+
+
+struct _FuzzyLookup           /* Scratch state for a single fuzzy_match() call. */
+{
+   Fuzzy        *fuzzy;       /* Index being searched. */
+   GArray      **tables;      /* Character table for each needle character, in needle order. */
+   gint         *state;       /* Per-table cursor; fuzzy_do_match() only ever advances it. */
+   guint         n_tables;    /* Number of entries in tables/state (the needle length). */
+   gsize         max_matches; /* Result limit requested by the caller. */
+   const gchar  *needle;      /* Needle as searched (lowercased when case-insensitive). */
+   GHashTable   *matches;     /* Maps item id to the lowest distance total found so far. */
+};
+
+
+/*
+ * Sort callback for the per-character tables: orders FuzzyItem elements
+ * by ascending id and, for equal ids, by ascending pos.
+ */
+static gint
+fuzzy_item_compare (gconstpointer a,
+                    gconstpointer b)
+{
+   const FuzzyItem *lhs = a;
+   const FuzzyItem *rhs = b;
+   gint delta = lhs->id - rhs->id;
+
+   if (delta != 0) {
+      return delta;
+   }
+
+   return lhs->pos - rhs->pos;
+}
+
+
+/*
+ * Sort callback for result arrays: higher scores come first, and equal
+ * scores are ordered by key using g_strcmp0().
+ */
+static gint
+fuzzy_match_compare (gconstpointer a,
+                     gconstpointer b)
+{
+   const FuzzyMatch *lhs = a;
+   const FuzzyMatch *rhs = b;
+
+   if (lhs->score > rhs->score) {
+      return -1;
+   }
+
+   if (lhs->score < rhs->score) {
+      return 1;
+   }
+
+   return g_strcmp0(lhs->key, rhs->key);
+}
+
+
+/**
+ * fuzzy_new:
+ * @case_sensitive: %TRUE to match keys exactly as given; %FALSE to
+ *   lowercase keys and needles (ASCII only) before indexing and matching.
+ *
+ * Allocates an empty #Fuzzy index: a key heap of FUZZY_GROW_HEAP_BY bytes,
+ * empty id lookup arrays and 256 empty character tables.
+ *
+ * Returns: A newly allocated #Fuzzy that should be freed with fuzzy_free().
+ */
+Fuzzy *
+fuzzy_new (gboolean case_sensitive)
+{
+   Fuzzy *self = g_new0(Fuzzy, 1);
+   guint n;
+
+   self->case_sensitive = case_sensitive;
+
+   /* Storage for copies of the inserted keys. */
+   self->heap_offset = 0;
+   self->heap_length = FUZZY_GROW_HEAP_BY;
+   self->heap = g_malloc(self->heap_length);
+
+   /* Per-id bookkeeping. */
+   self->id_to_text_offset = g_array_new(FALSE, FALSE, sizeof(gsize));
+   self->id_to_value = g_ptr_array_new();
+
+   /* One table of FuzzyItem per possible character value. */
+   self->char_tables = g_ptr_array_new();
+   g_ptr_array_set_free_func(self->char_tables,
+                             (GDestroyNotify)g_array_unref);
+
+   for (n = 0; n < 256; n++) {
+      g_ptr_array_add(self->char_tables,
+                      g_array_new(FALSE, FALSE, sizeof(FuzzyItem)));
+   }
+
+   return self;
+}
+
+
+/**
+ * fuzzy_new_with_free_func:
+ * @case_sensitive: passed through to fuzzy_new().
+ * @free_func: passed through to fuzzy_set_free_func().
+ *
+ * Convenience constructor combining fuzzy_new() and fuzzy_set_free_func().
+ *
+ * Returns: A newly allocated #Fuzzy that should be freed with fuzzy_free().
+ */
+Fuzzy *
+fuzzy_new_with_free_func (gboolean       case_sensitive,
+                          GDestroyNotify free_func)
+{
+   Fuzzy *self = fuzzy_new(case_sensitive);
+
+   fuzzy_set_free_func(self, free_func);
+
+   return self;
+}
+
+
+/**
+ * fuzzy_set_free_func:
+ * @fuzzy: A #Fuzzy.
+ * @free_func: Function installed as the element free function of the
+ *   array that stores the values passed to fuzzy_insert().
+ */
+void
+fuzzy_set_free_func (Fuzzy          *fuzzy,
+                     GDestroyNotify  free_func)
+{
+   GPtrArray *values;
+
+   g_return_if_fail(fuzzy);
+
+   values = fuzzy->id_to_value;
+   g_ptr_array_set_free_func(values, free_func);
+}
+
+
+/*
+ * Copies @text, including its terminating NUL, to the end of the key heap,
+ * growing the heap to a multiple of FUZZY_GROW_HEAP_BY when it is too small.
+ *
+ * Returns the offset of the copy within the heap. An offset is returned
+ * rather than a pointer because growing the heap may move it.
+ */
+static gsize
+fuzzy_heap_insert (Fuzzy       *fuzzy,
+                   const gchar *text)
+{
+   gsize start;
+   gsize nbytes;
+   gsize end;
+
+   g_assert(fuzzy);
+   g_assert(text);
+
+   start = fuzzy->heap_offset;
+   nbytes = strlen(text) + 1;
+   end = start + nbytes;
+
+   if (end > fuzzy->heap_length) {
+      gsize chunks = (end / FUZZY_GROW_HEAP_BY) + 1;
+
+      fuzzy->heap_length = chunks * FUZZY_GROW_HEAP_BY;
+      fuzzy->heap = g_realloc(fuzzy->heap, fuzzy->heap_length);
+   }
+
+   memcpy(fuzzy->heap + start, text, nbytes);
+   fuzzy->heap_offset = end;
+
+   return start;
+}
+
+
+/**
+ * fuzzy_begin_bulk_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ *
+ * Marks @fuzzy as being in a bulk insertion. While the mark is set,
+ * fuzzy_insert() skips sorting the character tables and fuzzy_match()
+ * refuses to search; fuzzy_end_bulk_insert() performs the deferred sort
+ * and clears the mark.
+ *
+ * Calling this while a bulk insertion is already in progress is rejected.
+ */
+void
+fuzzy_begin_bulk_insert (Fuzzy *fuzzy)
+{
+   g_return_if_fail(fuzzy);
+   g_return_if_fail(!fuzzy->in_bulk_insert);
+
+   /* Checked by fuzzy_insert() and fuzzy_match(). */
+   fuzzy->in_bulk_insert = TRUE;
+}
+
+
+/**
+ * fuzzy_end_bulk_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ *
+ * Ends a bulk insertion started with fuzzy_begin_bulk_insert() and sorts
+ * every character table so that @fuzzy can be searched again.
+ */
+void
+fuzzy_end_bulk_insert (Fuzzy *fuzzy)
+{
+   guint n;
+
+   g_return_if_fail(fuzzy);
+   g_return_if_fail(fuzzy->in_bulk_insert);
+
+   fuzzy->in_bulk_insert = FALSE;
+
+   for (n = 0; n < fuzzy->char_tables->len; n++) {
+      g_array_sort(g_ptr_array_index(fuzzy->char_tables, n),
+                   fuzzy_item_compare);
+   }
+}
+
+
+/**
+ * fuzzy_insert:
+ * @fuzzy: (in): A #Fuzzy.
+ * @key: (in): An ASCII string.
+ * @value: (in): A value to associate with key.
+ *
+ * Inserts a string into the fuzzy matcher. The key text is copied, so
+ * @key does not need to outlive the call. An empty @key is ignored.
+ *
+ * Note that @key is expected to be an ASCII string; UTF-8 is not
+ * supported. Bytes outside the ASCII range are indexed by their raw
+ * unsigned byte value rather than as characters.
+ *
+ * Only as many leading characters of @key as fit the position field of
+ * the index entries are indexed; characters beyond that cannot be matched.
+ */
+void
+fuzzy_insert (Fuzzy       *fuzzy,
+              const gchar *key,
+              gpointer     value)
+{
+   FuzzyItem item;
+   GArray *table;
+   gchar *downcase = NULL;
+   gsize offset;
+   gsize i;
+   guint id;
+
+   g_return_if_fail(fuzzy);
+   g_return_if_fail(key);
+   g_return_if_fail(fuzzy->id_to_text_offset->len < ((1 << 20) - 1));
+
+   if (!*key) {
+      return;
+   }
+
+   /*
+    * Insert the string into our heap.
+    * Track the offset within the heap since the heap could realloc.
+    * The original (not lowercased) text is what gets stored and returned.
+    */
+   offset = fuzzy_heap_insert(fuzzy, key);
+   g_array_append_val(fuzzy->id_to_text_offset, offset);
+   g_ptr_array_add(fuzzy->id_to_value, value);
+   g_assert_cmpint(fuzzy->id_to_value->len, ==, fuzzy->id_to_text_offset->len);
+
+   id = fuzzy->id_to_text_offset->len - 1;
+
+   /* The index itself is built from the lowercased key if requested. */
+   if (!fuzzy->case_sensitive) {
+      downcase = g_ascii_strdown(key, -1);
+      key = downcase;
+   }
+
+   for (i = 0; key[i]; i++) {
+      item.id = id;
+      item.pos = (guint)i;
+
+      /*
+       * Stop once the position no longer fits the bit-field; a wrapped
+       * position would make later characters look like earlier ones.
+       */
+      if ((gsize)item.pos != i) {
+         break;
+      }
+
+      /*
+       * gchar may be signed: index by the unsigned byte value so that
+       * bytes >= 0x80 stay inside the 256 character tables.
+       */
+      table = g_ptr_array_index(fuzzy->char_tables, (guchar)key[i]);
+      g_array_append_val(table, item);
+
+      if (!fuzzy->in_bulk_insert) {
+         g_array_sort(table, fuzzy_item_compare);
+      }
+   }
+
+   g_free(downcase);
+}
+
+
+/**
+ * fuzzy_free:
+ * @fuzzy: (allow-none): A #Fuzzy.
+ *
+ * Releases the key heap, the lookup arrays, the character tables and
+ * @fuzzy itself. Passing %NULL does nothing. @fuzzy must not be used
+ * after calling this.
+ */
+void
+fuzzy_free (Fuzzy *fuzzy)
+{
+   if (!fuzzy) {
+      return;
+   }
+
+   g_clear_pointer(&fuzzy->heap, g_free);
+   fuzzy->heap_offset = 0;
+   fuzzy->heap_length = 0;
+
+   g_clear_pointer(&fuzzy->id_to_text_offset, g_array_unref);
+   g_clear_pointer(&fuzzy->id_to_value, g_ptr_array_unref);
+   g_clear_pointer(&fuzzy->char_tables, g_ptr_array_unref);
+
+   g_free(fuzzy);
+}
+
+
+/*
+ * Tries to complete a match for the needle character at @table_index,
+ * given that the previous needle character was matched by @item.
+ *
+ * The table's cursor in lookup->state is advanced past entries that belong
+ * to an earlier key, or to the same key at a position not after @item.
+ * For each entry of the same key at a later position, the distance from
+ * @item is added to @score and either the next needle character is tried
+ * recursively or, for the last needle character, the total is recorded in
+ * lookup->matches if it is lower than what is already stored for that key.
+ *
+ * Returns TRUE as soon as one complete match was recorded, FALSE if the
+ * table ran out of entries or reached a later key.
+ */
+static gboolean
+fuzzy_do_match (FuzzyLookup *lookup,
+                FuzzyItem   *item,
+                gint         table_index,
+                gint         score)
+{
+   GArray *candidates;
+   gint *cursor;
+
+   g_assert(lookup);
+   g_assert(item);
+   g_assert(table_index);
+
+   candidates = lookup->tables[table_index];
+   cursor = &lookup->state[table_index];
+
+   while (*cursor < candidates->len) {
+      FuzzyItem *cand = &g_array_index(candidates, FuzzyItem, *cursor);
+
+      /* Entries are sorted by id: nothing further can belong to this key. */
+      if (cand->id > item->id) {
+         break;
+      }
+
+      if ((cand->id == item->id) && (cand->pos > item->pos)) {
+         gint total = score + (cand->pos - item->pos);
+
+         if ((table_index + 1) < lookup->n_tables) {
+            if (fuzzy_do_match(lookup, cand, table_index + 1, total)) {
+               return TRUE;
+            }
+         } else {
+            gpointer id_key = GINT_TO_POINTER(cand->id);
+
+            /* Keep only the lowest total seen for this key. */
+            if (!g_hash_table_contains(lookup->matches, id_key) ||
+                (total < GPOINTER_TO_INT(g_hash_table_lookup(lookup->matches, id_key)))) {
+               g_hash_table_insert(lookup->matches, id_key, GINT_TO_POINTER(total));
+            }
+
+            return TRUE;
+         }
+      }
+
+      (*cursor)++;
+   }
+
+   return FALSE;
+}
+
+
+/*
+ * Returns the stored key text for item @id. The pointer refers into the
+ * key heap, so it is only valid until the heap is next grown or freed.
+ */
+static const gchar *
+fuzzy_get_string (Fuzzy *fuzzy,
+                  gint   id)
+{
+   g_assert(fuzzy);
+   g_assert(id >= 0);
+
+   return fuzzy->heap + g_array_index(fuzzy->id_to_text_offset, gsize, id);
+}
+
+
+/**
+ * fuzzy_match:
+ * @fuzzy: (in): A #Fuzzy.
+ * @needle: (in): The needle to fuzzy search for.
+ * @max_matches: (in): The max number of matches to return, or 0 for
+ *   no limit.
+ *
+ * Fuzzy searches within @fuzzy for strings that fuzzy match @needle.
+ * Only up to @max_matches will be returned.
+ *
+ * @needle is expected to be an ascii string. Bytes outside the ASCII
+ * range are looked up by their raw unsigned byte value.
+ *
+ * For needles longer than one character the matches are sorted by
+ * descending score, where the score is 1 / (key length + summed distance
+ * between the matched characters). For a single-character needle every
+ * key containing that character is returned once, with a score of 0.
+ * An empty needle yields an empty array.
+ *
+ * Returns: (transfer full) (element-type FuzzyMatch): A newly allocated
+ *   #GArray containing #FuzzyMatch elements. This should be freed when
+ *   the caller is done with it using g_array_unref().
+ *   It is a programming error to keep the structure around longer than
+ *   the @fuzzy instance.
+ */
+GArray *
+fuzzy_match (Fuzzy       *fuzzy,
+             const gchar *needle,
+             gsize        max_matches)
+{
+   FuzzyLookup lookup = { 0 };
+   FuzzyMatch match;
+   FuzzyItem *item;
+   GArray *matches = NULL;
+   GArray *root;
+   gchar *downcase = NULL;
+   guint i;
+
+   g_return_val_if_fail(fuzzy, NULL);
+   g_return_val_if_fail(!fuzzy->in_bulk_insert, NULL);
+   g_return_val_if_fail(needle, NULL);
+
+   matches = g_array_new(FALSE, FALSE, sizeof(FuzzyMatch));
+
+   if (!*needle) {
+      return matches;
+   }
+
+   if (!fuzzy->case_sensitive) {
+      downcase = g_ascii_strdown(needle, -1);
+      needle = downcase;
+   }
+
+   lookup.fuzzy = fuzzy;
+   lookup.n_tables = strlen(needle);
+   lookup.state = g_new0(gint, lookup.n_tables);
+   lookup.tables = g_new0(GArray*, lookup.n_tables);
+   lookup.needle = needle;
+   lookup.max_matches = max_matches;
+   lookup.matches = g_hash_table_new(NULL, NULL);
+
+   /*
+    * gchar may be signed: index by the unsigned byte value so that bytes
+    * >= 0x80 stay inside the 256 character tables.
+    */
+   for (i = 0; i < lookup.n_tables; i++) {
+      lookup.tables[i] = g_ptr_array_index(fuzzy->char_tables,
+                                           (guchar)needle[i]);
+   }
+
+   root = lookup.tables[0];
+
+   if (G_LIKELY(lookup.n_tables > 1)) {
+      GHashTableIter iter;
+      gpointer key;
+      gpointer value;
+
+      /* Collect the best distance total per key into lookup.matches. */
+      for (i = 0; i < root->len; i++) {
+         item = &g_array_index(root, FuzzyItem, i);
+         fuzzy_do_match(&lookup, item, 1, 0);
+      }
+
+      g_hash_table_iter_init(&iter, lookup.matches);
+      while (g_hash_table_iter_next(&iter, &key, &value)) {
+         match.key = fuzzy_get_string(fuzzy, GPOINTER_TO_INT(key));
+         match.score = 1.0 / (strlen(match.key) + GPOINTER_TO_INT(value));
+         match.value = g_ptr_array_index(fuzzy->id_to_value,
+                                         GPOINTER_TO_INT(key));
+         g_array_append_val(matches, match);
+      }
+
+      g_array_sort(matches, fuzzy_match_compare);
+   } else {
+      /* Item ids are 20 bits wide, so G_MAXUINT never names a real id. */
+      guint last_id = G_MAXUINT;
+
+      /*
+       * The table is sorted by id, so repeated occurrences of the
+       * character within one key are adjacent; report each key once.
+       */
+      for (i = 0; i < root->len; i++) {
+         item = &g_array_index(root, FuzzyItem, i);
+
+         if (item->id == last_id) {
+            continue;
+         }
+
+         last_id = item->id;
+         match.key = fuzzy_get_string(fuzzy, item->id);
+         match.value = g_ptr_array_index(fuzzy->id_to_value, item->id);
+         match.score = 0;
+         g_array_append_val(matches, match);
+      }
+   }
+
+   /*
+    * TODO: We could be more clever here when inserting into the array
+    *       only if it is a lower score than the end or < max items.
+    */
+   if (max_matches && (matches->len > max_matches)) {
+      g_array_set_size(matches, max_matches);
+   }
+
+   /* Single exit so the lookup scratch state is released on every path. */
+   g_free(downcase);
+   g_free(lookup.state);
+   g_free(lookup.tables);
+   g_hash_table_unref(lookup.matches);
+
+   return matches;
+}

+ 53 - 0
src/fuzzy.h

@@ -0,0 +1,53 @@
+/* fuzzy.h
+ *
+ * Copyright (C) 2013 Christian Hergert <christian@hergert.me>
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ * 
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef FUZZY_H
+#define FUZZY_H
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct _Fuzzy      Fuzzy;
+typedef struct _FuzzyMatch FuzzyMatch;
+
+struct _FuzzyMatch      /* One element of the array returned by fuzzy_match(). */
+{
+   const gchar *key;    /* Matched key text; points into the Fuzzy instance's own storage. */
+   gpointer     value;  /* Value that was passed to fuzzy_insert() together with the key. */
+   gfloat       score;  /* Match quality; results are sorted with higher scores first. */
+};
+
+Fuzzy     *fuzzy_new                (gboolean        case_sensitive); /* Create an empty index. */
+Fuzzy     *fuzzy_new_with_free_func (gboolean        case_sensitive,
+                                     GDestroyNotify  free_func);      /* fuzzy_new() followed by fuzzy_set_free_func(). */
+void       fuzzy_set_free_func      (Fuzzy          *fuzzy,
+                                     GDestroyNotify  free_func);      /* Set the free function for stored values. */
+void       fuzzy_begin_bulk_insert  (Fuzzy          *fuzzy);          /* Defer index sorting until the matching end call. */
+void       fuzzy_end_bulk_insert    (Fuzzy          *fuzzy);          /* Sort the index after a bulk insert. */
+void       fuzzy_insert             (Fuzzy          *fuzzy,
+                                     const gchar    *key,
+                                     gpointer        value);          /* Add an ASCII key with an associated value. */
+GArray    *fuzzy_match              (Fuzzy          *fuzzy,
+                                     const gchar    *needle,
+                                     gsize           max_matches);    /* Returns a GArray of FuzzyMatch; free with g_array_unref(). */
+void       fuzzy_free               (Fuzzy          *fuzzy);          /* Release the index; NULL is accepted. */
+
+G_END_DECLS
+
+#endif /* FUZZY_H */

+ 44 - 1
src/iridium-note.c

@@ -18,6 +18,7 @@
 
 #include "iridium-config.h"
 #include "iridium-note.h"
+#include "fuzzy.h"
 
 #define IRIDIUM_NOTE_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE((obj), IRIDIUM_TYPE_NOTE, IridiumNotePrivate))
 
@@ -76,7 +77,49 @@ iridium_note_get_tags (IridiumNote *note)
 {
   return g_list_copy (IRIDIUM_NOTE_GET_PRIVATE (note)->tags);
 }
-                  
+
+/*
+ * Returns the title pointer held in the note's private data. The string
+ * is not copied.
+ */
+const gchar *
+iridium_note_get_title (IridiumNote *note)
+{
+  IridiumNotePrivate *priv = IRIDIUM_NOTE_GET_PRIVATE (note);
+
+  return priv->title;
+}
+
+/*
+ * Reports whether the note's content or title fuzzy-matches @needle.
+ *
+ * A temporary case-insensitive Fuzzy index is built from the note's
+ * content and title (whichever are set) and searched for @needle; the note
+ * matches when any result scores above 0.1.
+ *
+ * A NULL or empty @needle never matches.
+ */
+gboolean
+iridium_note_matches_fuzzy (IridiumNote *note,
+                            const gchar *needle)
+{
+  Fuzzy *fuzzy;
+  GArray *matches;
+  IridiumNotePrivate *priv;
+  gboolean result = FALSE;
+
+  /* fuzzy_match() rejects NULL and returns no matches for "". */
+  if (needle == NULL || *needle == '\0')
+    return FALSE;
+
+  priv = IRIDIUM_NOTE_GET_PRIVATE (note);
+  fuzzy = fuzzy_new (FALSE);
+
+  if (priv->content)
+    fuzzy_insert (fuzzy, priv->content, priv->content);
+
+  if (priv->title)
+    fuzzy_insert (fuzzy, priv->title, priv->title);
+
+  matches = fuzzy_match (fuzzy, needle, 0);
+
+  /* fuzzy_match() returns NULL when one of its preconditions fails. */
+  if (matches != NULL) {
+    for (guint i = 0; i < matches->len; i++) {
+      FuzzyMatch *match;
+
+      match = &g_array_index (matches, FuzzyMatch, i);
+
+      if (match->score > 0.1) {
+        result = TRUE;
+        break;
+      }
+    }
+
+    g_array_unref (matches);
+  }
+
+  fuzzy_free (fuzzy);
+  return result;
+}
+
 static void
 iridium_note_set_property (GObject *object,
                            guint property_id,

+ 4 - 1
src/iridium-note.h

@@ -17,12 +17,15 @@ struct _IridiumNoteClass {
   void  (*tags_changed)     (IridiumNote *note);
 };
 
-IridiumNote *iridium_note_new           (const gchar    *title, 
+IridiumNote *iridium_note_new           (const gchar    *title,
                                          const gchar    *content);
 void         iridium_note_add_tag       (IridiumNote    *note,
                                          IridiumTag     *tag);
 GList       *iridium_note_get_tags      (IridiumNote    *note);
+const gchar *iridium_note_get_title     (IridiumNote    *note);
 gboolean     iridium_note_has_tag       (IridiumNote    *note,
                                          IridiumTag     *tag);
+gboolean     iridium_note_matches_fuzzy (IridiumNote    *note,
+                                         const gchar    *needle);
 
 G_END_DECLS

+ 25 - 2
src/iridium-window.c

@@ -48,6 +48,12 @@ iridium_window_tag_selected (GtkListBox *box, IridiumTagRow *row, IridiumWindow
   gtk_list_box_invalidate_filter (self->note_list);
 }
 
+/*
+ * "search-changed" handler for the search entry: asks the note list to
+ * re-run its filter function so the visible rows reflect the new text.
+ */
+static void
+iridium_window_search_changed (GtkSearchEntry *entry, IridiumWindow *self)
+{
+  (void) entry; /* the filter function reads the text from the window */
+
+  gtk_list_box_invalidate_filter (self->note_list);
+}
+
 static void
 iridium_window_note_selected (GtkListBox *box, IridiumNoteRow *row, IridiumWindow *self)
 {
@@ -74,10 +80,26 @@ iridium_window_search_activated (GAction *action, GVariant *param, IridiumWindow
 static gboolean
 iridium_window_note_visible (IridiumNoteRow *row, IridiumWindow *window)
 {
+  /*
+   * Filter function for the note list. A row stays visible when its note
+   * carries the selected tag (if a tag is selected) and, while search mode
+   * is on with a non-empty entry, fuzzy-matches the search text.
+   */
+  IridiumNote *note;
   IridiumTagRow *tag_row;
 
   tag_row = IRIDIUM_TAG_ROW (gtk_list_box_get_selected_row (window->tag_list));
-  return tag_row == NULL ? TRUE : iridium_note_row_has_tag (row, iridium_tag_row_get_tag (tag_row));
+  note = iridium_note_row_get_note (row);
+
+  /*
+   * Without a selected tag there is no tag restriction, but the search
+   * filter below must still be applied.
+   */
+  if (tag_row != NULL &&
+      !iridium_note_has_tag (note, iridium_tag_row_get_tag (tag_row))) {
+    return FALSE;
+  }
+
+  if (gtk_search_bar_get_search_mode (window->search_bar) &&
+      gtk_entry_get_text_length (GTK_ENTRY (window->search_entry)) > 0) {
+    return iridium_note_matches_fuzzy (note, gtk_entry_get_text (GTK_ENTRY (window->search_entry)));
+  }
+
+  return TRUE;
 }
 
 static void
@@ -95,7 +117,7 @@ iridium_window_class_init (IridiumWindowClass *klass)
 {
   GObjectClass *oclass;
   GtkWidgetClass *widget_class;
-  
+
   oclass = G_OBJECT_CLASS (klass);
   widget_class = GTK_WIDGET_CLASS (klass);
 
@@ -163,6 +185,7 @@ iridium_window_init (IridiumWindow *self)
 
   g_signal_connect (self->tag_list, "row-selected", G_CALLBACK (iridium_window_tag_selected), self);
   g_signal_connect (self->note_list, "row-selected", G_CALLBACK (iridium_window_note_selected), self);
+  g_signal_connect (self->search_entry, "search-changed", G_CALLBACK (iridium_window_search_changed), self);
 
   gtk_list_box_set_filter_func (self->note_list, (GtkListBoxFilterFunc) iridium_window_note_visible, self, NULL);
 

+ 1 - 0
src/meson.build

@@ -1,5 +1,6 @@
 iridium_sources = [
   'main.c',
+  'fuzzy.c',
   'iridium-note.c',
   'iridium-note-row.c',
   'iridium-tag.c',