1 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * Copyright (C) 2008-2010 Kouhei Sutou <kou@clear-code.com>
4 *
5 * This library is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 *
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif /* HAVE_CONFIG_H */
23
24 #include <stdlib.h>
25 #include <string.h>
26 #include <glib.h>
27
28 #include "cut-sequence-matcher.h"
29 #include "../gcutter/gcut-list.h"
30
31 #define CUT_SEQUENCE_MATCHER_GET_PRIVATE(obj) (G_TYPE_INSTANCE_GET_PRIVATE ((obj), CUT_TYPE_SEQUENCE_MATCHER, CutSequenceMatcherPrivate))
32
33 typedef struct _CutSequenceMatcherPrivate CutSequenceMatcherPrivate;
34 struct _CutSequenceMatcherPrivate
35 {
36 GSequence *from;
37 GSequence *to;
38 GSequenceIterCompareFunc compare_func;
39 gpointer compare_func_user_data;
40 GHashTable *to_indices;
41 GHashTable *junks;
42 GList *matches;
43 GList *blocks;
44 GList *operations;
45 GList *grouped_operations;
46 gdouble ratio;
47 guint context_size;
48 };
49
50 enum
51 {
52 PROP_0,
53 PROP_FROM_SEQUENCE,
54 PROP_TO_SEQUENCE,
55 PROP_COMPARE_FUNC,
56 PROP_COMPARE_FUNC_USER_DATA,
57 PROP_TO_INDICES,
58 PROP_JUNKS
59 };
60
61 G_DEFINE_TYPE (CutSequenceMatcher, cut_sequence_matcher, G_TYPE_OBJECT)
62
63 static void dispose (GObject *object);
64 static void set_property (GObject *object,
65 guint prop_id,
66 const GValue *value,
67 GParamSpec *pspec);
68 static void get_property (GObject *object,
69 guint prop_id,
70 GValue *value,
71 GParamSpec *pspec);
72
73 CutSequenceMatchInfo *
cut_sequence_match_info_new(guint from_index,guint to_index,guint size)74 cut_sequence_match_info_new (guint from_index, guint to_index, guint size)
75 {
76 CutSequenceMatchInfo *info;
77
78 info = g_slice_new(CutSequenceMatchInfo);
79 info->from_index = from_index;
80 info->to_index = to_index;
81 info->size = size;
82 return info;
83 }
84
85 void
cut_sequence_match_info_free(CutSequenceMatchInfo * info)86 cut_sequence_match_info_free (CutSequenceMatchInfo *info)
87 {
88 g_slice_free(CutSequenceMatchInfo, info);
89 }
90
91 CutSequenceMatchOperation *
cut_sequence_match_operation_new(CutSequenceMatchOperationType type,guint from_begin,guint from_end,guint to_begin,guint to_end)92 cut_sequence_match_operation_new (CutSequenceMatchOperationType type,
93 guint from_begin, guint from_end,
94 guint to_begin, guint to_end)
95 {
96 CutSequenceMatchOperation *operation;
97
98 operation = g_slice_new(CutSequenceMatchOperation);
99 operation->type = type;
100 operation->from_begin = from_begin;
101 operation->from_end = from_end;
102 operation->to_begin = to_begin;
103 operation->to_end = to_end;
104 return operation;
105 }
106
107 void
cut_sequence_match_operation_free(CutSequenceMatchOperation * operation)108 cut_sequence_match_operation_free (CutSequenceMatchOperation *operation)
109 {
110 g_slice_free(CutSequenceMatchOperation, operation);
111 }
112
113 static void
cut_sequence_matcher_class_init(CutSequenceMatcherClass * klass)114 cut_sequence_matcher_class_init (CutSequenceMatcherClass *klass)
115 {
116 GObjectClass *gobject_class;
117 GParamSpec *spec;
118
119 gobject_class = G_OBJECT_CLASS(klass);
120
121 gobject_class->dispose = dispose;
122 gobject_class->set_property = set_property;
123 gobject_class->get_property = get_property;
124
125 spec = g_param_spec_pointer("from-sequence",
126 "From Sequence",
127 "From Sequence",
128 G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
129 g_object_class_install_property(gobject_class, PROP_FROM_SEQUENCE, spec);
130
131 spec = g_param_spec_pointer("to-sequence",
132 "To Sequence",
133 "To Sequence",
134 G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
135 g_object_class_install_property(gobject_class, PROP_TO_SEQUENCE, spec);
136
137 spec = g_param_spec_pointer("compare-function",
138 "Compare Fcuntion",
139 "Compare Fcuntion",
140 G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
141 g_object_class_install_property(gobject_class, PROP_COMPARE_FUNC, spec);
142
143 spec = g_param_spec_pointer("compare-function-user-data",
144 "Compare Fcuntion User Data",
145 "Compare Fcuntion User Data",
146 G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
147 g_object_class_install_property(gobject_class, PROP_COMPARE_FUNC_USER_DATA, spec);
148
149 spec = g_param_spec_pointer("to-indices",
150 "To Indecies",
151 "To Indecies",
152 G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
153 g_object_class_install_property(gobject_class, PROP_TO_INDICES, spec);
154
155 spec = g_param_spec_pointer("junks",
156 "Junks",
157 "Junks",
158 G_PARAM_READWRITE | G_PARAM_CONSTRUCT);
159 g_object_class_install_property(gobject_class, PROP_JUNKS, spec);
160
161 g_type_class_add_private(gobject_class, sizeof(CutSequenceMatcherPrivate));
162 }
163
164 static void
cut_sequence_matcher_init(CutSequenceMatcher * sequence_matcher)165 cut_sequence_matcher_init (CutSequenceMatcher *sequence_matcher)
166 {
167 CutSequenceMatcherPrivate *priv;
168
169 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(sequence_matcher);
170
171 priv->from = NULL;
172 priv->to = NULL;
173 priv->to_indices = NULL;
174 priv->junks = NULL;
175 priv->matches = NULL;
176 priv->blocks = NULL;
177 priv->operations = NULL;
178 priv->grouped_operations = NULL;
179 priv->ratio = -1.0;
180 priv->context_size = 3;
181 }
182
183 static void
dispose_goruped_operations(CutSequenceMatcherPrivate * priv)184 dispose_goruped_operations (CutSequenceMatcherPrivate *priv)
185 {
186 GList *node;
187
188 if (!priv->grouped_operations)
189 return;
190
191 for (node = priv->grouped_operations; node; node = g_list_next(node)) {
192 GList *operations = node->data;
193 g_list_foreach(operations,
194 (GFunc)cut_sequence_match_operation_free, NULL);
195 g_list_free(operations);
196 }
197 g_list_free(priv->grouped_operations);
198 priv->grouped_operations = NULL;
199 }
200
201 static void
dispose(GObject * object)202 dispose (GObject *object)
203 {
204 CutSequenceMatcherPrivate *priv;
205
206 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(object);
207
208 if (priv->from) {
209 g_sequence_free(priv->from);
210 priv->from = NULL;
211 }
212
213 if (priv->to) {
214 g_sequence_free(priv->to);
215 priv->to = NULL;
216 }
217
218 if (priv->to_indices) {
219 g_hash_table_unref(priv->to_indices);
220 priv->to_indices = NULL;
221 }
222
223 if (priv->junks) {
224 g_hash_table_unref(priv->junks);
225 priv->junks = NULL;
226 }
227
228 if (priv->matches) {
229 g_list_foreach(priv->matches, (GFunc)cut_sequence_match_info_free, NULL);
230 g_list_free(priv->matches);
231 priv->matches = NULL;
232 }
233
234 if (priv->blocks) {
235 g_list_foreach(priv->blocks, (GFunc)cut_sequence_match_info_free, NULL);
236 g_list_free(priv->blocks);
237 priv->blocks = NULL;
238 }
239
240
241 if (priv->operations) {
242 g_list_foreach(priv->operations,
243 (GFunc)cut_sequence_match_operation_free, NULL);
244 g_list_free(priv->operations);
245 priv->operations = NULL;
246 }
247
248 dispose_goruped_operations(priv);
249
250 G_OBJECT_CLASS(cut_sequence_matcher_parent_class)->dispose(object);
251 }
252
253 static void
set_property(GObject * object,guint prop_id,const GValue * value,GParamSpec * pspec)254 set_property (GObject *object,
255 guint prop_id,
256 const GValue *value,
257 GParamSpec *pspec)
258 {
259 CutSequenceMatcherPrivate *priv;
260 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(object);
261
262 switch (prop_id) {
263 case PROP_FROM_SEQUENCE:
264 priv->from = g_value_get_pointer(value);
265 break;
266 case PROP_TO_SEQUENCE:
267 priv->to = g_value_get_pointer(value);
268 break;
269 case PROP_COMPARE_FUNC:
270 priv->compare_func = g_value_get_pointer(value);
271 break;
272 case PROP_COMPARE_FUNC_USER_DATA:
273 priv->compare_func_user_data = g_value_get_pointer(value);
274 break;
275 case PROP_TO_INDICES:
276 priv->to_indices = g_value_get_pointer(value);
277 break;
278 case PROP_JUNKS:
279 priv->junks = g_value_get_pointer(value);
280 break;
281 default:
282 G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
283 break;
284 }
285 }
286
287 static void
get_property(GObject * object,guint prop_id,GValue * value,GParamSpec * pspec)288 get_property (GObject *object,
289 guint prop_id,
290 GValue *value,
291 GParamSpec *pspec)
292 {
293 CutSequenceMatcherPrivate *priv;
294 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(object);
295
296 switch (prop_id) {
297 case PROP_FROM_SEQUENCE:
298 g_value_set_pointer(value, priv->from);
299 break;
300 case PROP_TO_SEQUENCE:
301 g_value_set_pointer(value, priv->to);
302 break;
303 case PROP_COMPARE_FUNC:
304 g_value_set_pointer(value, priv->compare_func);
305 break;
306 case PROP_COMPARE_FUNC_USER_DATA:
307 g_value_set_pointer(value, priv->compare_func_user_data);
308 break;
309 case PROP_TO_INDICES:
310 g_value_set_pointer(value, priv->to_indices);
311 break;
312 case PROP_JUNKS:
313 g_value_set_pointer(value, priv->junks);
314 break;
315 default:
316 G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
317 break;
318 }
319 }
320
321 typedef struct {
322 GHashTable *junks;
323 CutJunkFilterFunc junk_filter_func;
324 gpointer junk_filter_func_user_data;
325 } RemoveJunkData;
326
327 static gboolean
remove_junk(gpointer key,gpointer value,gpointer user_data)328 remove_junk (gpointer key, gpointer value, gpointer user_data)
329 {
330 RemoveJunkData *data = user_data;
331 gboolean is_junk;
332
333 is_junk = data->junk_filter_func(key, data->junk_filter_func_user_data);
334 if (is_junk) {
335 g_hash_table_insert(data->junks, key, GINT_TO_POINTER(TRUE));
336 }
337 return is_junk;
338 }
339
340 static void
remove_junks_from_to_indices(CutSequenceMatcher * matcher,CutJunkFilterFunc junk_filter_func,gpointer junk_filter_func_user_data)341 remove_junks_from_to_indices (CutSequenceMatcher *matcher,
342 CutJunkFilterFunc junk_filter_func,
343 gpointer junk_filter_func_user_data)
344 {
345 CutSequenceMatcherPrivate *priv;
346 RemoveJunkData data;
347
348 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
349 data.junk_filter_func = junk_filter_func;
350 data.junk_filter_func_user_data = junk_filter_func_user_data;
351 data.junks = priv->junks;
352 g_hash_table_foreach_steal(priv->to_indices, remove_junk, &data);
353 }
354
355 static void
update_to_indices(CutSequenceMatcher * matcher,CutJunkFilterFunc junk_filter_func,gpointer junk_filter_func_user_data)356 update_to_indices (CutSequenceMatcher *matcher,
357 CutJunkFilterFunc junk_filter_func,
358 gpointer junk_filter_func_user_data)
359 {
360 CutSequenceMatcherPrivate *priv;
361 gint i;
362 GSequenceIter *iter, *begin, *end;
363
364 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
365
366 if (!priv->to)
367 return;
368
369 begin = g_sequence_get_begin_iter(priv->to);
370 end = g_sequence_get_end_iter(priv->to);
371 for (i = 0, iter = begin;
372 iter != end;
373 i++, iter = g_sequence_iter_next(iter)) {
374 gpointer data;
375 GList *indices;
376
377 data = g_sequence_get(iter);
378 indices = g_hash_table_lookup(priv->to_indices, data);
379 indices = g_list_append(indices, GINT_TO_POINTER(i));
380 g_hash_table_replace(priv->to_indices, data, g_list_copy(indices));
381 }
382
383 if (junk_filter_func)
384 remove_junks_from_to_indices(matcher,
385 junk_filter_func,
386 junk_filter_func_user_data);
387 }
388
389 CutSequenceMatcher *
cut_sequence_matcher_new(GSequence * from,GSequence * to,GSequenceIterCompareFunc compare_func,gpointer compare_func_user_data,GHashFunc content_hash_func,GEqualFunc content_equal_func,CutJunkFilterFunc junk_filter_func,gpointer junk_filter_func_user_data)390 cut_sequence_matcher_new (GSequence *from, GSequence *to,
391 GSequenceIterCompareFunc compare_func,
392 gpointer compare_func_user_data,
393 GHashFunc content_hash_func,
394 GEqualFunc content_equal_func,
395 CutJunkFilterFunc junk_filter_func,
396 gpointer junk_filter_func_user_data)
397 {
398 CutSequenceMatcher *matcher;
399
400 matcher = g_object_new(CUT_TYPE_SEQUENCE_MATCHER,
401 "from-sequence", from,
402 "to-sequence", to,
403 "compare-function", compare_func,
404 "compare-function-user-data", compare_func_user_data,
405 "to-indices",
406 g_hash_table_new_full(content_hash_func,
407 content_equal_func,
408 NULL,
409 (GDestroyNotify)g_list_free),
410 "junks", g_hash_table_new(content_hash_func,
411 content_equal_func),
412 NULL);
413 update_to_indices(matcher, junk_filter_func, junk_filter_func_user_data);
414 return matcher;
415 }
416
417 static GSequence *
char_sequence_new(const gchar * string)418 char_sequence_new (const gchar *string)
419 {
420 GSequence *sequence;
421
422 sequence = g_sequence_new(NULL);
423
424 for (; *string != '\0'; string = g_utf8_next_char(string)) {
425 g_sequence_append(sequence, GUINT_TO_POINTER(g_utf8_get_char(string)));
426 }
427
428 return sequence;
429 }
430
431 static gint
char_sequence_iter_compare(GSequenceIter * data1,GSequenceIter * data2,gpointer user_data)432 char_sequence_iter_compare (GSequenceIter *data1, GSequenceIter *data2,
433 gpointer user_data)
434 {
435 gunichar character1, character2;
436
437 character1 = GPOINTER_TO_UINT(g_sequence_get(data1));
438 character2 = GPOINTER_TO_UINT(g_sequence_get(data2));
439
440 if (character1 < character2) {
441 return -1;
442 } else if (character1 == character2) {
443 return 0;
444 } else {
445 return 1;
446 }
447 }
448
449 static guint
char_value_hash(gconstpointer v)450 char_value_hash (gconstpointer v)
451 {
452 return GPOINTER_TO_UINT(v);
453 }
454
455 static gboolean
char_value_equal(gconstpointer v1,gconstpointer v2)456 char_value_equal (gconstpointer v1, gconstpointer v2)
457 {
458 gunichar character1, character2;
459 character1 = GPOINTER_TO_UINT(v1);
460 character2 = GPOINTER_TO_UINT(v2);
461 return character1 == character2;
462 }
463
464 CutSequenceMatcher *
cut_sequence_matcher_char_new(const gchar * from,const gchar * to)465 cut_sequence_matcher_char_new (const gchar *from, const gchar *to)
466 {
467 return cut_sequence_matcher_char_new_full(from, to, NULL, NULL);
468 }
469
470 CutSequenceMatcher *
cut_sequence_matcher_char_new_full(const gchar * from,const gchar * to,CutJunkFilterFunc junk_filter_func,gpointer junk_filter_func_user_data)471 cut_sequence_matcher_char_new_full (const gchar *from, const gchar *to,
472 CutJunkFilterFunc junk_filter_func,
473 gpointer junk_filter_func_user_data)
474 {
475 return cut_sequence_matcher_new(char_sequence_new(from),
476 char_sequence_new(to),
477 char_sequence_iter_compare, NULL,
478 char_value_hash, char_value_equal,
479 junk_filter_func,
480 junk_filter_func_user_data);
481 }
482
483 static GSequence *
string_sequence_new(gchar ** strings)484 string_sequence_new (gchar **strings)
485 {
486 GSequence *sequence;
487
488 sequence = g_sequence_new(g_free);
489
490 for (; *strings; strings++) {
491 g_sequence_append(sequence, g_strdup(*strings));
492 }
493
494 return sequence;
495 }
496
497 static gint
string_sequence_iter_compare(GSequenceIter * data1,GSequenceIter * data2,gpointer user_data)498 string_sequence_iter_compare (GSequenceIter *data1, GSequenceIter *data2,
499 gpointer user_data)
500 {
501 gchar *string1, *string2;
502
503 string1 = g_sequence_get(data1);
504 string2 = g_sequence_get(data2);
505
506 return strcmp(string1, string2);
507 }
508
509 CutSequenceMatcher *
cut_sequence_matcher_string_new(gchar ** from,gchar ** to)510 cut_sequence_matcher_string_new (gchar **from, gchar **to)
511 {
512 return cut_sequence_matcher_string_new_full(from, to, NULL, NULL);
513 }
514
515 CutSequenceMatcher *
cut_sequence_matcher_string_new_full(gchar ** from,gchar ** to,CutJunkFilterFunc junk_filter_func,gpointer junk_filter_func_user_data)516 cut_sequence_matcher_string_new_full (gchar **from, gchar **to,
517 CutJunkFilterFunc junk_filter_func,
518 gpointer junk_filter_func_user_data)
519 {
520 return cut_sequence_matcher_new(string_sequence_new(from),
521 string_sequence_new(to),
522 string_sequence_iter_compare, NULL,
523 g_str_hash, g_str_equal,
524 junk_filter_func,
525 junk_filter_func_user_data);
526 }
527
528 const GList *
cut_sequence_matcher_get_to_index(CutSequenceMatcher * matcher,gpointer to_content)529 cut_sequence_matcher_get_to_index (CutSequenceMatcher *matcher,
530 gpointer to_content)
531 {
532 CutSequenceMatcherPrivate *priv;
533
534 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
535 if (!priv->to)
536 return NULL;
537
538 return g_hash_table_lookup(priv->to_indices, to_content);
539 }
540
541 static CutSequenceMatchInfo *
find_best_match_position(CutSequenceMatcher * matcher,guint from_begin,guint from_end,guint to_begin,guint to_end)542 find_best_match_position (CutSequenceMatcher *matcher,
543 guint from_begin, guint from_end,
544 guint to_begin, guint to_end)
545 {
546 CutSequenceMatcherPrivate *priv;
547 CutSequenceMatchInfo *info;
548 GSequenceIter *iter, *begin, *end;
549 GHashTable *sizes, *current_sizes;
550
551 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
552
553 info = cut_sequence_match_info_new(from_begin, to_begin, 0);
554
555 sizes = g_hash_table_new(g_direct_hash, g_direct_equal);
556
557 begin = g_sequence_get_iter_at_pos(priv->from, from_begin);
558 end = g_sequence_get_iter_at_pos(priv->from, from_end);
559 for (iter = begin; iter != end; iter = g_sequence_iter_next(iter)) {
560 guint from_index;
561 const GList *node;
562
563 from_index = g_sequence_iter_get_position(iter);
564 current_sizes = g_hash_table_new(g_direct_hash, g_direct_equal);
565 for (node = cut_sequence_matcher_get_to_index(matcher,
566 g_sequence_get(iter));
567 node;
568 node = g_list_next(node)) {
569 guint size, to_index;
570 gpointer size_as_pointer;
571
572 to_index = GPOINTER_TO_UINT(node->data);
573 if (to_index < to_begin)
574 continue;
575 if (to_index + 1 > to_end)
576 break;
577
578 size_as_pointer =
579 g_hash_table_lookup(sizes, GUINT_TO_POINTER(to_index - 1));
580 size = GPOINTER_TO_INT(size_as_pointer);
581 size++;
582 size_as_pointer = GUINT_TO_POINTER(size);
583 g_hash_table_insert(current_sizes,
584 GUINT_TO_POINTER(to_index),
585 size_as_pointer);
586 if (size > info->size) {
587 info->from_index = from_index - size + 1;
588 info->to_index = to_index - size + 1;
589 info->size = size;
590 }
591 }
592 g_hash_table_unref(sizes);
593 sizes = current_sizes;
594 }
595 g_hash_table_unref(sizes);
596
597 return info;
598 }
599
600 static gboolean
check_junk(CutSequenceMatcherPrivate * priv,gboolean should_junk,guint index)601 check_junk (CutSequenceMatcherPrivate *priv, gboolean should_junk, guint index)
602 {
603 gpointer key;
604 gboolean is_junk;
605
606 key = g_sequence_get(g_sequence_get_iter_at_pos(priv->to, index));
607 is_junk = GPOINTER_TO_INT(g_hash_table_lookup(priv->junks, key));
608 return should_junk ? is_junk : !is_junk;
609 }
610
611 static gboolean
equal_content(CutSequenceMatcherPrivate * priv,guint from_index,guint to_index)612 equal_content (CutSequenceMatcherPrivate *priv, guint from_index, guint to_index)
613 {
614 GSequenceIter *from_iter, *to_iter;
615
616 from_iter = g_sequence_get_iter_at_pos(priv->from, from_index);
617 to_iter = g_sequence_get_iter_at_pos(priv->to, to_index);
618 return priv->compare_func(from_iter, to_iter,
619 priv->compare_func_user_data) == 0;
620 }
621
622
623 static void
adjust_best_info_with_junk_predicate(CutSequenceMatcher * matcher,gboolean should_junk,CutSequenceMatchInfo * best_info,guint from_begin,guint from_end,guint to_begin,guint to_end)624 adjust_best_info_with_junk_predicate (CutSequenceMatcher *matcher,
625 gboolean should_junk,
626 CutSequenceMatchInfo *best_info,
627 guint from_begin, guint from_end,
628 guint to_begin, guint to_end)
629 {
630 CutSequenceMatcherPrivate *priv;
631
632 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
633
634 while (best_info->from_index > from_begin &&
635 best_info->to_index > to_begin &&
636 check_junk(priv, should_junk, best_info->to_index - 1) &&
637 equal_content(priv,
638 best_info->from_index - 1,
639 best_info->to_index - 1)) {
640 best_info->from_index--;
641 best_info->to_index--;
642 best_info->size++;
643 }
644
645 while (best_info->from_index + best_info->size + 1 < from_end &&
646 best_info->to_index + best_info->size + 1 < to_end &&
647 check_junk(priv,
648 should_junk,
649 best_info->to_index + best_info->size) &&
650 equal_content(priv,
651 best_info->from_index + best_info->size,
652 best_info->to_index + best_info->size)) {
653 best_info->size++;
654 }
655 }
656
657 CutSequenceMatchInfo *
cut_sequence_matcher_get_longest_match(CutSequenceMatcher * matcher,guint from_begin,guint from_end,guint to_begin,guint to_end)658 cut_sequence_matcher_get_longest_match (CutSequenceMatcher *matcher,
659 guint from_begin, guint from_end,
660 guint to_begin, guint to_end)
661 {
662 CutSequenceMatcherPrivate *priv;
663 CutSequenceMatchInfo *info;
664
665 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
666
667 info = find_best_match_position(matcher,
668 from_begin, from_end,
669 to_begin, to_end);
670 if (g_hash_table_size(priv->junks) > 0) {
671 adjust_best_info_with_junk_predicate(matcher, FALSE, info,
672 from_begin, from_end,
673 to_begin, to_end);
674 adjust_best_info_with_junk_predicate(matcher, TRUE, info,
675 from_begin, from_end,
676 to_begin, to_end);
677 }
678
679 return info;
680 }
681
682 typedef struct _MatchingInfo MatchingInfo;
683 struct _MatchingInfo {
684 guint from_begin;
685 guint from_end;
686 guint to_begin;
687 guint to_end;
688 };
689
690 static void
push_matching_info(GQueue * queue,guint from_begin,guint from_end,guint to_begin,guint to_end)691 push_matching_info (GQueue *queue,
692 guint from_begin, guint from_end,
693 guint to_begin, guint to_end)
694 {
695 MatchingInfo *info;
696
697 info = g_slice_new(MatchingInfo);
698 info->from_begin = from_begin;
699 info->from_end = from_end;
700 info->to_begin = to_begin;
701 info->to_end = to_end;
702
703 g_queue_push_tail(queue, info);
704 }
705
706 static void
pop_matching_info(GQueue * queue,MatchingInfo * info)707 pop_matching_info (GQueue *queue, MatchingInfo *info)
708 {
709 MatchingInfo *popped_info;
710
711 popped_info = g_queue_pop_head(queue);
712 *info = *popped_info;
713 g_slice_free(MatchingInfo, popped_info);
714 }
715
716 static gint
compare_match_info(gconstpointer data1,gconstpointer data2)717 compare_match_info (gconstpointer data1, gconstpointer data2)
718 {
719 const CutSequenceMatchInfo *info1, *info2;
720
721 info1 = data1;
722 info2 = data2;
723
724 if (info1->from_index < info2->from_index) {
725 return -1;
726 } else if (info1->from_index == info2->from_index) {
727 return 0;
728 } else {
729 return 1;
730 }
731 }
732
733 const GList *
cut_sequence_matcher_get_matches(CutSequenceMatcher * matcher)734 cut_sequence_matcher_get_matches (CutSequenceMatcher *matcher)
735 {
736 CutSequenceMatcherPrivate *priv;
737 GList *matches = NULL;
738 GQueue *queue;
739
740 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
741 if (priv->matches)
742 return priv->matches;
743
744 queue = g_queue_new();
745 push_matching_info(queue,
746 0, g_sequence_get_length(priv->from),
747 0, g_sequence_get_length(priv->to));
748
749 while (!g_queue_is_empty(queue)) {
750 MatchingInfo info;
751 CutSequenceMatchInfo *match_info;
752
753 pop_matching_info(queue, &info);
754 match_info = cut_sequence_matcher_get_longest_match(matcher,
755 info.from_begin,
756 info.from_end,
757 info.to_begin,
758 info.to_end);
759 if (match_info->size == 0) {
760 cut_sequence_match_info_free(match_info);
761 continue;
762 }
763
764 if (info.from_begin < match_info->from_index &&
765 info.to_begin < match_info->to_index)
766 push_matching_info(queue,
767 info.from_begin, match_info->from_index,
768 info.to_begin, match_info->to_index);
769 matches = g_list_prepend(matches, match_info);
770 if (match_info->from_index + match_info->size < info.from_end &&
771 match_info->to_index + match_info->size < info.to_end)
772 push_matching_info(queue,
773 match_info->from_index + match_info->size,
774 info.from_end,
775 match_info->to_index + match_info->size,
776 info.to_end);
777 }
778
779 g_queue_free(queue);
780 priv->matches = g_list_sort(matches, compare_match_info);
781
782 return priv->matches;
783 }
784
785 static GList *
prepend_match_info(GList * list,guint begin,guint end,guint size)786 prepend_match_info (GList *list, guint begin, guint end, guint size)
787 {
788 return g_list_prepend(list, cut_sequence_match_info_new(begin, end, size));
789 }
790
791 const GList *
cut_sequence_matcher_get_blocks(CutSequenceMatcher * matcher)792 cut_sequence_matcher_get_blocks (CutSequenceMatcher *matcher)
793 {
794 CutSequenceMatcherPrivate *priv;
795 guint from_index, to_index, size;
796 const GList *node;
797 GList *blocks = NULL;
798
799 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
800 if (priv->blocks)
801 return priv->blocks;
802
803 from_index = to_index = size = 0;
804 for (node = cut_sequence_matcher_get_matches(matcher);
805 node;
806 node = g_list_next(node)) {
807 CutSequenceMatchInfo *info = node->data;
808
809 if (from_index + size == info->from_index &&
810 to_index + size == info->to_index) {
811 size += info->size;
812 } else {
813 if (size > 0)
814 blocks = prepend_match_info(blocks, from_index, to_index, size);
815 from_index = info->from_index;
816 to_index = info->to_index;
817 size = info->size;
818 }
819 }
820
821 if (size > 0)
822 blocks = prepend_match_info(blocks, from_index, to_index, size);
823
824 blocks = prepend_match_info(blocks,
825 g_sequence_get_length(priv->from),
826 g_sequence_get_length(priv->to),
827 0);
828 priv->blocks = g_list_reverse(blocks);
829
830 return priv->blocks;
831 }
832
833 static CutSequenceMatchOperationType
determine_operation_type(guint from_index,guint to_index,CutSequenceMatchInfo * info)834 determine_operation_type (guint from_index, guint to_index,
835 CutSequenceMatchInfo *info)
836 {
837 if (from_index < info->from_index && to_index < info->to_index) {
838 return CUT_SEQUENCE_MATCH_OPERATION_REPLACE;
839 } else if (from_index < info->from_index) {
840 return CUT_SEQUENCE_MATCH_OPERATION_DELETE;
841 } else if (to_index < info->to_index) {
842 return CUT_SEQUENCE_MATCH_OPERATION_INSERT;
843 } else {
844 return CUT_SEQUENCE_MATCH_OPERATION_EQUAL;
845 }
846 }
847
848 static GList *
prepend_operation(GList * list,CutSequenceMatchOperationType type,guint from_begin,guint from_end,guint to_begin,guint to_end)849 prepend_operation (GList *list, CutSequenceMatchOperationType type,
850 guint from_begin, guint from_end,
851 guint to_begin, guint to_end)
852 {
853 return g_list_prepend(list,
854 cut_sequence_match_operation_new(type,
855 from_begin,
856 from_end,
857 to_begin,
858 to_end));
859 }
860
861 const GList *
cut_sequence_matcher_get_operations(CutSequenceMatcher * matcher)862 cut_sequence_matcher_get_operations (CutSequenceMatcher *matcher)
863 {
864 CutSequenceMatcherPrivate *priv;
865 const GList *node;
866 GList *operations = NULL;
867 guint from_index = 0;
868 guint to_index = 0;
869
870 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
871 if (priv->operations)
872 return priv->operations;
873
874 for (node = cut_sequence_matcher_get_blocks(matcher);
875 node;
876 node = g_list_next(node)) {
877 CutSequenceMatchInfo *info = node->data;
878 CutSequenceMatchOperationType type;
879
880 type = determine_operation_type(from_index, to_index, info);
881 if (type != CUT_SEQUENCE_MATCH_OPERATION_EQUAL)
882 operations = prepend_operation(operations, type,
883 from_index, info->from_index,
884 to_index, info->to_index);
885
886 from_index = info->from_index + info->size;
887 to_index = info->to_index + info->size;
888 if (info->size > 0)
889 operations = prepend_operation(operations,
890 CUT_SEQUENCE_MATCH_OPERATION_EQUAL,
891 info->from_index, from_index,
892 info->to_index, to_index);
893 }
894
895 priv->operations = g_list_reverse(operations);
896 return priv->operations;
897 }
898
899 static GList *
get_edge_expanded_copied_operations(CutSequenceMatcher * matcher,guint context_size)900 get_edge_expanded_copied_operations (CutSequenceMatcher *matcher,
901 guint context_size)
902 {
903 const GList *original_operations = NULL;
904 GList *operations = NULL;
905
906 original_operations = cut_sequence_matcher_get_operations(matcher);
907 if (original_operations) {
908 CutSequenceMatchOperation *operation = original_operations->data;
909 const GList *node;
910 guint from_begin, from_end, to_begin, to_end;
911
912 from_begin = operation->from_begin;
913 from_end = operation->from_end;
914 to_begin = operation->to_begin;
915 to_end = operation->to_end;
916 node = original_operations;
917 if (operation->type == CUT_SEQUENCE_MATCH_OPERATION_EQUAL) {
918 guint from_context_begin = 0, to_context_begin = 0;
919
920 if (from_end > context_size)
921 from_context_begin = from_end - context_size;
922 if (to_end > context_size)
923 to_context_begin = to_end - context_size;
924 operations = prepend_operation(operations,
925 CUT_SEQUENCE_MATCH_OPERATION_EQUAL,
926 MAX(from_begin, from_context_begin),
927 from_end,
928 MAX(to_begin, to_context_begin),
929 to_end);
930 node = g_list_next(node);
931 }
932 for (; node; node = g_list_next(node)) {
933 operation = node->data;
934 from_begin = operation->from_begin;
935 from_end = operation->from_end;
936 to_begin = operation->to_begin;
937 to_end = operation->to_end;
938 if (!g_list_next(node) &&
939 operation->type == CUT_SEQUENCE_MATCH_OPERATION_EQUAL) {
940 operations =
941 prepend_operation(operations,
942 CUT_SEQUENCE_MATCH_OPERATION_EQUAL,
943 from_begin,
944 MIN(from_end, from_begin + context_size),
945 to_begin,
946 MIN(to_end, to_begin + context_size));
947 } else {
948 operations = prepend_operation(operations,
949 operation->type,
950 from_begin, from_end,
951 to_begin, to_end);
952 }
953 }
954 operations = g_list_reverse(operations);
955 } else {
956 operations = prepend_operation(operations,
957 CUT_SEQUENCE_MATCH_OPERATION_EQUAL,
958 0, 0, 0, 0);
959 }
960
961 return operations;
962 }
963
964 const GList *
cut_sequence_matcher_get_grouped_operations(CutSequenceMatcher * matcher)965 cut_sequence_matcher_get_grouped_operations (CutSequenceMatcher *matcher)
966 {
967 CutSequenceMatcherPrivate *priv;
968 GList *node;
969 GList *operations = NULL;
970 GList *groups = NULL;
971 GList *group = NULL;
972 guint context_size;
973 guint group_window;
974
975 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
976 if (priv->grouped_operations)
977 return priv->grouped_operations;
978
979 context_size = priv->context_size;
980 operations = get_edge_expanded_copied_operations(matcher, context_size);
981 group_window = context_size * 2;
982
983 for (node = operations; node; node = g_list_next(node)) {
984 CutSequenceMatchOperation *operation = node->data;
985 guint from_begin, from_end, to_begin, to_end;
986
987 from_begin = operation->from_begin;
988 from_end = operation->from_end;
989 to_begin = operation->to_begin;
990 to_end = operation->to_end;
991 if (operation->type == CUT_SEQUENCE_MATCH_OPERATION_EQUAL &&
992 ((from_end - from_begin) > group_window)) {
993 operation->from_end = MIN(from_end, from_begin + context_size);
994 operation->to_end = MIN(to_end, to_begin + context_size);
995 group = g_list_prepend(group, operation);
996 groups = g_list_prepend(groups, g_list_reverse(group));
997 group = NULL;
998
999 if (from_end > context_size)
1000 from_begin = MAX(from_begin, from_end - context_size);
1001 if (to_end > context_size)
1002 to_begin = MAX(to_begin, to_end - context_size);
1003 group = prepend_operation(group, operation->type,
1004 from_begin, from_end,
1005 to_begin, to_end);
1006 } else {
1007 group = g_list_prepend(group, operation);
1008 }
1009 }
1010
1011 if (group)
1012 groups = g_list_prepend(groups, g_list_reverse(group));
1013
1014 g_list_free(operations);
1015
1016 priv->grouped_operations = g_list_reverse(groups);
1017 return priv->grouped_operations;
1018 }
1019
1020 gdouble
cut_sequence_matcher_get_ratio(CutSequenceMatcher * matcher)1021 cut_sequence_matcher_get_ratio (CutSequenceMatcher *matcher)
1022 {
1023 CutSequenceMatcherPrivate *priv;
1024 const GList *node;
1025 guint length;
1026
1027 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
1028 if (priv->ratio >= 0.0)
1029 return priv->ratio;
1030
1031 length = g_sequence_get_length(priv->from) + g_sequence_get_length(priv->to);
1032 if (length == 0) {
1033 priv->ratio = 1.0;
1034 } else {
1035 guint matches = 0;
1036
1037 for (node = cut_sequence_matcher_get_blocks(matcher);
1038 node;
1039 node = g_list_next(node)) {
1040 CutSequenceMatchInfo *info = node->data;
1041
1042 matches += info->size;
1043 }
1044 priv->ratio = 2.0 * matches / length;
1045 }
1046
1047 return priv->ratio;
1048 }
1049
1050 guint
cut_sequence_matcher_get_context_size(CutSequenceMatcher * matcher)1051 cut_sequence_matcher_get_context_size (CutSequenceMatcher *matcher)
1052 {
1053 return CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher)->context_size;
1054 }
1055
1056 void
cut_sequence_matcher_set_context_size(CutSequenceMatcher * matcher,guint context_size)1057 cut_sequence_matcher_set_context_size (CutSequenceMatcher *matcher,
1058 guint context_size)
1059 {
1060 CutSequenceMatcherPrivate *priv;
1061
1062 priv = CUT_SEQUENCE_MATCHER_GET_PRIVATE(matcher);
1063 if (priv->context_size != context_size) {
1064 priv->context_size = context_size;
1065 dispose_goruped_operations(priv);
1066 }
1067 }
1068
1069 /*
1070 vi:ts=4:nowrap:ai:expandtab:sw=4
1071 */
1072