1 /*
2  * speedup.c
3  * Copyright (C) 2018 Kovid Goyal <kovid at kovidgoyal.net>
4  *
5  * Distributed under terms of the GPL3 license.
6  */
7 
8 #include "data-types.h"
9 
10 static PyObject*
changed_center(PyObject * self UNUSED,PyObject * args)11 changed_center(PyObject *self UNUSED, PyObject *args) {
12     unsigned int prefix_count = 0, suffix_count = 0;
13     PyObject *lp, *rp;
14     if (!PyArg_ParseTuple(args, "UU", &lp, &rp)) return NULL;
15     const size_t left_len = PyUnicode_GET_LENGTH(lp), right_len = PyUnicode_GET_LENGTH(rp);
16 
17 #define R(which, index) PyUnicode_READ(PyUnicode_KIND(which), PyUnicode_DATA(which), index)
18     while(prefix_count < MIN(left_len, right_len)) {
19         if (R(lp, prefix_count) != R(rp, prefix_count)) break;
20         prefix_count++;
21     }
22     if (left_len && right_len && prefix_count < MIN(left_len, right_len)) {
23         while(suffix_count < MIN(left_len - prefix_count, right_len - prefix_count)) {
24             if(R(lp, left_len - 1 - suffix_count) != R(rp, right_len - 1 - suffix_count)) break;
25             suffix_count++;
26         }
27     }
28 #undef R
29     return Py_BuildValue("II", prefix_count, suffix_count);
30 }
31 
32 typedef struct {
33     unsigned int start_pos, end_pos, current_pos;
34     PyObject *start_code, *end_code;
35 } Segment;
36 
37 typedef struct {
38     Segment sg;
39     unsigned int num, pos;
40 } SegmentPointer;
41 
42 static const Segment EMPTY_SEGMENT = { .current_pos = UINT_MAX };
43 
44 static bool
convert_segment(PyObject * highlight,Segment * dest)45 convert_segment(PyObject *highlight, Segment *dest) {
46     PyObject *val = NULL;
47 #define I
48 #define A(x, d, c) { \
49     val = PyObject_GetAttrString(highlight, #x); \
50     if (val == NULL) return false; \
51     dest->d = c(val); Py_DECREF(val); \
52 }
53     A(start, start_pos, PyLong_AsUnsignedLong);
54     A(end, end_pos, PyLong_AsUnsignedLong);
55     dest->current_pos = dest->start_pos;
56     A(start_code, start_code, I);
57     A(end_code, end_code, I);
58     if (!PyUnicode_Check(dest->start_code)) { PyErr_SetString(PyExc_TypeError, "start_code is not a string"); return false; }
59     if (!PyUnicode_Check(dest->end_code)) { PyErr_SetString(PyExc_TypeError, "end_code is not a string"); return false; }
60 #undef A
61 #undef I
62     return true;
63 }
64 
65 static bool
next_segment(SegmentPointer * s,PyObject * highlights)66 next_segment(SegmentPointer *s, PyObject *highlights) {
67     if (s->pos < s->num) {
68         if (!convert_segment(PyList_GET_ITEM(highlights, s->pos), &s->sg)) return false;
69         s->pos++;
70     } else s->sg.current_pos = UINT_MAX;
71     return true;
72 }
73 
74 typedef struct LineBuffer {
75     Py_UCS4 *buf;
76     size_t pos, capacity;
77 } LineBuffer;
78 
79 
80 static bool
ensure_space(LineBuffer * b,size_t num)81 ensure_space(LineBuffer *b, size_t num) {
82     if (b->pos + num >= b->capacity) {
83         size_t new_cap = MAX(b->capacity * 2, 4096u);
84         new_cap = MAX(b->pos + num + 1024u, new_cap);
85         b->buf = realloc(b->buf, new_cap * sizeof(b->buf[0]));
86         if (!b->buf) { PyErr_NoMemory(); return false; }
87         b->capacity = new_cap;
88     }
89     return true;
90 }
91 
92 static bool
insert_code(PyObject * code,LineBuffer * b)93 insert_code(PyObject *code, LineBuffer *b) {
94     unsigned int csz = PyUnicode_GET_LENGTH(code);
95     if (!ensure_space(b, csz)) return false;
96     for (unsigned int s = 0; s < csz; s++) b->buf[b->pos++] = PyUnicode_READ(PyUnicode_KIND(code), PyUnicode_DATA(code), s);
97     return true;
98 }
99 
100 static bool
add_line(Segment * bg_segment,Segment * fg_segment,LineBuffer * b,PyObject * ans)101 add_line(Segment *bg_segment, Segment *fg_segment, LineBuffer *b, PyObject *ans) {
102     bool bg_is_active = bg_segment->current_pos == bg_segment->end_pos, fg_is_active = fg_segment->current_pos == fg_segment->end_pos;
103     if (bg_is_active) { if(!insert_code(bg_segment->end_code, b)) return false; }
104     if (fg_is_active) { if(!insert_code(fg_segment->end_code, b)) return false; }
105     PyObject *wl = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, b->buf, b->pos);
106     if (!wl) return false;
107     int ret = PyList_Append(ans, wl); Py_DECREF(wl); if (ret != 0) return false;
108     b->pos = 0;
109     if (bg_is_active) { if(!insert_code(bg_segment->start_code, b)) return false; }
110     if (fg_is_active) { if(!insert_code(fg_segment->start_code, b)) return false; }
111     return true;
112 }
113 
114 static LineBuffer b;
115 
116 static PyObject*
split_with_highlights(PyObject * self UNUSED,PyObject * args)117 split_with_highlights(PyObject *self UNUSED, PyObject *args) {
118     PyObject *line, *truncate_points_py, *fg_highlights, *bg_highlight;
119     if (!PyArg_ParseTuple(args, "UO!O!O", &line, &PyList_Type, &truncate_points_py, &PyList_Type, &fg_highlights, &bg_highlight)) return NULL;
120     PyObject *ans = PyList_New(0);
121     if (!ans) return NULL;
122     static unsigned int truncate_points[256];
123     unsigned int num_truncate_pts = PyList_GET_SIZE(truncate_points_py), truncate_pos = 0, truncate_point;
124     for (unsigned int i = 0; i < MIN(num_truncate_pts, arraysz(truncate_points)); i++) {
125         truncate_points[i] = PyLong_AsUnsignedLong(PyList_GET_ITEM(truncate_points_py, i));
126     }
127     SegmentPointer fg_segment = { .sg = EMPTY_SEGMENT, .num = PyList_GET_SIZE(fg_highlights)}, bg_segment = { .sg = EMPTY_SEGMENT };
128     if (bg_highlight != Py_None) { if (!convert_segment(bg_highlight, &bg_segment.sg)) { Py_CLEAR(ans); return NULL; }; bg_segment.num = 1; }
129 #define CHECK_CALL(func, ...) if (!func(__VA_ARGS__)) { Py_CLEAR(ans); if (!PyErr_Occurred()) PyErr_SetString(PyExc_ValueError, "unknown error while processing line"); return NULL; }
130     CHECK_CALL(next_segment, &fg_segment, fg_highlights);
131 
132 #define NEXT_TRUNCATE_POINT truncate_point = (truncate_pos < num_truncate_pts) ? truncate_points[truncate_pos++] : UINT_MAX
133     NEXT_TRUNCATE_POINT;
134 
135 #define INSERT_CODE(x) { CHECK_CALL(insert_code, x, &b); }
136 
137 #define ADD_LINE CHECK_CALL(add_line, &bg_segment.sg, &fg_segment.sg, &b, ans);
138 
139 #define ADD_CHAR(x) { \
140     if (!ensure_space(&b, 1)) { Py_CLEAR(ans); return NULL; } \
141     b.buf[b.pos++] = x; \
142 }
143 #define CHECK_SEGMENT(sgp, is_fg) { \
144     if (i == sgp.sg.current_pos) { \
145         INSERT_CODE(sgp.sg.current_pos == sgp.sg.start_pos ? sgp.sg.start_code : sgp.sg.end_code); \
146         if (sgp.sg.current_pos == sgp.sg.start_pos) sgp.sg.current_pos = sgp.sg.end_pos; \
147         else { \
148             if (is_fg) { \
149                 CHECK_CALL(next_segment, &fg_segment, fg_highlights); \
150                 if (sgp.sg.current_pos == i) { \
151                     INSERT_CODE(sgp.sg.start_code); \
152                     sgp.sg.current_pos = sgp.sg.end_pos; \
153                 } \
154             } else sgp.sg.current_pos = UINT_MAX; \
155         } \
156     }\
157 }
158 
159     const unsigned int line_sz = PyUnicode_GET_LENGTH(line);
160     b.pos = 0;
161     unsigned int i = 0;
162     for (; i < line_sz; i++) {
163         if (i == truncate_point) { ADD_LINE; NEXT_TRUNCATE_POINT; }
164         CHECK_SEGMENT(bg_segment, false);
165         CHECK_SEGMENT(fg_segment, true)
166         ADD_CHAR(PyUnicode_READ(PyUnicode_KIND(line), PyUnicode_DATA(line), i));
167     }
168     if (b.pos) ADD_LINE;
169     return ans;
170 #undef INSERT_CODE
171 #undef CHECK_SEGMENT
172 #undef CHECK_CALL
173 #undef ADD_CHAR
174 #undef ADD_LINE
175 #undef NEXT_TRUNCATE_POINT
176 }
177 
178 static void
free_resources(void)179 free_resources(void) {
180     free(b.buf); b.buf = NULL; b.capacity = 0; b.pos = 0;
181 }
182 
183 static PyMethodDef module_methods[] = {
184     {"changed_center", (PyCFunction)changed_center, METH_VARARGS, ""},
185     {"split_with_highlights", (PyCFunction)split_with_highlights, METH_VARARGS, ""},
186     {NULL, NULL, 0, NULL}        /* Sentinel */
187 };
188 
189 static struct PyModuleDef module = {
190    .m_base = PyModuleDef_HEAD_INIT,
191    .m_name = "diff_speedup",   /* name of module */
192    .m_doc = NULL,
193    .m_size = -1,
194    .m_methods = module_methods
195 };
196 
197 EXPORTED PyMODINIT_FUNC
PyInit_diff_speedup(void)198 PyInit_diff_speedup(void) {
199     PyObject *m;
200 
201     m = PyModule_Create(&module);
202     if (m == NULL) return NULL;
203     Py_AtExit(free_resources);
204     return m;
205 }
206