1 /*
2  * lu_file.c
3  *
4  * Copyright (C) 2016-2018  ERGO-Code
5  *
6  * Data file implementation
7  *
8  * A data file stores lines of (index,value) pairs. Entries of each line are
9  * contiguous in memory. Lines can be in any order in memory and there can be
10  * gaps between consecutive lines.
11  *
12  * The data file implementation uses arrays
13  *
14  *  index[0:fmem-1], value[0:fmem-1],
15  *  begin[0:nlines], end[0:nlines],
16  *  next[0:nlines], prev[0:nlines]
17  *
18  *  index, value    storing (index,value) pairs
19  *  begin[k]        pointer to first element in line 0 <= k < nlines,
20  *  end[k]          pointer to one past the last element in line k.
21  *  begin[nlines]   pointer to the first element of unused space
22  *  end[nlines]     holds fmem
23  *
24  *  next, prev hold the line numbers (0..nlines-1) in a double linked list in
25  *  the order in which they appear in memory. That is, for line 0 <= k < nlines,
26  *  next[k] and prev[k] are the line which comes before respectively after line
27  *  k in memory. next[nlines] and prev[nlines] are the first respectively last
28  *  line in memory order.
29  *
30  * Methods:
31  *
32  *  lu_file_empty
33  *  lu_file_reappend
34  *  lu_file_compress
35  *  lu_file_diff
36  *
37  */
38 
39 #include "lu_def.h"
40 #include "lu_list.h"
41 #include "lu_file.h"
42 
43 /* ==========================================================================
44  * lu_file_empty
45  *
46  * Initialize empty file with @fmem memory space.
47  * ========================================================================== */
48 
lu_file_empty(lu_int nlines,lu_int * begin,lu_int * end,lu_int * next,lu_int * prev,lu_int fmem)49 void lu_file_empty(
50     lu_int nlines, lu_int *begin, lu_int *end, lu_int *next, lu_int *prev,
51     lu_int fmem)
52 {
53     lu_int i;
54 
55     begin[nlines] = 0;
56     end[nlines] = fmem;
57     for (i = 0; i < nlines; i++)
58         begin[i] = end[i] = 0;
59     for (i = 0; i < nlines; i++)
60     {
61         next[i] = i+1;
62         prev[i+1] = i;
63     }
64     next[nlines] = 0;
65     prev[0] = nlines;
66 }
67 
68 
69 /* ==========================================================================
70  * lu_file_reappend
71  *
72  * Reappend line to file end and add @extra_space elements room. The file must
73  * have at least length(line) + @extra_space elements free space.
74  * ========================================================================== */
75 
lu_file_reappend(lu_int line,lu_int nlines,lu_int * begin,lu_int * end,lu_int * next,lu_int * prev,lu_int * index,double * value,lu_int extra_space)76 void lu_file_reappend(
77     lu_int line, lu_int nlines, lu_int *begin, lu_int *end, lu_int *next,
78     lu_int *prev, lu_int *index, double *value, lu_int extra_space)
79 {
80     lu_int fmem, used, room, ibeg, iend, pos;
81 
82     fmem = end[nlines];
83     used = begin[nlines];
84     room = fmem-used;
85     ibeg = begin[line];         /* old beginning of line */
86     iend = end[line];
87     begin[line] = used;         /* new beginning of line */
88     assert(iend-ibeg <= room);
89     for (pos = ibeg; pos < iend; pos++)
90     {
91         index[used] = index[pos];
92         value[used++] = value[pos];
93     }
94     end[line] = used;
95     room = fmem-used;
96     assert(room >= extra_space);
97     used += extra_space;
98     begin[nlines] = used;       /* beginning of unused space */
99     lu_list_move(line, 0, next, prev, nlines, NULL);
100 }
101 
102 
103 /* ==========================================================================
104  * lu_file_compress
105  *
106  * Compress file to reuse memory gaps. The ordering of lines in the file is
107  * unchanged. To each line with nz entries add @stretch*nz+@pad elements extra
108  * space. Chop extra space if it would overlap the following line in memory.
109  *
110  * Return: number of entries in file
111  * ========================================================================== */
112 
lu_file_compress(lu_int nlines,lu_int * begin,lu_int * end,const lu_int * next,lu_int * index,double * value,double stretch,lu_int pad)113 lu_int lu_file_compress(
114     lu_int nlines, lu_int *begin, lu_int *end, const lu_int *next,
115     lu_int *index, double *value, double stretch, lu_int pad)
116 {
117     lu_int i, pos, ibeg, iend, used, extra_space, nz = 0;
118 
119     used = 0; extra_space = 0;
120     for (i = next[nlines]; i < nlines; i = next[i]) /* move line i */
121     {
122         ibeg = begin[i];
123         iend = end[i];
124         assert(ibeg >= used);
125         used += extra_space;
126         if (used > ibeg)
127             used = ibeg;        /* chop extra space added before */
128         begin[i] = used;
129         for (pos = ibeg; pos < iend; pos++)
130         {
131             index[used] = index[pos];
132             value[used++] = value[pos];
133         }
134         end[i] = used;
135         extra_space = stretch*(iend-ibeg) + pad;
136         nz += iend-ibeg;
137     }
138     assert(used <= begin[nlines]);
139     used += extra_space;
140     if (used > begin[nlines])
141         used = begin[nlines];   /* never use more space than before */
142     begin[nlines] = used;
143     return nz;
144 }
145 
146 
147 /* ==========================================================================
148  * lu_file_diff (for debugging)
149  *
150  * @begin_row, @end_row, @begin_col, @end_col are pointer into @index, @value,
151  * defining lines of the "row file" and the "column file".
152  *
153  * Task:
154  *
155  *  val == NULL: count row file entries that are missing in column file.
156  *  val != NULL: count row file entries that are missing in column file
157  *               or which values are different.
158  *
159  * The method does a column file search for each row file entry. To check
160  * consistency of rowwise and columnwise storage, the method must be called
161  * twice with row pointers and column pointers swapped.
162  * ========================================================================== */
163 
lu_file_diff(lu_int nrow,const lu_int * begin_row,const lu_int * end_row,const lu_int * begin_col,const lu_int * end_col,const lu_int * index,const double * value)164 lu_int lu_file_diff(
165     lu_int nrow, const lu_int *begin_row, const lu_int *end_row,
166     const lu_int *begin_col, const lu_int *end_col, const lu_int *index,
167     const double *value)
168 {
169     lu_int i, j, pos, where, ndiff = 0;
170 
171     for (i = 0; i < nrow; i++)
172     {
173         for (pos = begin_row[i]; pos < end_row[i]; pos++)
174         {
175             j = index[pos];
176             for (where = begin_col[j]; where < end_col[j] &&
177                      index[where] != i; where++)
178                 ;
179             if (where == end_col[j] || (value && value[pos] != value[where]))
180                 ndiff++;
181         }
182     }
183     return ndiff;
184 }
185