function df = df_matassign(df, S, indc, ncol, RHS, trigger)
  %# auxiliary function: assign the dataframe as if it was a matrix
  %#
  %# Inputs:
  %#   df      dataframe to modify
  %#   S       subsasgn-style index structure; S.subs{1} addresses rows,
  %#           S.subs{2} columns and the optional S.subs{3} the third dim
  %#   indc    numeric column indexes; empty when the initial dataframe
  %#           was empty, in which case they are deduced from RHS
  %#   ncol    number of columns being assigned
  %#   RHS     new content: [] (null assignment, i.e. deletion), a char
  %#           array, a cell array, a numeric array or another dataframe
  %#   trigger not used by this function
  %#
  %# Output:
  %#   df      the updated dataframe
  %#
  %# Errors are raised for malformed null assignments, for non-unique
  %# row indexes, and whenever the content of a column can not be stored.

  %% Copyright (C) 2009-2017 Pascal Dupuis <cdemills@gmail.com>
  %%
  %% This file is part of the dataframe package for Octave.
  %%
  %% This package is free software; you can redistribute it and/or
  %% modify it under the terms of the GNU General Public
  %% License as published by the Free Software Foundation;
  %% either version 2, or (at your option) any later version.
  %%
  %% This package is distributed in the hope that it will be useful,
  %% but WITHOUT ANY WARRANTY; without even the implied
  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  %% PURPOSE.  See the GNU General Public License for more
  %% details.
  %%
  %% You should have received a copy of the GNU General Public
  %% License along with this package; see the file COPYING.  If not,
  %% see <http://www.gnu.org/licenses/>.

  %# use NA when the interpreter provides it, fall back to NaN otherwise
  try
    NA = NA;
  catch
    NA = NaN;
  end

  if (isempty (RHS))
    %# null assignment: delete columns or rows
    if (1 == ncol)
      if (sum (~strcmp (S.subs, ':')) > 2)
        error('A null assignment can only have one non-colon index.');
      end
    elseif (sum (~strcmp (S.subs, ':')) > 1)
      error('A null assignment can only have one non-colon index.');
    end

    if (strcmp (S.subs(1), ':'))  %# removing column/matrix
      RHS = S; RHS.subs(2) = [];
      for indi = (indc)
        unfolded  = df.x_data{indi}(:, df.x_rep{indi});
        unfolded  = feval (@subsasgn, unfolded, RHS, []);
        df.x_data{indi} = unfolded;
        if (~isempty (unfolded))
          %# brace indexing: store the whole range in one cell
          df.x_rep{indi} = 1:size (unfolded, 2);
        end
      end
      %# remove empty elements
      indi = cellfun ('isempty', df.x_data);
      if (any (indi)) %# nothing left, remove this column
        df.x_cnt(2) = df.x_cnt(2) - sum (indi);
        indi = ~indi; %# vector of kept data
        df.x_name{2} = df.x_name{2}(indi);
        df.x_over{2} = df.x_over{2}(indi);
        df.x_type = df.x_type(indi);
        df.x_data = df.x_data(indi);
        df.x_rep = df.x_rep(indi);
      end
      if (size (df.x_ridx, 3) > 1)
        df.x_ridx(:, indc, :) = [];
      end
    elseif (strcmp (S.subs(2), ':'))  %# removing rows
      indr = S.subs{1};
      if (~isempty (df.x_name{1}))
        df.x_name{1}(indr, :) = [];
        df.x_over{1}(indr) = [];
      end
      df.x_ridx(indr, :, :) = [];
      %# to remove a line, iterate on each column
      df.x_data = cellfun (@(x) feval(@subsasgn, x, S, []), ...
                          df.x_data, 'UniformOutput', false);
      if (isa (indr, 'char'))
        df.x_cnt(1) = 0;
      elseif (islogical (indr))
        %# a logical mask removes as many rows as it has true entries
        df.x_cnt(1) = df.x_cnt(1) - sum (indr(:));
      else
        df.x_cnt(1) = df.x_cnt(1) - length (indr);
      end
    end
    df = df_thirddim (df);

    return;
  end

  %# char array are problematic, convert to cellstr
  if (ischar (RHS) && ismatrix (RHS))
    RHS = cellstr (RHS);
  end

  indc_was_set = ~isempty (indc);
  if (~indc_was_set) %# initial dataframe was empty
    ncol = size (RHS, 2); indc = 1:ncol;
  end

  %# iscell(dataframe) returns true. Beware.
  ispurecell = iscell (RHS) & ~isa (RHS, 'dataframe');

  indr = S.subs{1, 1};
  indr_was_set = ~isempty (indr);
  %# initial dataframe was empty ?
  if (~indr_was_set || strcmp (indr, ':'))
    if (ispurecell)
      nrow = max (sum (cellfun ('size', RHS, 1), 1));
    else
      if (isvector (RHS))
        if (0 == df.x_cnt(1))
          nrow = size (RHS, 1);
        else
          nrow = df.x_cnt(1);  %# limit to df number of rows
        end
      else
        %# deduce limit from RHS
        nrow = size (RHS, 1);
      end
    end
    indr = 1:nrow;
  elseif (~isempty (indr))
    if (~isnumeric (indr))
      %# translate row names to row index
      [indr, nrow] = df_name2idx (df.x_name{1}, indr, df.x_cnt(1), 'row');
      S.subs{1, 1} = indr;
    else
      nrow = length (indr);
    end
  end
  if (length (S.subs) > 2)
    inds = S.subs{1, 3};
  else
    inds = [];
  end

  rname = cell(0, 0); rname_width = max (1, size (df.x_name{2}, 2));
  ridx = []; cname = rname; ctype = rname;

  if (ispurecell)
    if ((length (indc) == df.x_cnt(2) && size (RHS, 2) >=  df.x_cnt(2)) ...
        || 0 == df.x_cnt(2) || isempty (S.subs{1}) || isempty (S.subs{2}))
      %# providing too much information -- remove extra content
      if (size (RHS, 1) > 1)
        %# at this stage, verify that the first line doesn't contain
        %# chars only; use them for column names
        dummy = cellfun ('class', ...
                         RHS(1, ~cellfun ('isempty', RHS(1, :))), ...
                         'UniformOutput', false);
        dummy = strcmp (dummy, 'char');
        if (all (dummy))
          if (length (df.x_over{2}) >= max (indc) ...
              && ~all (df.x_over{2}(indc)) && ~isempty (S.subs{2}))
            warning('Trying to overwrite colum names');
          end

          cname = RHS(1, :).'; RHS = RHS(2:end, :);
          if (~indr_was_set)
            nrow = nrow - 1; indr = 1:nrow;
          else
            %# we know indr, there is no reason that RHS(:, 1) contains
            %# row names.
            if (isempty (S.subs{2}))
              %# extract columns position from columns names
              [indc, ncol,  S.subs{2}, dummy] = ...
                  df_name2idx (df.x_name{2}, cname, df.x_cnt(2), 'column');
              if (length (dummy) ~= sum (dummy))
                warning ('Not all RHS column names used');
                cname = cname(dummy); RHS = RHS(:, dummy);
              end
            end
          end
        end
        %# at this stage, verify that the first line doesn't contain
        %# chars only; use them for column types
        dummy = cellfun ('class', ...
                         RHS(1, ~cellfun ('isempty', RHS(1, :))), ...
                         'UniformOutput', false);
        dummy = strcmp (dummy, 'char');
        if (all (dummy))
          if (length (df.x_over{2}) >= max (indc) ...
              && ~all (df.x_over{2}(indc)))
            warning ('Trying to overwrite colum names');
          end

          if (sum (~cellfun ('isempty', RHS(1, indc))) == ncol)
            ctype = RHS(1, :);
          end

          RHS = RHS(2:end, :);
          if (~indr_was_set)
            nrow = nrow - 1; indr = 1:nrow;
          end
        end
      end

      %# more elements than df width -- try to use the first two as
      %# row index and/or row name
      if (size (RHS, 1) > 1)
        dummy = all (cellfun ('isnumeric', ...
                              RHS(~cellfun ('isempty', RHS(:, 1)), 1)));
      else
         if  (0 == size (RHS, 1))
           dummy = false;
         else
           dummy =  isnumeric (RHS{1, 1});
         end
      end
      dummy = dummy && (~isempty (cname) && size (cname{1}, 2) < 1);
      if (dummy)
        ridx = cell2mat (RHS(:, 1));
        %# can it be converted to a list of unique numbers ?
        if (length (unique (ridx)) == length (ridx))
          ridx = RHS(:, 1); RHS = RHS(:, 2:end);
          if (length (df.x_name{2}) == df.x_cnt(2) + ncol)
            %# columns name were pre-filled with too much values
            df.x_name{2}(end) = [];
            df.x_over{2}(end) = [];
            if (size (RHS, 2) < ncol)
              ncol = size (RHS, 2); indc = 1:ncol;
            end
          elseif (~indc_was_set)
            ncol = ncol - 1;  indc = 1:ncol;
          end
          if (~isempty (cname)) cname = cname(2:end); end
          if (~isempty (ctype)) ctype = ctype(2:end); end
        else
          ridx = [];
        end
      end

      if (size (RHS, 2) >  df.x_cnt(2))
        %# verify that the first column doesn't contain chars only, use
        %# them for row names
        dummy = cellfun ('class', ...
                         RHS(~cellfun ('isempty', RHS(:, 1)), 1), ...
                         'UniformOutput', false);
        dummy = strcmp (dummy, 'char');
        %# && requires scalar operands: reduce the per-cell test first.
        %# An empty test result counts as false.
        dummy = ~isempty (dummy) && all (dummy(:)) ...
            && (~isempty (cname) && size (cname{1}, 2) < 1);
        if (dummy)
          if (length (df.x_over{1}) >= max (indr) ...
              && ~all (df.x_over{1}(indr)))
            warning('Trying to overwrite row names');
          else
            rname = RHS(:, 1);
          end
          rname_width = max ([1; cellfun('size', rname, 2)]);
          RHS = RHS(:, 2:end);
          if (length (df.x_name{2}) == df.x_cnt(2) + ncol)
            %# columns name were pre-filled with too much values
            df.x_name{2}(end) = [];
            df.x_over{2}(end) = [];
            if (size (RHS, 2) < ncol)
              ncol = size (RHS, 2); indc = 1:ncol;
            end
          elseif (~indc_was_set)
            ncol = ncol - 1;  indc = 1:ncol;
          end
          if (~isempty (cname)) cname = cname(2:end); end
          if (~isempty (ctype)) ctype = ctype(2:end); end
        end
      end
    end
  end

  %# perform row resizing if columns are already filled
  if (~isempty (indr) && isnumeric(indr))
    if (max (indr) > df.x_cnt(1) && size (df.x_data, 2) == df.x_cnt(2))
      df = df_pad (df, 1, max (indr)-df.x_cnt(1), rname_width);
    end
  end

  if (ispurecell) %# we must pad on a column-by-column basis
    %# NOTE: the content of the cells is not validated here; empty
    %# cells are replaced by NA further down.

    %# indc is a vector: the tests below hold only if they are true for
    %# each of its elements
    if (size (RHS, 2) > indc)
      if (size (cname, 1) > indc)
        ncol = size (RHS, 2); indc = 1:ncol;
      else
        if (debug_on_error ()) keyboard; end
      end
    end

    %# try to detect and remove bottom garbage
    eff_len = zeros (nrow, 1);
    if (size (RHS, 1) > 1)
      for indi = (indr)
        eff_len(indi, 1) = sum (~cellfun ('isempty', RHS(indi, :)));
      end
      indi = nrow;
      while (indi > 0)
        if (eff_len(indi) < 1)
          nrow = nrow - 1;
          indr(end) = [];
          RHS(end, :) = [];
          indi = indi - 1;
          if (~indr_was_set && isempty (df.x_name{1, 1}))
            df.x_cnt(1) = nrow;
            df.x_ridx(end) = [];
          end
        else
          break;
        end
      end
      clear eff_len;
    end

    %# the real assignment
    if (1 == size (RHS, 1)) %# each cell contains one vector
      extractfunc = @(x) RHS{x};
      idxOK = logical(indr);
    else %# use cell2mat to pad on a column-by-column basis
      extractfunc = @(x) cell2mat (RHS(:, x));
    end

    indj = 1; S.subs(2) = [];
    if (length (S.subs) < 2)
      S.subs{2} = 1;
    end
    for indi = (1:ncol)
      if (indc(indi) > df.x_cnt(2))
        %# perform dynamic resizing one-by-one, to get type right
        if (isempty (ctype) || length (ctype) < indc(indi))
          df = df_pad (df, 2, indc(indi)-df.x_cnt(2), class (RHS{1, indj}));
        else
          df = df_pad (df, 2, indc(indi)-df.x_cnt(2), ctype{indj});
        end
      end
      %# only an explicit index may enlarge the third dim; a ':' index
      %# would otherwise be read as its character code
      if (~isempty (inds) && ~ischar (inds) ...
          && max (inds(:)) > length (df.x_rep{indc(indi)}))
        df = df_pad (df, 3, max (inds(:))-length (df.x_rep{indc(indi)}), ...
                     indc(indi));
      end
      if (nrow == df.x_cnt(1))
        %# whole assignment
        try
          if (size (RHS, 1) <= 1)
            switch df.x_type{indc(indi)}
              case {'char'} %# use a cell array to hold strings
                dummy = cellfun (@num2str, RHS(:, indj), ...
                                 'UniformOutput', false);
              case {'double'}
                dummy = extractfunc (indj);
              otherwise
                dummy = cast (extractfunc (indj), df.x_type{indc(indi)});
            end
          else
            %# keeps indexes in sync as cell elements may be empty
            idxOK = ~cellfun ('isempty', RHS(:, indj));
            %# initialise dummy so that it can receive 'anything'
            dummy = [];
            switch (df.x_type{indc(indi)})
              case {'char'} %# use a cell array to hold strings
                dummy = cellfun (@num2str, RHS(:, indj, :), ...
                                 'UniformOutput', false);
              case {'double'}
                dummy(idxOK, :) = extractfunc (indj); dummy(~idxOK, :) = NA;
              otherwise
                dummy(idxOK, :) = extractfunc (indj); dummy(~idxOK, :) = NA;
                dummy = cast(dummy, df.x_type{indc(indi)});
            end
          end
        catch
          fprintf (2, 'Something went wrong while converting colum %d\n', indj);
          fprintf (2, 'Error was: %s\n', lasterr ());
          if (debug_on_error ()) keyboard; end
          dummy =  unique (cellfun (@class, RHS(:, indj), ...
                                    'UniformOutput', false));
          if (any (strcmp (dummy, 'char')))
            fprintf (2, 'Downclassing to char\n');
            %# replace the actual column, of type numeric, by a char
            df.x_type{indc(indi)} = 'char';
            dummy = RHS(:, indj);
            %# turn each non-char element into its textual representation
            for indk =  (size (dummy, 1):-1:1)
              if (~isa (dummy{indk}, 'char'))
                if (isinteger (dummy{indk}))
                  dummy{indk} = mat2str (dummy{indk});
                elseif (isa (dummy{indk}, 'logical'))
                  if  (dummy{indk})
                    dummy{indk} = 'true';
                  else
                    dummy{indk} = 'false';
                  end
                elseif (isnumeric (dummy{indk}))
                  dummy{indk} = mat2str (dummy{indk}, 6);
                end
              end
            end
          else
            dummy = ...
                sprintf ('Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s', ...
                         indj, df.x_type{indc(indi)}, length (indr), disp (RHS(:, indj)));
            if (debug_on_error ()) keyboard; end
            %# the message embeds user data: never use it as a template
            error ('%s', dummy);
          end
          if (debug_on_error ()) keyboard; end
        end
        if (size (dummy, 1) < df.x_cnt(1))
          dummy(end+1:df.x_cnt(1), :) = NA;
        end
      else
        %# partial assignment -- extract actual data and update
        dummy = df.x_data{indc(indi)};
        if (size (RHS, 1) > 0)
           %# pad content
          try
            switch (df.x_type{indc(indi)})
              case {'char'} %# use a cell array to hold strings
                dummy(indr, 1) = cellfun(@num2str, RHS(:, indj), ...
                                         'UniformOutput', false);
              case {'double'}
                dummy(indr, :) = extractfunc (indj);
              otherwise
                dummy(indr, :) = cast(extractfunc (indj), df.x_type{indc(indi)});
            end
          catch
            dummy = ...
            sprintf ('Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s', ...
                     indj, df.x_type{indc(indi)}, length (indr), disp (RHS(:, indj)));
            %# the message embeds user data: never use it as a template
            error ('%s', dummy);
          end
        end
      end
      [df, S] = df_cow (df, S, indc(indi));
      if (isempty (inds))
        df.x_data{indc(indi)} = dummy;
        df.x_rep{indc(indi)} = 1:size (dummy, 2);
      else
        fillfunc = @(x, S, y) feval (@subsasgn, x, S, dummy);
        try
          df.x_data{indc(indi)} = fillfunc (df.x_data{indc(indi)}, S, indi);
        catch
          dummy = lasterr ();
          if (debug_on_error ()) keyboard; end
          %# do not carry on silently with a column left unassigned
          error ('df_matassign: assignment to column %d failed: %s', ...
                 indc(indi), dummy);
        end
      end

      indj = indj + 1;
    end

  else
    %# RHS is either a numeric, either a df
    if (any (indc > min (size (df.x_data, 2), df.x_cnt(2))))
      df = df_pad (df, 2, max (indc-min (size (df.x_data, 2), df.x_cnt(2))), ...
                   class(RHS));
    end
    if (~isempty (inds) && isnumeric(inds) && any (inds > 1))
      for indi = (1:ncol)
        if (max (inds) > length (df.x_rep{indc(indi)}))
          df = df_pad (df, 3, max (inds)-length (df.x_rep{indc(indi)}), ...
                       indc(indi));
        end
      end
    end

    if (isa (RHS, 'dataframe'))
      %# block-copy index
      S.subs{2} = 1;
      if (any (~isna(RHS.x_ridx)))
        df.x_ridx = feval (@subsasgn,  df.x_ridx, S,  RHS.x_ridx);
      end
      %# skip second dim and copy data
      S.subs(2) = []; Sorig = S;
      for indi = (1:ncol)
        [df, S] = df_cow (df, S, indc(indi));
        if (strcmp (df.x_type(indc(indi)), RHS.x_type(indi)))
          try
            df.x_data{indc(indi)} = feval (@subsasgn, df.x_data{indc(indi)}, S, ...
                                          RHS.x_data{indi}(:, RHS.x_rep{indi}));
          catch
            dummy = lasterr ();
            if (debug_on_error ()) keyboard; end
            %# do not carry on silently with a column left unassigned
            error ('df_matassign: assignment to column %d failed: %s', ...
                   indc(indi), dummy);
          end
        else
          %# types differ: convert to the type of the destination column;
          %# cast expects the class name as a string, hence the braces
          df.x_data{indc(indi)} = feval (@subsasgn, df.x_data{indc(indi)}, S, ...
                                        cast (RHS.x_data{indi}(:, RHS.x_rep{indi}),...
                                            df.x_type{indc(indi)}));
        end
        S = Sorig;
      end
      if (~isempty (RHS.x_name{1}))
        df.x_name{1}(indr) = genvarname(RHS.x_name{1}(indr));
        df.x_over{1}(indr) = RHS.x_over{1}(indr);
      end
      if (~isempty (RHS.x_src))
        if (~any (strcmp (cellstr(df.x_src), cellstr(RHS.x_src))))
          df.x_src = vertcat(df.x_src, RHS.x_src);
        end
      end
      if (~isempty (RHS.x_cmt))
        if (~any (strcmp (cellstr(df.x_cmt), cellstr(RHS.x_cmt))))
          df.x_cmt = vertcat(df.x_cmt, RHS.x_cmt);
        end
      end

    else
      %# RHS is homogenous, pad at once
      if (isvector (RHS)) %# scalar - vector
        if (isempty (S.subs))
          %# same three-argument signature as the other fill functions,
          %# as the loop below always passes (data, S, column number)
          fillfunc = @(x, S, y) RHS;
        else
          %# ignore 'column' dimension -- force column vectors -- use a
          %# third dim just in case
          if (isempty (S.subs{1})) S.subs{1} = ':'; end
          S.subs(2) = [];
          if (length (S.subs) < 2)
            S.subs{2} = 1;
          end
          if (ncol > 1 && length (RHS) > 1)
            %# set a row from a vector
            fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS(y));
          else
            fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS);
          end
        end
        Sorig = S;
        for indi = (1:ncol)
          try
            lasterr('');
            dummy= 'df_cow';     %# name of the step in progress
            [df, S] = df_cow (df, S, indc(indi));
            dummy = 'fillfunc';
            df.x_data{indc(indi)} = fillfunc (df.x_data{indc(indi)}, S, indi);
            S = Sorig;
          catch
            ctype = lasterr ();  %# ctype is no longer needed at this point
            if (debug_on_error ()) keyboard; end
            %# do not carry on silently with a column left unassigned
            error ('df_matassign: %s failed for column %d: %s', ...
                   dummy, indc(indi), ctype);
          end
        end
      else %# 2D - 3D matrix
        S.subs(2) = []; %# ignore 'column' dimension
        if (isempty (S.subs{1}))
          S.subs{1} = indr;
        end
        %# rotate slices in dim 1-3 to slices in dim 1-2
        fillfunc = @(x, S, y) feval (@subsasgn, x, S, squeeze(RHS(:, y, :)));
        Sorig = S;
        for indi = (1:ncol)
          [df, S] = df_cow (df, S, indc(indi));
          df.x_data{indc(indi)} = fillfunc (df.x_data{indc(indi)}, S, indi);
          S = Sorig;
        end
      end
      %# indi still holds the last value taken in the loops above
      if (indi < size (RHS, 2) && ~isa (RHS, 'char'))
        warning (' not all columns of RHS used');
      end
    end
  end

  %# delayed row padding -- column padding occurred before
  if (~isempty (indr) && isnumeric (indr))
    if (max (indr) > df.x_cnt(1) && size (df.x_data, 2) < df.x_cnt(2))
      df = df_pad (df, 1, max (indr)-df.x_cnt(1), rname_width);
    end
  end

  %# adjust ridx and rnames, if required
  if (~isempty (ridx))
    dummy = df.x_ridx;
    if (1 == size (RHS, 1))
      dummy(indr) = ridx{1};
    else
      dummy(indr) = vertcat(ridx{indr});
    end
    if (length (unique (dummy)) ~= length (dummy))
      error('row indexes are not unique or not ordered');
    end
    df.x_ridx = dummy;
  end

  if (~isempty (rname) && (length (df.x_over{1}) < max (indr) || ...
        all (df.x_over{1}(indr))))
    df.x_name{1}(indr, 1) = genvarname(rname);
    df.x_over{1}(1, indr) = false;
  end
  if (~isempty (cname) && (length (df.x_over{2}) < max (indc) || ...
        all (df.x_over{2}(indc))))
    if (length (cname) < ncol)
      cname(end+1:ncol) = {'_'};
    end
    cname(cellfun (@isempty, cname)) = {'unnamed'};
    try
      df.x_name{2}(indc, 1) = genvarname (cname);
    catch
      %# there was a problem with genvarname.
      dummy = sum (~cellfun ('isempty', cname));
      if (1 == dummy)
        %# a single name was given: try to split it on spaces
        dummy =  strsplit(cname{1}, ' ', true);
        if (length (dummy) == ncol)
          df.x_name{2}(indc, 1) = dummy;
        else
          if (debug_on_error ()) keyboard; end
          warning ('df_matassign: unable to derive %d column names from the names provided', ncol);
        end
      else
        if (debug_on_error ()) keyboard; end
        warning ('df_matassign: unable to set the column names');
      end
    end
    df.x_over{2}(1, indc) = false;
  end

  df = df_thirddim (df);

end
621