function df = df_matassign(df, S, indc, ncol, RHS, trigger)
  %# auxiliary function: assign the dataframe as if it was a matrix
  %#
  %# Inputs
  %#   df      : dataframe being modified. The fields x_data, x_rep, x_cnt,
  %#             x_name, x_over, x_type, x_ridx, x_src and x_cmt are read
  %#             and/or written here.
  %#   S       : subsasgn-style index structure. S.subs{1} is used as the
  %#             row selector, S.subs{2} as the column selector and, when
  %#             present, S.subs{3} as the selector along the third dim.
  %#   indc    : targeted column indexes; when empty, the columns are
  %#             deduced from the width of RHS.
  %#   ncol    : number of targeted columns.
  %#   RHS     : content to assign: empty (deletion), a char array, a
  %#             cell array, a numeric array or another dataframe.
  %#   trigger : not referenced in this function.
  %#
  %# Output
  %#   df      : the updated dataframe, after a final pass through
  %#             df_thirddim.
  %#
  %# Errors
  %#   - a null assignment with too many non-colon indexes;
  %#   - a cell column that can neither be converted to the column type
  %#     nor downclassed to char;
  %#   - row indexes which are not unique after the assignment.

  %% Copyright (C) 2009-2017 Pascal Dupuis <cdemills@gmail.com>
  %%
  %% This file is part of the dataframe package for Octave.
  %%
  %% This package is free software; you can redistribute it and/or
  %% modify it under the terms of the GNU General Public
  %% License as published by the Free Software Foundation;
  %% either version 2, or (at your option) any later version.
  %%
  %% This package is distributed in the hope that it will be useful,
  %% but WITHOUT ANY WARRANTY; without even the implied
  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  %% PURPOSE.  See the GNU General Public License for more
  %% details.
  %%
  %% You should have received a copy of the GNU General Public
  %% License along with this package; see the file COPYING.  If not,
  %% see <http://www.gnu.org/licenses/>.

  %# use NA when the interpreter provides it, fall back on NaN otherwise
  try
    NA = NA;
  catch
    NA = NaN;
  end

  if (isempty (RHS))
    %# null assignment: delete columns or rows
    if (1 == ncol)
      if (sum (~strcmp (S.subs, ':')) > 2)
        error('A null assignment can only have one non-colon index.');
      end
    elseif (sum (~strcmp (S.subs, ':')) > 1)
      error('A null assignment can only have one non-colon index.');
    end

    if (strcmp (S.subs(1), ':'))  %# removing column/matrix
      RHS = S; RHS.subs(2) = [];
      for indi = (indc)
        unfolded = df.x_data{indi}(:, df.x_rep{indi});
        unfolded = feval (@subsasgn, unfolded, RHS, []);
        df.x_data{indi} = unfolded;
        if (~isempty (unfolded))
          %# brace indexing, as everywhere else x_rep is written
          df.x_rep{indi} = 1:size (unfolded, 2);
        end
      end
      %# remove empty elements
      indi = cellfun ('isempty', df.x_data);
      if (any (indi)) %# nothing left, remove this column
        df.x_cnt(2) = df.x_cnt(2) - sum (indi);
        indi = ~indi; %# vector of kept data
        df.x_name{2} = df.x_name{2}(indi);
        df.x_over{2} = df.x_over{2}(indi);
        df.x_type = df.x_type(indi);
        df.x_data = df.x_data(indi);
        df.x_rep = df.x_rep(indi);
      end
      if (size (df.x_ridx, 3) > 1)
        df.x_ridx(:, indc, :) = [];
      end
    elseif (strcmp (S.subs(2), ':'))  %# removing rows
      indr = S.subs{1};
      if (~isempty (df.x_name{1}))
        df.x_name{1}(indr, :) = [];
        df.x_over{1}(indr) = [];
      end
      df.x_ridx(indr, :, :) = [];
      %# to remove a line, iterate on each column
      df.x_data = cellfun (@(x) feval(@subsasgn, x, S, []), ...
                           df.x_data, 'UniformOutPut', false);
      if (isa (indr, 'char'))
        df.x_cnt(1) = 0;
      else
        df.x_cnt(1) = df.x_cnt(1) - length (indr);
      end
    end
    df = df_thirddim (df);

    return;
  end

  %# char array are problematic, convert to cellstr
  if (ischar (RHS) && ismatrix (RHS))
    RHS = cellstr (RHS);
  end

  indc_was_set = ~isempty (indc);
  if (~indc_was_set) %# initial dataframe was empty
    ncol = size (RHS, 2); indc = 1:ncol;
  end

  %# iscell(dataframe) returns true. Beware.
  ispurecell = iscell (RHS) & ~isa (RHS, 'dataframe');

  indr = S.subs{1, 1};
  indr_was_set = ~isempty (indr);
  %# initial dataframe was empty ?
  if (~indr_was_set || strcmp (indr, ':'))
    if (ispurecell)
      nrow = max (sum (cellfun ('size', RHS, 1), 1));
    else
      if (isvector (RHS))
        if (0 == df.x_cnt(1))
          nrow = size (RHS, 1);
        else
          nrow = df.x_cnt(1);  %# limit to df number of rows
        end
      else
        %# deduce limit from RHS
        nrow = size (RHS, 1);
      end
    end
    indr = 1:nrow;
  elseif (~isempty (indr))
    if (~isnumeric (indr))
      %# translate row names to row index
      [indr, nrow] = df_name2idx (df.x_name{1}, indr, df.x_cnt(1), 'row');
      S.subs{1, 1} = indr;
    else
      nrow = length (indr);
    end
  end
  if (length (S.subs) > 2)
    inds = S.subs{1, 3};
  else
    inds = [];
  end

  rname = cell(0, 0); rname_width = max (1, size (df.x_name{2}, 2));
  ridx = []; cname = rname; ctype = rname;

  if (ispurecell)
    if ((length (indc) == df.x_cnt(2) && size (RHS, 2) >= df.x_cnt(2)) ...
        || 0 == df.x_cnt(2) || isempty (S.subs{1}) || isempty (S.subs{2}))
      %# providing too much information -- remove extra content
      if (size (RHS, 1) > 1)
        %# at this stage, verify that the first line doesn't contain
        %# chars only; use them for column names
        dummy = cellfun ('class', ...
                         RHS(1, ~cellfun ('isempty', RHS(1, :))), ...
                         'UniformOutput', false);
        dummy = strcmp (dummy, 'char');
        if (all (dummy))
          if (length (df.x_over{2}) >= max (indc) ...
              && ~all (df.x_over{2}(indc)) && ~isempty (S.subs{2}))
            warning('Trying to overwrite colum names');
          end

          cname = RHS(1, :).'; RHS = RHS(2:end, :);
          if (~indr_was_set)
            nrow = nrow - 1; indr = 1:nrow;
          else
            %# we know indr, there is no reason that RHS(:, 1) contains
            %# row names.
            if (isempty (S.subs{2}))
              %# extract columns position from columns names
              [indc, ncol, S.subs{2}, dummy] = ...
                  df_name2idx (df.x_name{2}, cname, df.x_cnt(2), 'column');
              if (length (dummy) ~= sum (dummy))
                warning ('Not all RHS column names used');
                cname = cname(dummy); RHS = RHS(:, dummy);
              end
            end
          end
        end
        %# at this stage, verify that the first line doesn't contain
        %# chars only; use them for column types
        dummy = cellfun ('class', ...
                         RHS(1, ~cellfun ('isempty', RHS(1, :))), ...
                         'UniformOutput', false);
        dummy = strcmp (dummy, 'char');
        if (all (dummy))
          if (length (df.x_over{2}) >= max (indc) ...
              && ~all (df.x_over{2}(indc)))
            warning ('Trying to overwrite colum names');
          end

          if (sum (~cellfun ('isempty', RHS(1, indc))) == ncol)
            ctype = RHS(1, :);
          end

          RHS = RHS(2:end, :);
          if (~indr_was_set)
            nrow = nrow - 1; indr = 1:nrow;
          end
        end
      end

      %# more elements than df width -- try to use the first two as
      %# row index and/or row name
      if (size (RHS, 1) > 1)
        dummy = all (cellfun ('isnumeric', ...
                              RHS(~cellfun ('isempty', RHS(:, 1)), 1)));
      else
        if (0 == size (RHS, 1))
          dummy = false;
        else
          dummy = isnumeric (RHS{1, 1});
        end
      end
      dummy = dummy && (~isempty (cname) && size (cname{1}, 2) < 1);
      if (dummy)
        ridx = cell2mat (RHS(:, 1));
        %# can it be converted to a list of unique numbers ?
        if (length (unique (ridx)) == length (ridx))
          ridx = RHS(:, 1); RHS = RHS(:, 2:end);
          if (length (df.x_name{2}) == df.x_cnt(2) + ncol)
            %# columns name were pre-filled with too much values
            df.x_name{2}(end) = [];
            df.x_over{2}(end) = [];
            if (size (RHS, 2) < ncol)
              ncol = size (RHS, 2); indc = 1:ncol;
            end
          elseif (~indc_was_set)
            ncol = ncol - 1; indc = 1:ncol;
          end
          if (~isempty (cname)) cname = cname(2:end); end
          if (~isempty (ctype)) ctype = ctype(2:end); end
        else
          ridx = [];
        end
      end

      if (size (RHS, 2) > df.x_cnt(2))
        %# verify that the first column doesn't contain chars only, use
        %# them for row names
        dummy = cellfun ('class', ...
                         RHS(~cellfun ('isempty', RHS(:, 1)), 1), ...
                         'UniformOutput', false);
        dummy = strcmp (dummy, 'char');
        %# reduce the per-cell test to a scalar before using &&: an
        %# empty result counts as false, otherwise all cells must be char
        dummy = (~isempty (dummy) && all (dummy(:))) ...
                && (~isempty (cname) && size (cname{1}, 2) < 1);
        if (dummy)
          if (length (df.x_over{1}) >= max (indr) ...
              && ~all (df.x_over{1}(indr)))
            warning('Trying to overwrite row names');
          else
            rname = RHS(:, 1);
          end
          rname_width = max ([1; cellfun('size', rname, 2)]);
          RHS = RHS(:, 2:end);
          if (length (df.x_name{2}) == df.x_cnt(2) + ncol)
            %# columns name were pre-filled with too much values
            df.x_name{2}(end) = [];
            df.x_over{2}(end) = [];
            if (size (RHS, 2) < ncol)
              ncol = size (RHS, 2); indc = 1:ncol;
            end
          elseif (~indc_was_set)
            ncol = ncol - 1; indc = 1:ncol;
          end
          if (~isempty (cname)) cname = cname(2:end); end
          if (~isempty (ctype)) ctype = ctype(2:end); end
        end
      end
    end
  end

  %# perform row resizing if columns are already filled
  if (~isempty (indr) && isnumeric(indr))
    if (max (indr) > df.x_cnt(1) && size (df.x_data, 2) == df.x_cnt(2))
      df = df_pad (df, 1, max (indr)-df.x_cnt(1), rname_width);
    end
  end

  if (ispurecell) %# we must pad on a column-by-column basis
    %# verify that each cell contains a non-empty vector, and that sizes
    %# are compatible
    %# dummy = cellfun ('size', RHS(:), 2);
    %# if any (dummy < 1),
    %#   error('cells content may not be empty');
    %# end

    %# dummy = cellfun ('size', RHS, 1);
    %# if any (dummy < 1),
    %#   error('cells content may not be empty');
    %# end
    %# if any (diff(dummy) > 0),
    %#   error('cells content with unequal length');
    %# end
    %# if 1 < size (RHS, 1) && any (dummy > 1),
    %#   error('cells may only contain scalar');
    %# end

    %# NOTE: indc may be a vector; the conditions below then hold only
    %# when they are true for every element of indc
    if (size (RHS, 2) > indc)
      if (size (cname, 1) > indc)
        ncol = size (RHS, 2); indc = 1:ncol;
      else
        if (debug_on_error ()) keyboard; end
      end
    end

    %# try to detect and remove bottom garbage
    eff_len = zeros (nrow, 1);
    if (size (RHS, 1) > 1)
      for indi = (indr)
        eff_len(indi, 1) = sum (~cellfun ('isempty', RHS(indi, :)));
      end
      indi = nrow;
      while (indi > 0)
        if (eff_len(indi) < 1)
          nrow = nrow - 1;
          indr(end) = [];
          RHS(end, :) = [];
          indi = indi - 1;
          if (~indr_was_set && isempty (df.x_name{1, 1}))
            df.x_cnt(1) = nrow;
            df.x_ridx(end) = [];
          end
        else
          break;
        end
      end
      clear eff_len;
    end

    %# the real assignement
    if (1 == size (RHS, 1)) %# each cell contains one vector
      extractfunc = @(x) RHS{x};
      idxOK = logical(indr);
    else %# use cell2mat to pad on a column-by-column basis
      extractfunc = @(x) cell2mat (RHS(:, x));
    end

    %# drop the column selector: each column is addressed on its own
    indj = 1; S.subs(2) = [];
    if (length (S.subs) < 2)
      S.subs{2} = 1;
    end
    for indi = (1:ncol)
      if (indc(indi) > df.x_cnt(2))
        %# perform dynamic resizing one-by-one, to get type right
        if (isempty (ctype) || length (ctype) < indc(indi))
          df = df_pad (df, 2, indc(indi)-df.x_cnt(2), class (RHS{1, indj}));
        else
          df = df_pad (df, 2, indc(indi)-df.x_cnt(2), ctype{indj});
        end
      end
      if (max (inds) > length (df.x_rep{indc(indi)}))
        df = df_pad (df, 3, max (inds)-length (df.x_rep{indc(indi)}), ...
                     indc(indi));
      end
      if (nrow == df.x_cnt(1))
        %# whole assignement
        try
          if (size (RHS, 1) <= 1)
            switch df.x_type{indc(indi)}
              case {'char'} %# use a cell array to hold strings
                dummy = cellfun (@num2str, RHS(:, indj), ...
                                 'UniformOutput', false);
              case {'double'}
                dummy = extractfunc (indj);
              otherwise
                dummy = cast (extractfunc (indj), df.x_type{indc(indi)});
            end
          else
            %# keeps indexes in sync as cell elements may be empty
            idxOK = ~cellfun ('isempty', RHS(:, indj));
            %# intialise dummy so that it can receive 'anything'
            dummy = [];
            switch (df.x_type{indc(indi)})
              case {'char'} %# use a cell array to hold strings
                dummy = cellfun (@num2str, RHS(:, indj, :), ...
                                 'UniformOutput', false);
              case {'double'}
                dummy(idxOK, :) = extractfunc (indj); dummy(~idxOK, :) = NA;
              otherwise
                dummy(idxOK, :) = extractfunc (indj); dummy(~idxOK, :) = NA;
                dummy = cast(dummy, df.x_type{indc(indi)});
            end
          end
        catch
          fprintf (2, 'Something went wrong while converting colum %d\n', indj);
          fprintf (2, 'Error was: %s\n', lasterr ());
          %# only enter the debugger when the user asked for it
          if (debug_on_error ()) keyboard; end
          dummy = unique (cellfun (@class, RHS(:, indj), ...
                                   'UniformOutput', false));
          if (any (strcmp ('char', dummy)))
            fprintf (2, 'Downclassing to char\n');
            %# replace the actual column, of type numeric, by a char
            df.x_type{indc(indi)} = 'char';
            dummy = RHS(:, indj);
            for indk = (size (dummy, 1):-1:1)
              %# convert every non-string cell to its text representation
              if (~ischar (dummy{indk}))
                if (isinteger (dummy{indk}))
                  dummy{indk} = mat2str (dummy{indk});
                elseif (isa (dummy{indk}, 'logical'))
                  if (dummy{indk})
                    dummy{indk} = 'true';
                  else
                    dummy{indk} = 'false';
                  end
                elseif (isnumeric (dummy{indk}))
                  dummy{indk} = mat2str (dummy{indk}, 6);
                end
              end
            end
          else
            dummy = ...
                sprintf ('Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s', ...
                         indj, df.x_type{indc(indi)}, length (indr), disp (RHS(:, indj)));
            if (debug_on_error ()) keyboard; end
            %# the message embeds user data: do not use it as a template
            error ('%s', dummy);
          end
          if (debug_on_error ()) keyboard; end
        end
        if (size (dummy, 1) < df.x_cnt(1))
          dummy(end+1:df.x_cnt(1), :) = NA;
        end
      else
        %# partial assignement -- extract actual data and update
        dummy = df.x_data{indc(indi)};
        if (size (RHS, 1) > 0)
          %# pad content
          try
            switch (df.x_type{indc(indi)})
              case {'char'} %# use a cell array to hold strings
                dummy(indr, 1) = cellfun(@num2str, RHS(:, indj), ...
                                         'UniformOutput', false);
              case {'double'}
                dummy(indr, :) = extractfunc (indj);
              otherwise
                dummy(indr, :) = cast(extractfunc (indj), df.x_type{indc(indi)});
            end
          catch
            dummy = ...
                sprintf ('Assignement failed for colum %d, of type %s and length %d,\nwith new content\n%s', ...
                         indj, df.x_type{indc(indi)}, length (indr), disp (RHS(:, indj)));
            %# the message embeds user data: do not use it as a template
            error ('%s', dummy);
          end
        end
      end
      [df, S] = df_cow (df, S, indc(indi));
      if (isempty (inds))
        df.x_data{indc(indi)} = dummy;
        df.x_rep{indc(indi)} = 1:size (dummy, 2);
      else
        fillfunc = @(x, S, y) feval (@subsasgn, x, S, dummy);
        try
          df.x_data{indc(indi)} = fillfunc (df.x_data{indc(indi)}, S, indi);
        catch
          disp (lasterr ()); disp ('line 439');
          if (debug_on_error ()) keyboard; end
        end
      end

      %# df.x_rep{indc(indi)} = 1:size (dummy, 2);
      indj = indj + 1;
    end

  else
    %# RHS is either a numeric, either a df
    if (any (indc > min (size (df.x_data, 2), df.x_cnt(2))))
      df = df_pad (df, 2, max (indc-min (size (df.x_data, 2), df.x_cnt(2))), ...
                   class(RHS));
    end
    if (~isempty (inds) && isnumeric(inds) && any (inds > 1))
      for indi = (1:ncol)
        if (max (inds) > length (df.x_rep{indc(indi)}))
          df = df_pad (df, 3, max (inds)-length (df.x_rep{indc(indi)}), ...
                       indc(indi));
        end
      end
    end

    if (isa (RHS, 'dataframe'))
      %# block-copy index
      S.subs{2} = 1;
      if (any (~isna(RHS.x_ridx)))
        df.x_ridx = feval (@subsasgn, df.x_ridx, S, RHS.x_ridx);
      end
      %# skip second dim and copy data
      S.subs(2) = []; Sorig = S;
      for indi = (1:ncol)
        [df, S] = df_cow (df, S, indc(indi));
        if (strcmp (df.x_type(indc(indi)), RHS.x_type(indi)))
          try
            df.x_data{indc(indi)} = feval (@subsasgn, df.x_data{indc(indi)}, S, ...
                                           RHS.x_data{indi}(:, RHS.x_rep{indi}));
          catch
            disp (lasterr ()); disp('line 445 ???');
            if (debug_on_error ()) keyboard; end
          end
        else
          %# types differ: convert the source column to the target type;
          %# cast expects the type name as a string, hence the braces
          df.x_data{indc(indi)} = feval (@subsasgn, df.x_data{indc(indi)}, S, ...
                                         cast (RHS.x_data{indi}(:, RHS.x_rep{indi}),...
                                               df.x_type{indc(indi)}));
        end
        S = Sorig;
      end
      if (~isempty (RHS.x_name{1}))
        df.x_name{1}(indr) = genvarname(RHS.x_name{1}(indr));
        df.x_over{1}(indr) = RHS.x_over{1}(indr);
      end
      if (~isempty (RHS.x_src))
        if (~any (strcmp (cellstr(df.x_src), cellstr(RHS.x_src))))
          df.x_src = vertcat(df.x_src, RHS.x_src);
        end
      end
      if (~isempty (RHS.x_cmt))
        if (~any (strcmp (cellstr(df.x_cmt), cellstr(RHS.x_cmt))))
          df.x_cmt = vertcat(df.x_cmt, RHS.x_cmt);
        end
      end

    else
      %# RHS is homogenous, pad at once
      if (isvector (RHS)) %# scalar - vector
        if (isempty (S.subs))
          %# same three-argument signature as the other fill functions,
          %# since the loop below always calls fillfunc (x, S, y)
          fillfunc = @(x, S, y) RHS;
        else
          %# ignore 'column' dimension -- force colum vectors -- use a
          %# third dim just in case
          if (isempty (S.subs{1})) S.subs{1} = ':'; end
          S.subs(2) = [];
          if (length (S.subs) < 2)
            S.subs{2} = 1;
          end
          if (ncol > 1 && length (RHS) > 1)
            %# set a row from a vector
            fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS(y));
          else
            fillfunc = @(x, S, y) feval (@subsasgn, x, S, RHS);
          end
        end
        Sorig = S;
        for indi = (1:ncol)
          try
            lasterr('');
            %# dummy records which step was running, for debugging
            dummy= 'df_cow';
            [df, S] = df_cow (df, S, indc(indi));
            dummy = 'fillfunc';
            df.x_data{indc(indi)} = fillfunc (df.x_data{indc(indi)}, S, indi);
          catch
            disp (lasterr ()); disp ('line 499 ');
            if (debug_on_error ()) keyboard; end
          end
          %# restore the index even when the assignment above failed, so
          %# that the next column starts from the original selector
          S = Sorig;
          %# catch
          %#   if ndims(df.x_data{indc(indi)}) > 2,
          %#     %# upstream forgot to give the third dim
          %#     dummy = S; dummy.subs(3) = 1;
          %#     df.x_data{indc(indi)} = fillfunc(df.x_data{indc(indi)}, \
          %#                                       dummy, indi);
          %#   else
          %#     rethrow(lasterr());
          %#   end
          %# end
        end
      else %# 2D - 3D matrix
        S.subs(2) = [];  %# ignore 'column' dimension
        if (isempty (S.subs{1}))
          S.subs{1} = indr;
        end
        %# rotate slices in dim 1-3 to slices in dim 1-2
        fillfunc = @(x, S, y) feval (@subsasgn, x, S, squeeze(RHS(:, y, :)));
        Sorig = S;
        for indi = (1:ncol)
          [df, S] = df_cow (df, S, indc(indi));
          df.x_data{indc(indi)} = fillfunc (df.x_data{indc(indi)}, S, indi);
          S = Sorig;
        end
      end
      if (indi < size (RHS, 2) && ~isa (RHS, 'char'))
        warning (' not all columns of RHS used');
      end
    end
  end

  %# delayed row padding -- column padding occured before
  if (~isempty (indr) && isnumeric (indr))
    if (max (indr) > df.x_cnt(1) && size (df.x_data, 2) < df.x_cnt(2))
      df = df_pad (df, 1, max (indr)-df.x_cnt(1), rname_width);
    end
  end

  %# adjust ridx and rnames, if required
  if (~isempty (ridx))
    dummy = df.x_ridx;
    if (1 == size (RHS, 1))
      dummy(indr) = ridx{1};
    else
      dummy(indr) = vertcat(ridx{indr});
    end
    if (length (unique (dummy)) ~= length (dummy)) %# || ...
      %# any (diff(dummy) <= 0),
      error('row indexes are not unique or not ordered');
    end
    df.x_ridx = dummy;
  end

  if (~isempty (rname) && (length (df.x_over{1}) < max (indr) || ...
                           all (df.x_over{1}(indr))))
    df.x_name{1}(indr, 1) = genvarname(rname);
    df.x_over{1}(1, indr) = false;
  end
  if (~isempty (cname) && (length (df.x_over{2}) < max (indc) || ...
                           all (df.x_over{2}(indc))))
    if (length (cname) < ncol)
      cname(end+1:ncol) = {'_'};
    end
    %# give a placeholder to every column left without a name
    cname(cellfun (@isempty, cname)) = {'unnamed'};
    try
      df.x_name{2}(indc, 1) = genvarname (cname);
    catch
      %# there was a problem with genvarname.
      dummy = sum (~cellfun ('isempty', cname));
      if (1 == dummy)
        dummy = strsplit(cname{1}, ' ', true);
        if (length (dummy) == ncol)
          df.x_name{2}(indc, 1) = dummy;
        else
          disp ('line 575 ');
          if (debug_on_error ()) keyboard; end
        end
      else
        disp ('line 578 ');
        if (debug_on_error ()) keyboard; end
      end
    end
    df.x_over{2}(1, indc) = false;
  end

  df = df_thirddim (df);

end