function [idx, nelem, subs, mask] = df_name2idx(names, subs, count, dimname, missingOK);

  %# Helper routine translating row or column names into real indices.
  %#
  %# Inputs:
  %#   names     - the names along the dimension, accessed as names{k}
  %#   subs      - the subscript to translate, as produced by subsref
  %#               and similar: the single string ':', a char array
  %#               (one name or pattern per row), a cell array of
  %#               names or patterns, a logical mask, a dataframe, or
  %#               anything else, which is used as the index itself.
  %#               A name of the form 'first:last' is a range; each
  %#               side is either a number, a name, 'end' (right side
  %#               only) or empty.
  %#   count     - number of elements along the dimension
  %#   dimname   - dimension name used in the error message, or 1, 2
  %#               or 3 for 'row', 'column' or 'page'
  %#   missingOK - if true, a name matching nothing is not an error
  %#               (default: false)
  %#
  %# Outputs:
  %#   idx   - the indices selected by subs
  %#   nelem - number of selected indices
  %#   subs  - 'sanitised' from names: equal to idx, except when the
  %#           input was empty or the single ':', in which case it is
  %#           returned unchanged
  %#   mask  - for names: logical column, true where the corresponding
  %#           entry matched at least one name (ranges are not
  %#           flagged); for a logical subs: subs itself; [] otherwise

  %% Copyright (C) 2009-2017 Pascal Dupuis <cdemills@gmail.com>
  %%
  %% This file is part of the dataframe package for Octave.
  %%
  %% This package is free software; you can redistribute it and/or
  %% modify it under the terms of the GNU General Public
  %% License as published by the Free Software Foundation;
  %% either version 2, or (at your option) any later version.
  %%
  %% This package is distributed in the hope that it will be useful,
  %% but WITHOUT ANY WARRANTY; without even the implied
  %% warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  %% PURPOSE.  See the GNU General Public License for more
  %% details.
  %%
  %% You should have received a copy of the GNU General Public
  %% License along with this package; see the file COPYING.  If not,
  %% see <http://www.gnu.org/licenses/>.

  if (nargin < 5) missingOK = false; end

  %# give 'mask' a value on every path, so that requesting the fourth
  %# output never fails with an undefined return value
  mask = [];

  %# regexp idea of 'word boundary' changed between 3.6 and 3.7
  persistent wbs wbe;

  if (isempty (wbs))
    if (isempty ( regexp ('This is a test', '\<is\>')))
       [wbs, wbe] = deal ('\b');
    else
      wbs = '\<'; wbe = '\>';
    end
  end

  if (isempty (subs))
    %# not caring about rownames ? Avoid generating an error.
    idx = []; nelem = 0; return
  end

  if (~isa (dimname, 'char'))
    switch dimname
      case 1
        dimname = 'row';
      case 2
        dimname = 'column';
      case 3
        dimname = 'page';
      otherwise
        error ('Unknown dimension %d', dimname);
    end
  end

  if (isa (subs, 'char')),
    orig_name = subs;
    if (1 == size (subs, 1))
      if (strcmp(subs, ':')) %# range operator
        idx = 1:count; nelem = count;
        return
      end
    end
    subs = cellstr (subs);
  else
    if (~isvector(subs))
      %# yes/no ?
      %# error('Trying to access column as a matrix');
    end
    %# keep a printable version of the request for the error message
    switch (class (subs))
      case {'cell'}
        orig_name = char (subs);
      case {'dataframe'}
        orig_name = 'elements indexed by a dataframe';
      otherwise
        orig_name = num2str (subs);
    end
  end

  if (isa (subs, 'cell'))
    subs = subs(:); idx = []; mask = false (size (subs, 1), 1);
    %# never look further than the declared size (sanity check)
    nnames = min (length (names), count);
    %# characters denoting a pattern using regexp operators
    special = '[\.\*\+\?\{\}\(\)\[\]\^\$\\]';
    %# translate list of variables to list of indices
    for indi = (1:size (subs, 1))
      %# regexp doesn't like empty patterns
      if (isempty (subs{indi})) continue; end
      %# convert  from standard pattern to regexp pattern
      subs{indi} = regexprep (subs{indi}, '([^\.\\])(\*|\?)', '$1.$2');
      %# quote repetition ops at beginning of line, otherwise the regexp
      %# will stall forever/fail
      subs{indi} = regexprep (subs{indi}, ...
                              '^([\*\+\?\{\}\|])', '\\$1');
      %# detect | followed by EOL
      subs{indi} = regexprep (subs{indi}, '([^\\])\|$', '$1\\|');
      if (isempty (strfind (subs{indi}, ':')))
        %# if there's no special operator, make match strict
        if (isempty (regexp (subs{indi}, special)))
          subs{indi}  = [wbs subs{indi} wbe];
        end
        for indj = (1:nnames)
          if (~isempty (regexp (names{indj}, subs{indi})))
            idx = [idx indj]; mask(indi) = true;
          end
        end
      else
        %# range: each side of the ':' is resolved on its own
        bounds = strsplit (subs{indi}, ':');
        ind_start = 1;
        if (~isempty (bounds{1}))
          ind_start = sscanf (bounds{1}, '%d');
          if (isempty (ind_start))
            %# not a number: look for the name on the left of the ':'
            ind_start = 1;
            pattern = bounds{1};
            if (isempty (regexp (pattern, special)))
              pattern = [wbs pattern wbe];
            end
            for indj = (1:nnames)
              if (~isempty (regexp (names{indj}, pattern))),
                ind_start = indj; break; %# stop at the first match
              end
            end
          end
        end

        if (isempty (bounds{2}) || strcmp (bounds{2}, 'end'))
          ind_stop = count;
        else
          ind_stop = sscanf(bounds{2}, '%d');
          if (isempty (ind_stop))
            %# not a number: look for the name on the right of the ':'
            ind_stop = 1;
            pattern = bounds{2};
            if (isempty (regexp (pattern, special)))
              pattern = [wbs pattern wbe];
            end
            for indj = (nnames:-1:1)
              if (~isempty (regexp (names{indj}, pattern)))
                ind_stop = indj; break; %# stop at the last match
              end
            end
          end
        end
        idx = [idx ind_start:ind_stop];
      end
    end
    if (isempty (idx) && ~missingOK)
      msg = sprintf ('Unknown %s name while searching for %s', ...
                     dimname, orig_name);
      %# the message is passed as data, not as a template: a '%' in
      %# the searched name must not be interpreted a second time
      error ('%s', msg);
    end
  elseif (isa (subs, 'logical'))
    %# keep the positions of the true elements
    idx = 1:length (subs(:)); idx = reshape (idx, size (subs));
    idx(~subs) = []; mask = subs;
  elseif (isa (subs, 'dataframe'))
    idx = subsindex (subs, 1);
  else
    %# anything else is taken as the index itself
    idx = subs;
  end

  subs = idx;
  nelem = length (idx);

end
156