function [R,CC]=xval(D,classlabel,MODE,arg4)
% XVAL is used for crossvalidation
%
%  [R,CC] = xval(D,classlabel)
%  ..  = xval(D,classlabel,CLASSIFIER)
%  ..  = xval(D,classlabel,CLASSIFIER,type)
%  ..  = xval(D,{classlabel,W},CLASSIFIER)
%  ..  = xval(D,{classlabel,W,NG},CLASSIFIER)
%
%  example:
%      load_fisheriris;    %builtin iris dataset
%      C = species;
%      K = 5; NG = [1:length(C)]'*K/length(C);
%      [R,CC] = xval(meas,{C,[],NG},'NBC');
%
%  Input:
%    D: data features (one feature per column, one sample per row)
%    classlabel: labels of each sample, must have the same number of rows as D.
%       Two different encodings are supported:
%       {-1,1}-encoding (multiple classes with separate columns for each class) or
%       1..M encoding.
%       So [1;2;3;1;4] is equivalent to
%          [+1,-1,-1,-1;
%           -1,+1,-1,-1;
%           -1,-1,+1,-1;
%           +1,-1,-1,-1;
%           -1,-1,-1,+1]
%       Note, samples with classlabel=0 are ignored.
%
%    CLASSIFIER can be any classifier supported by train_sc (default='LDA')
%       {'REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf', 'RDA','GDBC',
%        'SVM','RBF','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3','WINNOW'}
%       these can be modified by ###/GSVD, ###/sparse and ###/DELETION.
%       /DELETION removes in case of NaN's either the rows or the columns (which removes less data values) with any NaN
%       /sparse and /GSVD preprocess the data and reduce it to some lower-dimensional space.
%       Hyperparameters (like alpha for PLA, gamma/lambda for RDA, c_value for SVM, etc) can be defined as
%       CLASSIFIER.hyperparameter.alpha, etc. and
%       CLASSIFIER.TYPE = 'PLA' (as listed above).
%       See train_sc for details.
%    W: weights for each sample (row) in D.
%       default: [] (i.e. all weights are 1)
%       number of elements in W must match the number of rows of D
%    NG: used to define the type of cross-validation
%       Leave-One-Out-Method (LOOM): NG = [1:length(classlabel)]' (default)
%       Leave-K-Out-Method: NG = ceil([1:length(classlabel)]'/K)
%       K-fold XV: NG = ceil([1:length(classlabel)]'*K/length(classlabel))
%       group-wise XV (if samples are not independent) can be also defined here
%       samples from the same group (dependent samples) get the same identifier
%       samples from different groups get different classifiers
%    TYPE: defines the type of cross-validation procedure if NG is not specified
%       'LOOM'  leave-one-out-method
%       k       k-fold crossvalidation
%
%  OUTPUT:
%    R contains the resulting performance metric
%    CC contains the classifier
%
%    plota(R) shows the confusion matrix of the results
%
% see also: TRAIN_SC, TEST_SC, CLASSIFY, PLOTA
%
% References:
% [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed.
%      John Wiley & Sons, 2001.
% [2] A. Schlögl, J. Kronegg, J.E. Huggins, S. G. Mason;
%      Evaluation criteria in BCI research.
%      (Eds.) G. Dornhege, J.R. Millan, T. Hinterberger, D.J. McFarland, K.-R.Müller;
%      Towards Brain-Computer Interfacing, MIT Press, 2007, p.327-342

%	$Id$
%	Copyright (C) 2008,2009,2010 by Alois Schloegl <alois.schloegl@gmail.com>
%	This function is part of the NaN-toolbox
%	http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

% default classifier is LDA; normalize MODE into a struct with field TYPE
if (nargin<3) || isempty(MODE),
	MODE = 'LDA';
end;
if ischar(MODE)
	tmp = MODE;
	clear MODE;
	MODE.TYPE = tmp;
elseif ~isfield(MODE,'TYPE')
	MODE.TYPE='';
end;

sz = size(D);
NG = [];
W  = [];

% unpack classlabel: plain vector, matrix [C,W,NG], or cell {C,W,NG}
if iscell(classlabel)
	% hack to handle NaN's in unique(...): remap labels to 1..M on the
	% non-NaN entries only, leaving NaN entries untouched
	c  = classlabel{:,1};
	ix = find(~isnan(c));
	C  = c;
	[b,i,C(ix)] = unique(c(ix));
	if size(classlabel,2)>1,
		W = [classlabel{:,2}];
	end;
	if size(classlabel,2)>2,
		[Label,tmp1,NG] = unique(classlabel{:,3});
	end;
elseif size(classlabel,2)>1,
	%% group-wise crossvalidation
	C = classlabel(:,1);
	W = classlabel(:,2);
	if size(classlabel,2)==2,
		warning('This option defines W and NG in an ambigous way - use instead xval(D,{C,[],NG},...) or xval(D,{C,W},...)');
	else
		[Label,tmp1,NG] = unique(classlabel(:,3));
	end;
else
	C = classlabel;
end;
% all-ones weights are equivalent to no weights
if all(W==1), W = []; end;
if sz(1)~=size(C,1),
	error('length of data and classlabel does not fit');
end;

% derive the partition NG from arg4 when it was not given explicitly
if isempty(NG)
if (nargin<4) || strcmpi(arg4,'LOOM')
	%% LOOM
	NG = (1:sz(1))';

elseif isnumeric(arg4)
	if isscalar(arg4)
		% K-fold XV
		NG = ceil((1:length(C))'*arg4/length(C));
	elseif length(arg4)==2,
		NG = ceil((1:length(C))'*arg4(1)/length(C));
	end;

end;
end;

sz = size(D);
if sz(1)~=length(C),
	error('length of data and classlabel does not fit');
end;
if ~isfield(MODE,'hyperparameter')
	MODE.hyperparameter = [];
end

% FIX: preallocate with the number of samples size(C,1); the previous
% size(classlabel,1) was wrong (=1) when classlabel is a cell like {C,W,NG}
% and only worked through implicit array growth inside the loop.
cl     = repmat(NaN,size(C,1),1);
output = repmat(NaN,size(C,1),max(C));
for k = 1:max(NG),
	% train on all samples outside fold k that have a valid (non-NaN) label
	ix = find(~any(isnan(C),2) & (NG~=k));
	if isempty(W),
		CC = train_sc(D(ix,:), C(ix), MODE);
	else
		CC = train_sc(D(ix,:), C(ix), MODE, W(ix));
	end;
	% evaluate on the held-out fold k
	ix = find(NG==k);
	r  = test_sc(CC, D(ix,:));
	cl(ix,1)     = r.classlabel;
	output(ix,:) = r.output;
end;

%R = kappa(C,cl,'notIgnoreNAN',W);
R = kappa(C,cl,[],W);
%R2 = kappa(R.H);

R.OUTPUT = output;
R.CL     = cl;
R.ERR    = 1-R.ACC;
if isnumeric(R.Label)
	R.Label = cellstr(int2str(R.Label));
end;

if nargout>1,
	% final classifier, trained on all samples with a valid label
	ix = find(~any(isnan(C),2));
	if isempty(W),
		CC = train_sc(D(ix,:), C(ix), MODE);
	else
		% FIX: subset the weights to the selected samples, consistent with
		% the cross-validation loop above (previously the full-length W was
		% passed, which mismatches C(ix) when NaN labels were filtered out)
		CC = train_sc(D(ix,:), C(ix), MODE, W(ix));
	end;
	CC.Labels = 1:max(C);
	%CC.Labels = unique(C);
end;