function [R,CC]=xval(D,classlabel,MODE,arg4)
% XVAL is used for crossvalidation
%
%  [R,CC] = xval(D,classlabel)
%  .. = xval(D,classlabel,CLASSIFIER)
%  .. = xval(D,classlabel,CLASSIFIER,type)
%  .. = xval(D,{classlabel,W},CLASSIFIER)
%  .. = xval(D,{classlabel,W,NG},CLASSIFIER)
%
%  example:
%      load fisheriris;    %builtin iris dataset
%      C = species;
%      K = 5; NG = [1:length(C)]'*K/length(C);
%      [R,CC] = xval(meas,{C,[],NG},'NBC');
%
% Input:
%    D:	data features (one feature per column, one sample per row)
%    classlabel	labels of each sample, must have the same number of rows as D.
% 		Two different encodings are supported:
%		{-1,1}-encoding (multiple classes with separate columns for each class) or
%		1..M encoding.
% 		So [1;2;3;1;4] is equivalent to
%			[+1,-1,-1,-1;
%			[-1,+1,-1,-1;
%			[-1,-1,+1,-1;
%			[+1,-1,-1,-1]
%			[-1,-1,-1,+1]
%		Note, samples with classlabel=0 are ignored.
%
%    CLASSIFIER can be any classifier supported by train_sc (default='LDA')
%       {'REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf', 'RDA','GDBC',
%	 'SVM','RBF','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3','WINNOW'}
%       these can be modified by ###/GSVD, ###/sparse and ###/DELETION.
%	   /DELETION removes in case of NaN's either the rows or the columns (which removes less data values) with any NaN
%	   /sparse and /GSVD preprocess the data and reduce it to some lower-dimensional space.
%       Hyperparameters (like alpha for PLA, gamma/lambda for RDA, c_value for SVM, etc) can be defined as
% 	CLASSIFIER.hyperparameter.alpha, etc. and
% 	CLASSIFIER.TYPE = 'PLA' (as listed above).
%       See train_sc for details.
%    W:	weights for each sample (row) in D.
%	default: [] (i.e. all weights are 1)
%	number of elements in W must match the number of rows of D
%    NG: used to define the type of cross-validation
% 	Leave-One-Out-Method (LOOM): NG = [1:length(classlabel)]' (default)
% 	Leave-K-Out-Method: NG = ceil([1:length(classlabel)]'/K)
%	K-fold XV:  NG = ceil([1:length(classlabel)]'*K/length(classlabel))
%	group-wise XV (if samples are not independent) can be also defined here
%	samples from the same group (dependent samples) get the same identifier
%	samples from different groups get different classifiers
%    TYPE:  defines the type of cross-validation procedure if NG is not specified
%	'LOOM'  leave-one-out-method
%       k	k-fold crossvalidation
%
% OUTPUT:
%    R contains the resulting performance metric
%    CC contains the classifier
%
%    plota(R) shows the confusion matrix of the results
%
% see also: TRAIN_SC, TEST_SC, CLASSIFY, PLOTA
%
% References:
% [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed.
%       John Wiley & Sons, 2001.
% [2] A. Schlögl, J. Kronegg, J.E. Huggins, S. G. Mason;
%       Evaluation criteria in BCI research.
%       (Eds.) G. Dornhege, J.R. Millan, T. Hinterberger, D.J. McFarland, K.-R.Müller;
%       Towards Brain-Computer Interfacing, MIT Press, 2007, p.327-342

%	$Id$
%	Copyright (C) 2008,2009,2010 by Alois Schloegl <alois.schloegl@gmail.com>
%       This function is part of the NaN-toolbox
%       http://pub.ist.ac.at/~schloegl/matlab/NaN/

% This program is free software; you can redistribute it and/or
% modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation; either version 3
% of the  License, or (at your option) any later version.
%
% This program is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with this program; if not, write to the Free Software
% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.

%% normalize the classifier argument into a struct with field .TYPE
if (nargin<3) || isempty(MODE),
	MODE = 'LDA';
end;
if ischar(MODE)
        tmp = MODE;
        clear MODE;
        MODE.TYPE = tmp;
elseif ~isfield(MODE,'TYPE')
        MODE.TYPE='';
end;

sz = size(D);
NG = [];
W  = [];

%% extract class labels C, sample weights W, and group vector NG
if iscell(classlabel)
	c  = classlabel{:,1};
	if isnumeric(c)
		% hack to handle NaN's in unique(...): relabel only the
		% non-NaN entries to 1..M, keep NaN's in place
		ix = find(~isnan(c));
		C  = c;
		[b,i,C(ix)] = unique(c(ix));
	else
		% FIX: non-numeric labels (e.g. cellstr, as in the iris
		% example above) cannot be tested with isnan() and cannot
		% receive numeric assignments; map them to 1..M directly.
		[b,i,C] = unique(c);
		C = C(:);
	end;
	if size(classlabel,2)>1,
		W = [classlabel{:,2}];
	end;
	if size(classlabel,2)>2,
		[Label,tmp1,NG] = unique(classlabel{:,3});
	end;
elseif size(classlabel,2)>1,
	%% group-wise crossvalidation
	C = classlabel(:,1);
	W = classlabel(:,2);
	if size(classlabel,2)==2,
	        warning('This option defines W and NG in an ambigous way - use instead xval(D,{C,[],NG},...) or xval(D,{C,W},...)');
	else
		[Label,tmp1,NG] = unique(classlabel(:,3));
	end;
else
	C = classlabel;
end;
if all(W==1), W = []; end;
if sz(1)~=size(C,1),
        error('length of data and classlabel does not fit');
end;

%% derive NG from the TYPE argument when it was not given explicitly
if isempty(NG)
	if (nargin<4) || isempty(arg4) || strcmpi(arg4,'LOOM')
		%% LOOM (also the default for an empty TYPE argument)
		NG = (1:sz(1))';
	elseif isnumeric(arg4)
		if isscalar(arg4)
			% K-fold XV
			NG = ceil((1:length(C))'*arg4/length(C));
		elseif length(arg4)==2,
			NG = ceil((1:length(C))'*arg4(1)/length(C));
		else
			error('XVAL: numeric TYPE argument must be a scalar K or a 2-element vector');
		end;
	else
		% FIX: previously an unrecognized TYPE silently left NG
		% empty, causing an obscure failure in the loop below.
		error('XVAL: unknown cross-validation TYPE');
	end;
end;

if ~isfield(MODE,'hyperparameter')
        MODE.hyperparameter = [];
end

%% cross-validation loop: train on all groups but k, test on group k.
%  FIX: preallocate based on C (size(classlabel,1) is 1 for the cell
%  input form {C,W,NG} and only worked through implicit array growth).
cl     = NaN(size(C,1),1);
output = NaN(size(C,1),max(C));
for k = 1:max(NG),
	% training set: all samples outside group k with a valid label
	ix = find(~any(isnan(C),2) & (NG~=k));
	if isempty(W),
		CC = train_sc(D(ix,:), C(ix), MODE);
	else
		CC = train_sc(D(ix,:), C(ix), MODE, W(ix));
	end;
	% test set: all samples of group k (including NaN-labeled ones)
	ix = find(NG==k);
	r  = test_sc(CC, D(ix,:));
	cl(ix,1)     = r.classlabel;
	output(ix,:) = r.output;
end;

%R = kappa(C,cl,'notIgnoreNAN',W);
R = kappa(C,cl,[],W);
%R2 = kappa(R.H);

R.OUTPUT=output;
R.CL=cl;
R.ERR = 1-R.ACC;
if isnumeric(R.Label)
	R.Label = cellstr(int2str(R.Label));
end;

if nargout>1,
	% final classifier, trained on all valid samples
	ix = find(~any(isnan(C),2));
	if isempty(W),
		CC = train_sc(D(ix,:), C(ix), MODE);
	else
		CC = train_sc(D(ix,:), C(ix), MODE,W);
	end;
	CC.Labels = 1:max(C);
	%CC.Labels = unique(C);
end;