1## Copyright (C) 1995-2017 Kurt Hornik
2##
3## This program is free software: you can redistribute it and/or
4## modify it under the terms of the GNU General Public License as
5## published by the Free Software Foundation, either version 3 of the
6## License, or (at your option) any later version.
7##
8## This program is distributed in the hope that it will be useful, but
9## WITHOUT ANY WARRANTY; without even the implied warranty of
10## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11## General Public License for more details.
12##
13## You should have received a copy of the GNU General Public License
14## along with this program; see the file COPYING.  If not, see
15## <http://www.gnu.org/licenses/>.
16
17## -*- texinfo -*-
18## @deftypefn {} {[@var{pval}, @var{z}] =} z_test_2 (@var{x}, @var{y}, @var{v_x}, @var{v_y}, @var{alt})
19## For two samples @var{x} and @var{y} from normal distributions with unknown
20## means and known variances @var{v_x} and @var{v_y}, perform a Z-test of the
21## hypothesis of equal means.
22##
23## Under the null, the test statistic @var{z} follows a standard normal
24## distribution.
25##
26## With the optional argument string @var{alt}, the alternative of interest
27## can be selected.  If @var{alt} is @qcode{"!="} or @qcode{"<>"}, the null
28## is tested against the two-sided alternative
29## @code{mean (@var{x}) != mean (@var{y})}.  If alt is @qcode{">"}, the
30## one-sided alternative @code{mean (@var{x}) > mean (@var{y})} is used.
31## Similarly for @qcode{"<"}, the one-sided alternative
32## @code{mean (@var{x}) < mean (@var{y})} is used.  The default is the
33## two-sided case.
34##
35## The p-value of the test is returned in @var{pval}.
36##
37## If no output argument is given, the p-value of the test is displayed along
38## with some information.
39## @end deftypefn
40
41## Author: KH <Kurt.Hornik@wu-wien.ac.at>
42## Description: Compare means of two normal samples with known variances
43
44function [pval, z] = z_test_2 (x, y, v_x, v_y, alt)
45
46  if (nargin < 4 || nargin > 5)
47    print_usage ();
48  endif
49
50  if (! (isvector (x) && isvector (y)))
51    error ("z_test_2: both X and Y must be vectors");
52  elseif (! (isscalar (v_x) && (v_x > 0)
53             && isscalar (v_y) && (v_y > 0)))
54    error ("z_test_2: both V_X and V_Y must be positive scalars");
55  endif
56
57  n_x  = length (x);
58  n_y  = length (y);
59  mu_x = sum (x) / n_x;
60  mu_y = sum (y) / n_y;
61  z    = (mu_x - mu_y) / sqrt (v_x / n_x + v_y / n_y);
62  cdf  = stdnormal_cdf (z);
63
64  if (nargin == 4)
65    alt = "!=";
66  endif
67
68  if (! ischar (alt))
69    error ("z_test_2: ALT must be a string");
70  elseif (strcmp (alt, "!=") || strcmp (alt, "<>"))
71    pval = 2 * min (cdf, 1 - cdf);
72  elseif (strcmp (alt, ">"))
73    pval = 1 - cdf;
74  elseif (strcmp (alt, "<"))
75    pval = cdf;
76  else
77    error ("z_test_2: option %s not recognized", alt);
78  endif
79
80  if (nargout == 0)
81    s = ["Two-sample Z-test of mean(x) == mean(y) against ", ...
82         "mean(x) %s mean(y),\n",                            ...
83         "with known var(x) == %g and var(y) == %g:\n",      ...
84         "  pval = %g\n"];
85    printf (s, alt, v_x, v_y, pval);
86  endif
87
88endfunction
89
90%!test
91%! ## Two-sided (also the default option)
92%! x = randn (100, 1); v_x = 2; x = v_x * x;
93%! [pval, zval] = z_test_2 (x, x, v_x, v_x);
94%! zval_exp = 0; pval_exp = 1.0;
95%! assert (zval, zval_exp, eps);
96%! assert (pval, pval_exp, eps);
97
98%!test
99%! ## Two-sided (also the default option)
100%! x = randn (10000, 1); v_x = 2; x = v_x * x; n_x = length (x);
101%! y = randn (20000, 1); v_y = 3; y = v_y * y; n_y = length (y);
102%! [pval, z] = z_test_2 (x, y, v_x, v_y);
103%! if (mean (x) >= mean (y))
104%!   zval = abs (norminv (0.5*pval));
105%! else
106%!   zval = -abs (norminv (0.5*pval));
107%! endif
108%! unew = zval * sqrt (v_x/n_x + v_y/n_y);
109%! delmu = mean (x) - mean (y);
110%! assert (delmu, unew, 100*eps);
111
112%!test
113%! x = randn (100, 1); v_x = 2; x = v_x * x;
114%! [pval, zval] = z_test_2 (x, x, v_x, v_x, ">");
115%! zval_exp = 0; pval_exp = 0.5;
116%! assert (zval, zval_exp, eps);
117%! assert (pval, pval_exp, eps);
118
119%!test
120%! x = randn (10000, 1); v_x = 2; x = v_x * x; n_x = length (x);
121%! y = randn (20000, 1); v_y = 3; y = v_y * y; n_y = length (y);
122%! [pval, z] = z_test_2 (x, y, v_x, v_y, ">");
123%! zval = norminv (1-pval);
124%! unew = zval * sqrt (v_x/n_x + v_y/n_y);
125%! delmu = mean (x) - mean (y);
126%! assert (delmu, unew, 100*eps);
127
128%!test
129%! x = randn (100, 1); v_x = 2; x = v_x * x;
130%! [pval, zval] = z_test_2 (x, x, v_x, v_x, "<");
131%! zval_exp = 0; pval_exp = 0.5;
132%! assert (zval, zval_exp, eps);
133%! assert (pval, pval_exp, eps);
134
135%!test
136%! x = randn (10000, 1); v_x = 2; x = v_x * x; n_x = length (x);
137%! y = randn (20000, 1); v_y = 3; y = v_y * y; n_y = length (y);
138%! [pval, z] = z_test_2 (x, y, v_x, v_y, "<");
139%! zval = norminv (pval);
140%! unew = zval * sqrt (v_x/n_x + v_y/n_y);
141%! delmu = mean (x) - mean (y);
142%! assert (delmu, unew, 100*eps);
143