1 /*
2   PSPP - a program for statistical analysis.
3   Copyright (C) 2012, 2013, 2015, 2019 Free Software Foundation, Inc.
4 
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation, either version 3 of the License, or
8   (at your option) any later version.
9 
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14 
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 
19 /*
20  * This module implements the graph command
21  */
22 
23 #include <config.h>
24 
25 #include <math.h>
26 #include "gl/xalloc.h"
27 #include <gsl/gsl_cdf.h>
28 
29 #include "libpspp/assertion.h"
30 #include "libpspp/message.h"
31 #include "libpspp/pool.h"
32 
33 
34 #include "data/dataset.h"
35 #include "data/dictionary.h"
36 #include "data/casegrouper.h"
37 #include "data/casereader.h"
38 #include "data/casewriter.h"
39 #include "data/caseproto.h"
40 #include "data/subcase.h"
41 
42 
43 #include "data/format.h"
44 
45 #include "math/chart-geometry.h"
46 #include "math/histogram.h"
47 #include "math/moments.h"
48 #include "math/sort.h"
49 #include "math/order-stats.h"
50 #include "output/charts/plot-hist.h"
51 #include "output/charts/scatterplot.h"
52 #include "output/charts/barchart.h"
53 
54 #include "language/command.h"
55 #include "language/lexer/lexer.h"
56 #include "language/lexer/value-parser.h"
57 #include "language/lexer/variable-parser.h"
58 #include "language/stats/freq.h"
59 #include "language/stats/chart-category.h"
60 
61 #include "gettext.h"
62 #define _(msgid) gettext (msgid)
63 #define N_(msgid) msgid
64 
/* Kind of chart requested by a GRAPH command.  CT_NONE means that no
   chart subcommand has been parsed yet.  Only CT_BAR, CT_HISTOGRAM and
   CT_SCATTERPLOT are ever selected by the parser in this file. */
enum chart_type
  {
    CT_NONE = 0,                /* No chart type chosen yet. */
    CT_BAR,                     /* Bar chart. */
    CT_LINE,                    /* Line chart. */
    CT_PIE,                     /* Pie chart. */
    CT_ERRORBAR,                /* Error bar chart. */
    CT_HILO,                    /* High-low chart. */
    CT_HISTOGRAM,               /* Histogram. */
    CT_SCATTERPLOT,             /* Scatterplot. */
    CT_PARETO                   /* Pareto chart. */
  };
77 
/* Scatterplot flavour.  ST_BIVARIATE is the default and the only one the
   parser in this file accepts. */
enum scatter_type
  {
    ST_BIVARIATE = 0,           /* One X variable against one Y variable. */
    ST_OVERLAY,
    ST_MATRIX,
    ST_XYZ
  };
85 
/* Bar chart flavour.  CBT_SIMPLE is the default; the parser in this file
   rejects the other three. */
enum bar_type
  {
    CBT_SIMPLE = 0,
    CBT_GROUPED,
    CBT_STACKED,
    CBT_RANGE
  };
93 
94 
/* Indexes of the values inside each case that is written for a
   histogram. */
enum
  {
    HG_IDX_X = 0,               /* The data value. */
    HG_IDX_WT = 1               /* The case weight. */
  };
101 
/* Summary statistics gathered for one dependent variable while the
   cases of a histogram or scatterplot are read. */
struct exploratory_stats
{
  double missing;       /* Summed weight of cases whose value is missing. */
  double non_missing;   /* Summed weight of cases whose value is valid. */

  struct moments *mom;  /* Moments accumulator for the valid values. */

  double minimum;       /* Smallest valid value seen. */
  double maximum;       /* Largest valid value seen. */

  double cc;            /* Total weight of the valid cases. */

  double cmin;          /* Smallest single case weight seen. */
};
118 
119 
120 struct graph
121 {
122   struct pool *pool;
123 
124   size_t n_dep_vars;
125   const struct variable **dep_vars;
126   struct exploratory_stats *es;
127 
128   enum mv_class dep_excl;
129   enum mv_class fctr_excl;
130 
131   const struct dictionary *dict;
132 
133   bool missing_pw;
134 
135   /* ------------ Graph ---------------- */
136   bool normal; /* For histograms, draw the normal curve */
137 
138   enum chart_type chart_type;
139   enum scatter_type scatter_type;
140   enum bar_type bar_type;
141   const struct variable *by_var[2];
142   size_t n_by_vars;
143 
144   struct subcase ordering; /* Ordering for aggregation */
145   int agr; /* Index into ag_func */
146 
147   /* A caseproto that contains the plot data */
148   struct caseproto *gr_proto;
149 };
150 
151 
152 
153 
/* Aggregation step for the first moment: returns ACC increased by the
   weighted value X * W. */
static double
calc_mom1 (double acc, double x, double w)
{
  const double weighted = x * w;

  return acc + weighted;
}
159 
160 static double
calc_mom0(double acc,double x UNUSED,double w)161 calc_mom0 (double acc, double x UNUSED, double w)
162 {
163   return acc + w;
164 }
165 
/* Initial accumulator for a running maximum: the most negative finite
   double. */
static double
pre_low_extreme (void)
{
  const double lowest = -DBL_MAX;

  return lowest;
}
171 
172 static double
calc_max(double acc,double x,double w UNUSED)173 calc_max (double acc, double x, double w UNUSED)
174 {
175   return (acc > x) ? acc : x;
176 }
177 
/* Initial accumulator for a running minimum: the largest finite
   double. */
static double
pre_high_extreme (void)
{
  const double highest = DBL_MAX;

  return highest;
}
183 
184 static double
calc_min(double acc,double x,double w UNUSED)185 calc_min (double acc, double x, double w UNUSED)
186 {
187   return (acc < x) ? acc : x;
188 }
189 
/* Post-processing step that divides the accumulated value ACC by the
   weight CC. */
static double
post_normalise (double acc, double cc)
{
  const double normalised = acc / cc;

  return normalised;
}
195 
/* Post-processing step that expresses the accumulated value ACC as a
   percentage of CCC. */
static double
post_percentage (double acc, double ccc)
{
  const double fraction = acc / ccc;

  return fraction * 100.0;
}
201 
202 
203 const struct ag_func ag_func[] =
204   {
205     {"COUNT",   N_("Count"),      0, 0, NULL, calc_mom0, 0, 0},
206     {"PCT",     N_("Percentage"), 0, 0, NULL, calc_mom0, 0, post_percentage},
207     {"CUFREQ",  N_("Cumulative Count"),   0, 1, NULL, calc_mom0, 0, 0},
208     {"CUPCT",   N_("Cumulative Percent"), 0, 1, NULL, calc_mom0, 0,
209      post_percentage},
210 
211     {"MEAN",    N_("Mean"),    1, 0, NULL, calc_mom1, post_normalise, 0},
212     {"SUM",     N_("Sum"),     1, 0, NULL, calc_mom1, 0, 0},
213     {"MAXIMUM", N_("Maximum"), 1, 0, pre_low_extreme, calc_max, 0, 0},
214     {"MINIMUM", N_("Minimum"), 1, 0, pre_high_extreme, calc_min, 0, 0},
215   };
216 
217 const int N_AG_FUNCS = sizeof (ag_func) / sizeof (ag_func[0]);
218 
219 static bool
parse_function(struct lexer * lexer,struct graph * graph)220 parse_function (struct lexer *lexer, struct graph *graph)
221 {
222   int i;
223   for (i = 0 ; i < N_AG_FUNCS; ++i)
224     {
225       if (lex_match_id (lexer, ag_func[i].name))
226 	{
227 	  graph->agr = i;
228 	  break;
229 	}
230     }
231   if (i == N_AG_FUNCS)
232     {
233       goto error;
234     }
235 
236   graph->n_dep_vars = ag_func[i].arity;
237   if (ag_func[i].arity > 0)
238     {
239       int v;
240       if (!lex_force_match (lexer, T_LPAREN))
241 	goto error;
242 
243       graph->dep_vars = xzalloc (sizeof (graph->dep_vars) * graph->n_dep_vars);
244       for (v = 0; v < ag_func[i].arity; ++v)
245 	{
246 	  graph->dep_vars[v] = parse_variable (lexer, graph->dict);
247 	  if (! graph->dep_vars[v])
248 	    goto error;
249 	}
250 
251       if (!lex_force_match (lexer, T_RPAREN))
252 	goto error;
253     }
254 
255   if (!lex_force_match (lexer, T_BY))
256     goto error;
257 
258   graph->by_var[0] = parse_variable (lexer, graph->dict);
259   if (!graph->by_var[0])
260     {
261       goto error;
262     }
263   subcase_add_var (&graph->ordering, graph->by_var[0], SC_ASCEND);
264   graph->n_by_vars++;
265 
266   if (lex_match (lexer, T_BY))
267     {
268       graph->by_var[1] = parse_variable (lexer, graph->dict);
269       if (!graph->by_var[1])
270 	{
271 	  goto error;
272 	}
273       subcase_add_var (&graph->ordering, graph->by_var[1], SC_ASCEND);
274       graph->n_by_vars++;
275     }
276 
277   return true;
278 
279  error:
280   lex_error (lexer, NULL);
281   return false;
282 }
283 
284 
285 static void
show_scatterplot(const struct graph * cmd,struct casereader * input)286 show_scatterplot (const struct graph *cmd, struct casereader *input)
287 {
288   struct string title;
289   struct scatterplot_chart *scatterplot;
290   bool byvar_overflow = false;
291 
292   ds_init_empty (&title);
293 
294   if (cmd->n_by_vars > 0)
295     {
296       ds_put_format (&title, _("%s vs. %s by %s"),
297 			   var_to_string (cmd->dep_vars[1]),
298 			   var_to_string (cmd->dep_vars[0]),
299 			   var_to_string (cmd->by_var[0]));
300     }
301   else
302     {
303       ds_put_format (&title, _("%s vs. %s"),
304 		     var_to_string (cmd->dep_vars[1]),
305 		     var_to_string (cmd->dep_vars[0]));
306     }
307 
308   scatterplot = scatterplot_create (input,
309 				    var_to_string(cmd->dep_vars[0]),
310 				    var_to_string(cmd->dep_vars[1]),
311 				    (cmd->n_by_vars > 0) ? cmd->by_var[0]
312 				                         : NULL,
313 				    &byvar_overflow,
314 				    ds_cstr (&title),
315 				    cmd->es[0].minimum, cmd->es[0].maximum,
316 				    cmd->es[1].minimum, cmd->es[1].maximum);
317   scatterplot_chart_submit (scatterplot);
318   ds_destroy (&title);
319 
320   if (byvar_overflow)
321     {
322       msg (MW, _("Maximum number of scatterplot categories reached. "
323 		 "Your BY variable has too many distinct values. "
324 		 "The coloring of the plot will not be correct."));
325     }
326 }
327 
328 static void
show_histogr(const struct graph * cmd,struct casereader * input)329 show_histogr (const struct graph *cmd, struct casereader *input)
330 {
331   struct histogram *histogram;
332   struct ccase *c;
333 
334   if (cmd->es[0].cc <= 0)
335     {
336       casereader_destroy (input);
337       return;
338     }
339 
340   {
341     /* Sturges Rule */
342     double bin_width = fabs (cmd->es[0].minimum - cmd->es[0].maximum)
343       / (1 + log2 (cmd->es[0].cc))
344       ;
345 
346     histogram =
347       histogram_create (bin_width, cmd->es[0].minimum, cmd->es[0].maximum);
348   }
349 
350   if (NULL == histogram)
351     {
352       casereader_destroy (input);
353       return;
354     }
355 
356   for (;(c = casereader_read (input)) != NULL; case_unref (c))
357     {
358       const double x      = case_data_idx (c, HG_IDX_X)->f;
359       const double weight = case_data_idx (c, HG_IDX_WT)->f;
360       moments_pass_two (cmd->es[0].mom, x, weight);
361       histogram_add (histogram, x, weight);
362     }
363   casereader_destroy (input);
364 
365 
366   {
367     double n, mean, var;
368 
369     struct string label;
370 
371     ds_init_cstr (&label,
372 		  var_to_string (cmd->dep_vars[0]));
373 
374     moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, NULL);
375 
376     chart_item_submit
377       (histogram_chart_create (histogram->gsl_hist,
378 				ds_cstr (&label), n, mean,
379 				sqrt (var), cmd->normal));
380 
381     statistic_destroy (&histogram->parent);
382     ds_destroy (&label);
383   }
384 }
385 
386 static void
cleanup_exploratory_stats(struct graph * cmd)387 cleanup_exploratory_stats (struct graph *cmd)
388 {
389   int v;
390 
391   for (v = 0; v < cmd->n_dep_vars; ++v)
392     {
393       moments_destroy (cmd->es[v].mom);
394     }
395 }
396 
397 
398 static void
run_barchart(struct graph * cmd,struct casereader * input)399 run_barchart (struct graph *cmd, struct casereader *input)
400 {
401   struct casegrouper *grouper;
402   struct casereader *group;
403   double ccc = 0.0;
404 
405   if (cmd->missing_pw == false)
406     input = casereader_create_filter_missing (input,
407                                               cmd->dep_vars,
408                                               cmd->n_dep_vars,
409                                               cmd->dep_excl,
410                                               NULL,
411                                               NULL);
412 
413 
414   input = sort_execute (input, &cmd->ordering);
415 
416   struct freq **cells = NULL;
417   int n_cells = 0;
418 
419   struct hmap columns = HMAP_INITIALIZER (columns);
420   assert (cmd->n_by_vars <= 2);
421   for (grouper = casegrouper_create_vars (input, cmd->by_var,
422                                           cmd->n_by_vars);
423        casegrouper_get_next_group (grouper, &group);
424        casereader_destroy (group))
425     {
426       int v;
427       struct ccase *c = casereader_peek (group, 0);
428 
429       /* Deal with missing values in the categorical variables */
430       for (v = 0; v < cmd->n_by_vars; ++v)
431 	{
432 	  if (var_is_value_missing (cmd->by_var[v],
433 				    case_data (c, cmd->by_var[v]),
434 				    cmd->fctr_excl))
435 	    break;
436 	}
437 
438       if (v < cmd->n_by_vars)
439 	{
440 	  case_unref (c);
441 	  continue;
442 	}
443 
444       cells = xrealloc (cells, sizeof (*cells) * ++n_cells);
445       cells[n_cells - 1] = xzalloc (sizeof (**cells)
446 				    + sizeof (union value)
447 				    * (cmd->n_by_vars - 1));
448 
449       if (ag_func[cmd->agr].cumulative && n_cells >= 2)
450 	cells[n_cells - 1]->count = cells[n_cells - 2]->count;
451       else
452 	cells[n_cells - 1]->count = 0;
453       if (ag_func[cmd->agr].pre)
454 	cells[n_cells - 1]->count = ag_func[cmd->agr].pre();
455 
456       if (cmd->n_by_vars > 1)
457       {
458 	const union value *vv = case_data (c, cmd->by_var[1]);
459 	const double weight = dict_get_case_weight (cmd->dict, c, NULL);
460 	int v1_width = var_get_width (cmd->by_var[1]);
461 	size_t hash = value_hash (vv, v1_width, 0);
462 
463 	struct freq *fcol = NULL;
464 	HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns)
465 	  if (value_equal (vv, &fcol->values[0], v1_width))
466 	    break;
467 
468 	if (fcol)
469 	  fcol->count += weight;
470 	else
471 	  {
472 	    fcol = xzalloc (sizeof *fcol);
473 	    fcol->count = weight;
474 	    value_clone (&fcol->values[0], vv, v1_width);
475 	    hmap_insert (&columns, &fcol->node, hash);
476 	  }
477       }
478 
479       for (v = 0; v < cmd->n_by_vars; ++v)
480 	{
481 	  value_clone (&cells[n_cells - 1]->values[v],
482 		       case_data (c, cmd->by_var[v]),
483 		       var_get_width (cmd->by_var[v]));
484 	}
485       case_unref (c);
486 
487       double cc = 0;
488       for (;(c = casereader_read (group)) != NULL; case_unref (c))
489 	{
490 	  const double weight = dict_get_case_weight (cmd->dict,c,NULL);
491 	  const double x = (cmd->n_dep_vars > 0)
492 	    ? case_data (c, cmd->dep_vars[0])->f : SYSMIS;
493 
494 	  cc += weight;
495 
496 	  cells[n_cells - 1]->count
497 	    = ag_func[cmd->agr].calc (cells[n_cells - 1]->count, x, weight);
498 	}
499 
500       if (ag_func[cmd->agr].post)
501       	cells[n_cells - 1]->count
502       	  = ag_func[cmd->agr].post (cells[n_cells - 1]->count, cc);
503 
504       ccc += cc;
505     }
506 
507   casegrouper_destroy (grouper);
508 
509   for (int i = 0; i < n_cells; ++i)
510     {
511       if (ag_func[cmd->agr].ppost)
512 	{
513 	  struct freq *cell = cells[i];
514 	  if (cmd->n_by_vars > 1)
515 	    {
516 	      const union value *vv = &cell->values[1];
517 
518 	      int v1_width = var_get_width (cmd->by_var[1]);
519 	      size_t hash = value_hash (vv, v1_width, 0);
520 
521 	      struct freq *fcol = NULL;
522 	      HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns)
523 		if (value_equal (vv, &fcol->values[0], v1_width))
524 		  break;
525 
526 	      cell->count = ag_func[cmd->agr].ppost (cell->count, fcol->count);
527 	    }
528 	  else
529 	    cell->count = ag_func[cmd->agr].ppost (cell->count, ccc);
530 	}
531     }
532 
533   if (cmd->n_by_vars > 1)
534     {
535       struct freq *col_cell;
536       struct freq *next;
537       HMAP_FOR_EACH_SAFE (col_cell, next, struct freq, node, &columns)
538 	{
539 
540 	  value_destroy (col_cell->values, var_get_width (cmd->by_var[1]));
541 	  free (col_cell);
542 	}
543     }
544   hmap_destroy (&columns);
545 
546   {
547     struct string label;
548     ds_init_empty (&label);
549 
550     if (cmd->n_dep_vars > 0)
551       ds_put_format (&label, _("%s of %s"),
552 		     ag_func[cmd->agr].description,
553 		     var_get_name (cmd->dep_vars[0]));
554     else
555       ds_put_cstr (&label,
556 		     ag_func[cmd->agr].description);
557 
558     chart_item_submit (barchart_create (cmd->by_var, cmd->n_by_vars,
559 					ds_cstr (&label), false,
560 					cells, n_cells));
561 
562     ds_destroy (&label);
563   }
564 
565   for (int i = 0; i < n_cells; ++i)
566     free (cells[i]);
567 
568   free (cells);
569 }
570 
571 
572 static void
run_graph(struct graph * cmd,struct casereader * input)573 run_graph (struct graph *cmd, struct casereader *input)
574 {
575   struct ccase *c;
576   struct casereader *reader;
577   struct casewriter *writer;
578 
579   cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars, sizeof *cmd->es);
580   for(int v=0;v<cmd->n_dep_vars;v++)
581     {
582       cmd->es[v].mom = moments_create (MOMENT_KURTOSIS);
583       cmd->es[v].cmin = DBL_MAX;
584       cmd->es[v].maximum = -DBL_MAX;
585       cmd->es[v].minimum =  DBL_MAX;
586     }
587   /* Always remove cases listwise. This is correct for */
588   /* the histogram because there is only one variable  */
589   /* and a simple bivariate scatterplot                */
590   /* if (cmd->missing_pw == false)                    */
591     input = casereader_create_filter_missing (input,
592                                               cmd->dep_vars,
593                                               cmd->n_dep_vars,
594                                               cmd->dep_excl,
595                                               NULL,
596                                               NULL);
597 
598   writer = autopaging_writer_create (cmd->gr_proto);
599 
600   /* The case data is copied to a new writer        */
601   /* The setup of the case depends on the Charttype */
602   /* For Scatterplot x is assumed in dep_vars[0]    */
603   /*                 y is assumed in dep_vars[1]    */
604   /* For Histogram   x is assumed in dep_vars[0]    */
605   assert(SP_IDX_X == 0 && SP_IDX_Y == 1 && HG_IDX_X == 0);
606 
607   for (;(c = casereader_read (input)) != NULL; case_unref (c))
608     {
609       struct ccase *outcase = case_create (cmd->gr_proto);
610       const double weight = dict_get_case_weight (cmd->dict,c,NULL);
611       if (cmd->chart_type == CT_HISTOGRAM)
612 	case_data_rw_idx (outcase, HG_IDX_WT)->f = weight;
613       if (cmd->chart_type == CT_SCATTERPLOT && cmd->n_by_vars > 0)
614 	value_copy (case_data_rw_idx (outcase, SP_IDX_BY),
615 		    case_data (c, cmd->by_var[0]),
616 		    var_get_width (cmd->by_var[0]));
617       for(int v=0;v<cmd->n_dep_vars;v++)
618 	{
619 	  const struct variable *var = cmd->dep_vars[v];
620 	  const double x = case_data (c, var)->f;
621 
622 	  if (var_is_value_missing (var, case_data (c, var), cmd->dep_excl))
623 	    {
624 	      cmd->es[v].missing += weight;
625 	      continue;
626 	    }
627 	  /* Magically v value fits to SP_IDX_X, SP_IDX_Y, HG_IDX_X */
628 	  case_data_rw_idx (outcase, v)->f = x;
629 
630 	  if (x > cmd->es[v].maximum)
631 	    cmd->es[v].maximum = x;
632 
633 	  if (x < cmd->es[v].minimum)
634 	    cmd->es[v].minimum =  x;
635 
636 	  cmd->es[v].non_missing += weight;
637 
638 	  moments_pass_one (cmd->es[v].mom, x, weight);
639 
640 	  cmd->es[v].cc += weight;
641 
642 	  if (cmd->es[v].cmin > weight)
643 	    cmd->es[v].cmin = weight;
644 	}
645       casewriter_write (writer,outcase);
646     }
647 
648   reader = casewriter_make_reader (writer);
649 
650   switch (cmd->chart_type)
651     {
652     case CT_HISTOGRAM:
653       show_histogr (cmd,reader);
654       break;
655     case CT_SCATTERPLOT:
656       show_scatterplot (cmd,reader);
657       break;
658     default:
659       NOT_REACHED ();
660       break;
661     };
662 
663   casereader_destroy (input);
664   cleanup_exploratory_stats (cmd);
665 }
666 
667 
668 int
cmd_graph(struct lexer * lexer,struct dataset * ds)669 cmd_graph (struct lexer *lexer, struct dataset *ds)
670 {
671   struct graph graph;
672 
673   graph.missing_pw = false;
674 
675   graph.pool = pool_create ();
676 
677   graph.dep_excl = MV_ANY;
678   graph.fctr_excl = MV_ANY;
679 
680   graph.dict = dataset_dict (ds);
681 
682   graph.dep_vars = NULL;
683   graph.chart_type = CT_NONE;
684   graph.scatter_type = ST_BIVARIATE;
685   graph.n_by_vars = 0;
686   graph.gr_proto = caseproto_create ();
687 
688   subcase_init_empty (&graph.ordering);
689 
690   while (lex_token (lexer) != T_ENDCMD)
691     {
692       lex_match (lexer, T_SLASH);
693 
694       if (lex_match_id (lexer, "HISTOGRAM"))
695 	{
696 	  if (graph.chart_type != CT_NONE)
697 	    {
698 	      lex_error (lexer, _("Only one chart type is allowed."));
699 	      goto error;
700 	    }
701           graph.normal = false;
702           if (lex_match (lexer, T_LPAREN))
703             {
704               if (!lex_force_match_id (lexer, "NORMAL"))
705                 goto error;
706 
707               if (!lex_force_match (lexer, T_RPAREN))
708                 goto error;
709 
710               graph.normal = true;
711             }
712 	  if (!lex_force_match (lexer, T_EQUALS))
713 	    goto error;
714 	  graph.chart_type = CT_HISTOGRAM;
715 	  if (!parse_variables_const (lexer, graph.dict,
716 				      &graph.dep_vars, &graph.n_dep_vars,
717 				      PV_NO_DUPLICATE | PV_NUMERIC))
718 	    goto error;
719 	  if (graph.n_dep_vars > 1)
720 	    {
721 	      lex_error (lexer, _("Only one variable is allowed."));
722 	      goto error;
723 	    }
724 	}
725       else if (lex_match_id (lexer, "BAR"))
726 	{
727 	  if (graph.chart_type != CT_NONE)
728 	    {
729 	      lex_error (lexer, _("Only one chart type is allowed."));
730 	      goto error;
731 	    }
732 	  graph.chart_type = CT_BAR;
733 	  graph.bar_type = CBT_SIMPLE;
734 
735 	  if (lex_match (lexer, T_LPAREN))
736 	    {
737 	      if (lex_match_id (lexer, "SIMPLE"))
738 		{
739 		  /* This is the default anyway */
740 		}
741 	      else if (lex_match_id (lexer, "GROUPED"))
742 		{
743 		  graph.bar_type = CBT_GROUPED;
744 		  goto error;
745 		}
746 	      else if (lex_match_id (lexer, "STACKED"))
747 		{
748 		  graph.bar_type = CBT_STACKED;
749 		  lex_error (lexer, _("%s is not yet implemented."), "STACKED");
750 		  goto error;
751 		}
752 	      else if (lex_match_id (lexer, "RANGE"))
753 		{
754 		  graph.bar_type = CBT_RANGE;
755 		  lex_error (lexer, _("%s is not yet implemented."), "RANGE");
756 		  goto error;
757 		}
758 	      else
759 		{
760 		  lex_error (lexer, NULL);
761 		  goto error;
762 		}
763 	      if (!lex_force_match (lexer, T_RPAREN))
764 		goto error;
765 	    }
766 
767 	  if (!lex_force_match (lexer, T_EQUALS))
768 	    goto error;
769 
770 	  if (! parse_function (lexer, &graph))
771 	    goto error;
772 	}
773       else if (lex_match_id (lexer, "SCATTERPLOT"))
774 	{
775 	  if (graph.chart_type != CT_NONE)
776 	    {
777 	      lex_error (lexer, _("Only one chart type is allowed."));
778 	      goto error;
779 	    }
780 	  graph.chart_type = CT_SCATTERPLOT;
781 	  if (lex_match (lexer, T_LPAREN))
782 	    {
783 	      if (lex_match_id (lexer, "BIVARIATE"))
784 		{
785 		  /* This is the default anyway */
786 		}
787 	      else if (lex_match_id (lexer, "OVERLAY"))
788 		{
789 		  lex_error (lexer, _("%s is not yet implemented."),"OVERLAY");
790 		  goto error;
791 		}
792 	      else if (lex_match_id (lexer, "MATRIX"))
793 		{
794 		  lex_error (lexer, _("%s is not yet implemented."),"MATRIX");
795 		  goto error;
796 		}
797 	      else if (lex_match_id (lexer, "XYZ"))
798 		{
799 		  lex_error(lexer, _("%s is not yet implemented."),"XYZ");
800 		  goto error;
801 		}
802 	      else
803 		{
804 		  lex_error_expecting (lexer, "BIVARIATE");
805 		  goto error;
806 		}
807 	      if (!lex_force_match (lexer, T_RPAREN))
808 		goto error;
809 	    }
810 	  if (!lex_force_match (lexer, T_EQUALS))
811 	    goto error;
812 
813 	  if (!parse_variables_const (lexer, graph.dict,
814 				      &graph.dep_vars, &graph.n_dep_vars,
815 				      PV_NO_DUPLICATE | PV_NUMERIC))
816 	    goto error;
817 
818 	  if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 1)
819 	    {
820 	      lex_error(lexer, _("Only one variable is allowed."));
821 	      goto error;
822 	    }
823 
824 	  if (!lex_force_match (lexer, T_WITH))
825 	    goto error;
826 
827 	  if (!parse_variables_const (lexer, graph.dict,
828 				      &graph.dep_vars, &graph.n_dep_vars,
829 				      PV_NO_DUPLICATE | PV_NUMERIC | PV_APPEND))
830 	    goto error;
831 
832 	  if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 2)
833 	    {
834 	      lex_error (lexer, _("Only one variable is allowed."));
835 	      goto error;
836 	    }
837 
838 	  if (lex_match (lexer, T_BY))
839 	    {
840 	      const struct variable *v = NULL;
841 	      if (!lex_match_variable (lexer,graph.dict,&v))
842 		{
843 		  lex_error (lexer, _("Variable expected"));
844 		  goto error;
845 		}
846 	      graph.by_var[0] = v;
847               graph.n_by_vars = 1;
848 	    }
849 	}
850       else if (lex_match_id (lexer, "LINE"))
851 	{
852 	  lex_error (lexer, _("%s is not yet implemented."),"LINE");
853 	  goto error;
854 	}
855       else if (lex_match_id (lexer, "PIE"))
856 	{
857 	  lex_error (lexer, _("%s is not yet implemented."),"PIE");
858 	  goto error;
859 	}
860       else if (lex_match_id (lexer, "ERRORBAR"))
861 	{
862 	  lex_error (lexer, _("%s is not yet implemented."),"ERRORBAR");
863 	  goto error;
864 	}
865       else if (lex_match_id (lexer, "PARETO"))
866 	{
867 	  lex_error (lexer, _("%s is not yet implemented."),"PARETO");
868 	  goto error;
869 	}
870       else if (lex_match_id (lexer, "TITLE"))
871 	{
872 	  lex_error (lexer, _("%s is not yet implemented."),"TITLE");
873 	  goto error;
874 	}
875       else if (lex_match_id (lexer, "SUBTITLE"))
876 	{
877 	  lex_error (lexer, _("%s is not yet implemented."),"SUBTITLE");
878 	  goto error;
879 	}
880       else if (lex_match_id (lexer, "FOOTNOTE"))
881 	{
882 	  lex_error (lexer, _("%s is not yet implemented."),"FOOTNOTE");
883 	  lex_error (lexer, _("FOOTNOTE is not implemented yet for GRAPH"));
884 	  goto error;
885 	}
886       else if (lex_match_id (lexer, "MISSING"))
887         {
888 	  lex_match (lexer, T_EQUALS);
889 
890 	  while (lex_token (lexer) != T_ENDCMD
891 		 && lex_token (lexer) != T_SLASH)
892 	    {
893               if (lex_match_id (lexer, "LISTWISE"))
894                 {
895                   graph.missing_pw = false;
896                 }
897               else if (lex_match_id (lexer, "VARIABLE"))
898                 {
899                   graph.missing_pw = true;
900                 }
901               else if (lex_match_id (lexer, "EXCLUDE"))
902                 {
903                   graph.dep_excl = MV_ANY;
904                 }
905               else if (lex_match_id (lexer, "INCLUDE"))
906                 {
907                   graph.dep_excl = MV_SYSTEM;
908                 }
909               else if (lex_match_id (lexer, "REPORT"))
910                 {
911                   graph.fctr_excl = MV_NEVER;
912                 }
913               else if (lex_match_id (lexer, "NOREPORT"))
914                 {
915                   graph.fctr_excl = MV_ANY;
916                 }
917               else
918                 {
919                   lex_error (lexer, NULL);
920                   goto error;
921                 }
922             }
923         }
924       else
925         {
926           lex_error (lexer, NULL);
927           goto error;
928         }
929     }
930 
931   switch (graph.chart_type)
932     {
933     case CT_SCATTERPLOT:
934       /* See scatterplot.h for the setup of the case prototype */
935 
936       /* x value - SP_IDX_X*/
937       graph.gr_proto = caseproto_add_width (graph.gr_proto, 0);
938 
939       /* y value - SP_IDX_Y*/
940       graph.gr_proto = caseproto_add_width (graph.gr_proto, 0);
941       /* The by_var contains the plot categories for the different xy
942 	 plot colors */
943       if (graph.n_by_vars > 0) /* SP_IDX_BY */
944 	graph.gr_proto = caseproto_add_width (graph.gr_proto,
945 					      var_get_width(graph.by_var[0]));
946       break;
947     case CT_HISTOGRAM:
948       /* x value      */
949       graph.gr_proto = caseproto_add_width (graph.gr_proto, 0);
950       /* weight value */
951       graph.gr_proto = caseproto_add_width (graph.gr_proto, 0);
952       break;
953     case CT_BAR:
954       break;
955     case CT_NONE:
956       lex_error_expecting (lexer, "HISTOGRAM", "SCATTERPLOT", "BAR");
957       goto error;
958     default:
959       NOT_REACHED ();
960       break;
961     };
962 
963   {
964     struct casegrouper *grouper;
965     struct casereader *group;
966     bool ok;
967 
968     grouper = casegrouper_create_splits (proc_open (ds), graph.dict);
969     while (casegrouper_get_next_group (grouper, &group))
970       {
971 	if (graph.chart_type == CT_BAR)
972 	  run_barchart (&graph, group);
973 	else
974 	  run_graph (&graph, group);
975       }
976     ok = casegrouper_destroy (grouper);
977     ok = proc_commit (ds) && ok;
978   }
979 
980   subcase_destroy (&graph.ordering);
981   free (graph.dep_vars);
982   pool_destroy (graph.pool);
983   caseproto_unref (graph.gr_proto);
984 
985   return CMD_SUCCESS;
986 
987  error:
988   subcase_destroy (&graph.ordering);
989   caseproto_unref (graph.gr_proto);
990   free (graph.dep_vars);
991   pool_destroy (graph.pool);
992 
993   return CMD_FAILURE;
994 }
995