1% This file is part of the Stanford GraphBase (c) Stanford University 1993
2@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
3@i gb_types.w
4
5\def\title{GB\_\,GAMES}
6
7\prerequisites{GB\_\,GRAPH}{GB\_\,IO}
8@* Introduction. This GraphBase module contains the |games| subroutine,
9which creates a family of undirected graphs based on college football
10scores. An example of the use of this procedure can be
11found in the demo program {\sc FOOTBALL}.
12
13@(gb_games.h@>=
14extern Graph *games();
15
16@ The subroutine call |games|(|n|, |ap0_weight|, |upi0_weight|, |ap1_weight|,
17|upi1_weight|, |first_day|, |last_day|, |seed|)
18constructs a graph based on the information in \.{games.dat}.
19Each vertex of the graph corresponds to one of 120 football teams
20at American colleges and universities (more precisely, to the 106 college
21football teams of division I-A together with the 14 division I-AA teams
22of the Ivy League and the Patriot League).
23Each edge of the graph corresponds to one of the 638 games played
24between those teams during the 1990 season.
25
26An arc from vertex~|u| to vertex~|v| is assigned a length representing
27the number of points scored by |u| when playing~|v|. Thus the graph
28isn't really ``undirected,'' although it is true that its arcs are
29paired (i.e., that |u| played~|v| if and only if |v| played~|u|).
30A truly undirected graph with the same vertices and edges can be obtained
31by applying the |complement| routine of {\sc GB\_\,BASIC}.
32
33The constructed graph will have $\min(n,120)$ vertices. If |n| is less
34than 120, the |n| teams will be selected by assigning a weight to
35each team and choosing the |n| with largest weight, using random
36numbers to break ties in case of equal weights. Weights are computed
37by the formula
38$$ |ap0_weight|\cdot|ap0|+|upi0_weight|\cdot|upi0|
39   +|ap1_weight|\cdot|ap1|+|upi1_weight|\cdot|upi1|, $$
40where |ap0| and |upi0| are the point scores given to a team in the
41Associated Press and United Press International polls at the beginning
42of the season, and |ap1| and |upi1| are the similar scores given at
43the end of the season. (The \\{ap} scores were obtained by asking 60
44sportswriters to choose and rank the top 25 teams, assigning 25 points
45to a team ranked 1st and 1 point to a team ranked 25th; thus the
46total of each of the \\{ap} scores, summed over all teams,
47is 19500. The \\{upi} scores were
48obtained by asking football coaches to choose and rank the top 15
49teams, assigning 15 points to a team ranked 1st and 1 point to a team
50ranked 15th. In the case of \\{upi0}, there were 48 coaches voting,
51making 5760 points altogether; but in the case of \\{upi1}, 59 coaches
52were polled, yielding a total of 7080 points. The coaches agreed not
53to vote for any team that was on probation for violating NCAA rules,
54but the sportswriters had no such policy.)
55
56Parameters |first_day| and |last_day| can be used to vary the number of
57edges; only games played between |first_day| and |last_day|, inclusive,
58will be included in the constructed graph. Day~0 was August~26, 1990,
59when Colorado and Tennessee competed in the Disneyland Pigskin Classic.
60Day~128 was January~1, 1991, when the final end-of-season bowl games
61were played. About half of each team's games were played between day~0 and
62day~50. If |last_day=0|, the value of |last_day| is automatically
63increased to~128.
64
65As usual in GraphBase routines, you can set |n=0| to get the default
66situation where |n| has its maximum value. For example, either
67|games(0,0,0,0,0,0,0,0)| or |games(120,0,0,0,0,0,0,0)| produces the full graph;
68|games(0,0,0,0,0,50,0,0)| or |games(120,0,0,0,0,50,0,0)|
69or |games(120,0,0,0,0,50,128,0)| produces the graph for the last half
70of the season. One way to select a subgraph containing the
7130 ``best'' teams is to ask for |games(30,0,0,1,2,0,0,0)|, which adds
72the votes of the sportswriters to the votes of the coaches
73(considering that a coach's first choice is worth 30 points
74while a sportswriter's first choice is worth only 25). It turns out
75that 67 of the teams did not receive votes in any of the four polls;
76the subroutine call |games(53,1,1,1,1,0,0,0)| will pick out the 53 teams
77that were selected at least once by some sportswriter or coach, and
78|games(67,-1,-1,-1,-1,0,0,0)| will pick out the 67 that were not.
79A~random selection of 60 teams can be obtained by calling
80|games(60,0,0,0,0,0,0,s)|. Different choices of the seed number~|s|
81will produce different selections in a system-independent manner;
82any value of |s| between 0 and $2^{31}-1$ is permissible.
83If you ask for |games(120,0,0,0,0,0,0,s)| with different choices of~|s|,
84you always get the full graph, but the vertices will appear in different
85(random) orderings depending on~|s|.
86
87Parameters |ap0_weight|, |upi0_weight|, |ap1_weight|, and |upi1_weight| must be
88at most $2^{17}=131072$ in absolute value.
89
90@d MAX_N 120
91@d MAX_DAY 128
92@d MAX_WEIGHT 131072
93@d ap u.I /* Associated Press scores: |(ap0<<16)+ap1| */
94@d upi v.I /* United Press International scores |(upi0<<16)+upi1| */
95
96@ Most of the teams belong to a ``conference,'' and they play against
97almost every other team that belongs to the same conference. For
98example, Stanford and nine other teams belong to the
99Pacific Ten conference. Eight of Stanford's eleven games were against
100other teams of the Pacific Ten; the other three were played against
101Colorado (from the Big Eight), San Jos\'e State (from the Big West)
102and Notre Dame (which is independent). The graphs produced by |games|
103therefore illustrate ``cliquey'' patterns of social interaction.
104
105Eleven different conferences are included in \.{games.dat}. Utility
106field |z.S| of a vertex is set to the name of a team's conference, or to |NULL|
107if that team is independent. (Exactly 24 of the I-A football teams
108were independent in 1990.) Two teams |u| and |v| belong to the same
109conference if and only if |u->conference==v->conference| and
110|u->conference!=NULL|.
111
112@d conference z.S
113
114@ Each team has a nickname, which is recorded in utility field |y.S|.
115For example, Georgia Tech's team is called the Yellow Jackets.
116Six teams (Auburn, Clemson, Memphis State, Missouri, Pacific, and
117Princeton) are called the Tigers, and five teams
118(Fresno State, Georgia, Louisiana Tech, Mississippi State,
119Yale) are called the Bulldogs. But most of the teams have a unique
120nickname, and 94 distinct nicknames exist.
121
122A shorthand code for team names is also provided, in the |abbr| field.
123
124@d nickname y.S
125@d abbr x.S
126
127@ If |a| points to an arc from |u| to |v|, utility field |a->a.I| contains
128the value 3 if |u| was the home team, 1 if |v| was the home team, and 2 if both
129teams played on neutral territory. The date of that game, represented
130as a integer number of days after August~26, 1990, appears in utility
131field |a->b.I|. The arcs in each vertex list |v->arcs| appear in reverse order
132of their dates: last game first and first game last.
133
134@d HOME 1
135@d NEUTRAL 2 /* this value is halfway between |HOME| and |AWAY| */
136@d AWAY 3
137@d venue a.I
138@d date b.I
139
140@(gb_games.h@>=
141#define ap @[u.I@] /* repeat the definitions in the header file */
142#define upi @[v.I@]
143#define abbr @[x.S@]
144#define nickname @[y.S@]
145#define conference @[z.S@]
146#define HOME 1
147#define NEUTRAL 2
148#define AWAY 3
149#define venue @[a.I@]
150#define date @[b.I@]
151
152@ If the |games| routine encounters a problem, it returns |NULL|
153(\.{NULL}), after putting a code number into the external variable
154|panic_code|. This code number identifies the type of failure.
155Otherwise |games| returns a pointer to the newly created graph, which
156will be represented with the data structures explained in {\sc GB\_\,GRAPH}.
157(The external variable |panic_code| is itself defined in {\sc GB\_\,GRAPH}.)
158
159@d panic(c) @+{@+panic_code=c;@+gb_trouble_code=0;@+return NULL;@+}
160
161@ The \CEE/ file \.{gb\_games.c} has the following overall shape:
162
163@p
164#include "gb_io.h" /* we will use the {\sc GB\_\,IO} routines for input */
165#include "gb_flip.h"
166 /* we will use the {\sc GB\_\,FLIP} routines for random numbers */
167#include "gb_graph.h" /* we will use the {\sc GB\_\,GRAPH} data structures */
168#include "gb_sort.h" /* and |gb_linksort| for sorting */
169@h@#
170@<Type declarations@>@;
171@<Private variables@>@;
172@<Private functions@>@;
173@#
174Graph *games(n,ap0_weight,upi0_weight,ap1_weight,upi1_weight,
175     first_day,last_day,seed)
176  unsigned long n; /* number of vertices desired */
177  long ap0_weight; /* coefficient of |ap0| in the weight function */
178  long ap1_weight; /* coefficient of |ap1| in the weight function */
179  long upi0_weight; /* coefficient of |upi0| in the weight function */
180  long upi1_weight; /* coefficient of |upi1| in the weight function */
181  long first_day; /* lower cutoff for games to be considered */
182  long last_day; /* upper cutoff for games to be considered */
183  long seed; /* random number seed */
184{@+@<Local variables@>@;@#
185  gb_init_rand(seed);
186  @<Check that the parameters are valid@>;
187  @<Set up a graph with |n| vertices@>;
188  @<Read the first part of \.{games.dat} and compute team weights@>;
189  @<Determine the |n| teams to use in the graph@>;
190  @<Put the appropriate edges into the graph@>;
191  if (gb_close()!=0)
192    panic(late_data_fault);
193      /* something's wrong with |"games.dat"|; see |io_errors| */
194  gb_free(working_storage);
195  if (gb_trouble_code) {
196    gb_recycle(new_graph);
197    panic(alloc_fault); /* oops, we ran out of memory somewhere back there */
198  }
199  return new_graph;
200}
201
202@ @<Local var...@>=
203Graph *new_graph; /* the graph constructed by |games| */
204register long j,k; /* all-purpose indices */
205
206@ @<Check that the parameters are valid@>=
207if (n==0 || n>MAX_N) n=MAX_N;
208if (ap0_weight>MAX_WEIGHT || ap0_weight<-MAX_WEIGHT ||
209    upi0_weight>MAX_WEIGHT || upi0_weight<-MAX_WEIGHT ||@|
210    ap1_weight>MAX_WEIGHT || ap1_weight<-MAX_WEIGHT ||
211    upi1_weight>MAX_WEIGHT || upi1_weight<-MAX_WEIGHT)
212  panic(bad_specs); /* the magnitude of at least one weight is too big */
213if (first_day<0) first_day=0;
214if (last_day==0 || last_day>MAX_DAY) last_day=MAX_DAY;
215
216@ @<Set up a graph with |n| vertices@>=
217new_graph=gb_new_graph(n);
218if (new_graph==NULL)
219  panic(no_room); /* out of memory before we're even started */
220sprintf(new_graph->id,"games(%lu,%ld,%ld,%ld,%ld,%ld,%ld,%ld)",
221  n,ap0_weight,upi0_weight,ap1_weight,upi1_weight,first_day,last_day,seed);
222strcpy(new_graph->util_types,"IIZSSSIIZZZZZZ");
223
224@* Vertices.
225As we read in the data, we construct a list of nodes, each of which contains
226a team's name, nickname, conference, and weight. After this list
227has been sorted by weight, the top |n| entries will be the vertices of the
228new graph.
229
230@<Type decl...@>=
231typedef struct node_struct { /* records to be sorted by |gb_linksort| */
232  long key; /* the nonnegative sort key (weight plus $2^{30}$) */
233  struct node_struct *link; /* pointer to next record */
234  char name[24]; /* |"College Name"| */
235  char nick[22]; /* |"Team Nickname"| */
236  char abb[6]; /* |"ABBR"| */
237  long a0,u0,a1,u1; /* team scores in press polls */
238  char *conf; /* pointer to conference name */
239  struct node_struct *hash_link; /* pointer to next \.{ABBR} in hash list */
240  Vertex *vert; /* vertex corresponding to this team */
241} node;
242
243@ The data in \.{games.dat} appears in two parts. The first 120 lines
244have the form
245$$\hbox{\tt ABBR College Name(Team Nickname)Conference;a0,u0;a1,u1}$$
246and they give basic information about the teams. An internal abbreviation code
247\.{ABBR} is used to identify each team in the second part of the data.
248
249The second part presents scores of the games, and it
250contains two kinds of lines. If the first character of a line is
251`\.>', it means ``change the current date,'' and the remaining
252characters specify a date as a one-letter month code followed by the day
253of the month. Otherwise the line gives scores of a game, using the
254\.{ABBR} codes for two teams. The scores are separated by `\.@@' if
255the second team was the home team and by `\.,' if both teams were on
256neutral territory.
257
258For example, two games were played on December 8, namely the annual Army-Navy
259game and the California Raisin Bowl game. These are recorded in three lines
260of \.{games.dat} as follows:
261$$\vbox{\halign{\tt#\hfil\cr
262>D8\cr
263NAVY20@@ARMY30\cr
264SJSU48,CMICH24\cr}}$$
265We deduce that Navy played at Army's home stadium, losing 20 to~30;
266moreover, San Jos\'e State played Central Michigan on neutral territory and
267won, 48 to~24. (The California Raisin Bowl is traditionally a playoff between
268the champions of the Big West and Mid-American conferences.)
269
270@ In order to map \.{ABBR} codes to team names, we use a simple
271hash coding scheme. Two abbreviations with the same hash address are
272linked together via the |hash_link| address in their node.
273
274The constants defined here are taken from the specific data in \.{games.dat},
275because this routine is not intended to be perfectly general.
276
277@d HASH_PRIME 1009
278
279@<Private v...@>=
280static long ma0=1451,mu0=666,ma1=1475,mu1=847;
281             /* maximum poll values in the data */
282static node *node_block; /* array of nodes holding team info */
283static node **hash_block; /* array of heads of hash code lists */
284static Area working_storage; /* memory needed only while |games| is working */
285static char **conf_block; /* array of conference names */
286static long m; /* the number of conference names known so far */
287
288@ @<Read the first part of \.{games.dat} and compute team weights@>=
289node_block=gb_typed_alloc(MAX_N+2,node,working_storage);
290 /* leave room for string overflow */
291hash_block=gb_typed_alloc(HASH_PRIME,node*,working_storage);
292conf_block=gb_typed_alloc(MAX_N,char*,working_storage);
293m=0;
294if (gb_trouble_code) {
295  gb_free(working_storage);
296  panic(no_room+1); /* nowhere to copy the data */
297}
298if (gb_open("games.dat")!=0)
299  panic(early_data_fault); /* couldn't open |"games.dat"| using
300        GraphBase conventions; |io_errors| tells why */
301for (k=0; k<MAX_N; k++) @<Read and store data for team |k|@>;
302
303@ @<Read and store...@>=
304{@+register node *p;
305  register char *q;
306  p=node_block+k;
307  if (k) p->link=p-1;
308  q=gb_string(p->abb,' ');
309  if (q>&p->abb[6] || gb_char()!=' ')
310    panic(syntax_error); /* out of sync in \.{games.dat} */
311  @<Enter |p->abb| in the hash table@>;
312  q=gb_string(p->name,'(');
313  if (q>&p->name[24] || gb_char()!='(')
314    panic(syntax_error+1); /* team name too long */
315  q=gb_string(p->nick,')');
316  if (q>&p->nick[22] || gb_char()!=')')
317    panic(syntax_error+2); /* team nickname too long */
318  @<Read the conference name for |p|@>;
319  @<Read the press poll scores for |p| and compute |p->key|@>;
320  gb_newline();
321}
322
323@ @<Enter |p->abb| in the hash table@>=
324{@+long h=0; /* the hash code */
325  for (q=p->abb;*q;q++)
326    h=(h+h+*q)%HASH_PRIME;
327  p->hash_link=hash_block[h];
328  hash_block[h]=p;
329}
330
331@ @<Read the conference name for |p|@>=
332{
333  gb_string(str_buf,';');
334  if (gb_char()!=';') panic(syntax_error+3); /* conference name clobbered */
335  if (strcmp(str_buf,"Independent")!=0) {
336    for (j=0;j<m;j++)
337      if (strcmp(str_buf,conf_block[j])==0) goto found;
338    conf_block[m++]=gb_save_string(str_buf);
339 found:p->conf=conf_block[j];
340  }
341}
342
343@ The key value computed here will be between 0 and~$2^{31}$, because of
344the bound we've imposed on the weight parameters.
345
346@<Read the press poll scores for |p| and compute |p->key|@>=
347p->a0=gb_number(10);
348if (p->a0>ma0 || gb_char()!=',') panic(syntax_error+4);
349  /* first AP score clobbered */
350p->u0=gb_number(10);
351if (p->u0>mu0 || gb_char()!=';') panic(syntax_error+5);
352  /* first UPI score clobbered */
353p->a1=gb_number(10);
354if (p->a1>ma1 || gb_char()!=',') panic(syntax_error+6);
355  /* second AP score clobbered */
356p->u1=gb_number(10);
357if (p->u1>mu1 || gb_char()!='\n') panic(syntax_error+7);
358  /* second UPI score clobbered */
359p->key=ap0_weight*(p->a0)+upi0_weight*(p->u0)
360        +ap1_weight*(p->a1)+upi1_weight*(p->u1)+0x40000000;
361
362@ Once all the nodes have been set up, we can use the |gb_linksort|
363routine to sort them into the desired order. It builds 128
364lists from which the desired nodes are readily accessed in decreasing
365order of weight, using random numbers to break ties.
366
367We set the abbreviation code to zero in every team that isn't chosen. Then
368games involving that team will be excluded when edges are generated below.
369
370@<Determine the |n| teams to use in the graph@>=
371{@+register node *p; /* the current node being considered */
372  register Vertex *v=new_graph->vertices; /* the next vertex to use */
373  gb_linksort(node_block+MAX_N-1);
374  for (j=127; j>=0; j--)
375    for (p=(node*)gb_sorted[j]; p; p=p->link) {
376      if (v<new_graph->vertices+n) @<Add team |p| to the graph@>@;
377      else p->abb[0]='\0'; /* this team is not being used */
378    }
379}
380
381@ @<Add team |p| to the graph@>=
382{
383  v->ap=((long)(p->a0)<<16)+p->a1;
384  v->upi=((long)(p->u0)<<16)+p->u1;
385  v->abbr=gb_save_string(p->abb);
386  v->nickname=gb_save_string(p->nick);
387  v->conference=p->conf;
388  v->name=gb_save_string(p->name);
389  p->vert=v++;
390}
391
392@* Arcs.
393Finally, we read through the rest of \.{games.dat}, adding a pair of
394arcs for each game that belongs to the selected time interval
395and was played by two of the selected teams.
396
397@<Put the appropriate edges into the graph@>=
398{@+register Vertex *u,*v;
399  register long today=0; /* current day of play */
400  long su,sv; /* points scored by each team */
401  long ven; /* |HOME| if |v| is home team, |NEUTRAL| if on neutral ground */
402  while (!gb_eof()) {
403    if (gb_char()=='>') @<Change the current date@>@;
404    else gb_backup();
405    u=team_lookup();
406    su=gb_number(10);
407    ven=gb_char();
408    if (ven=='@@') ven=HOME;
409    else if (ven==',') ven=NEUTRAL;
410    else panic(syntax_error+8); /* bad syntax in game score line */
411    v=team_lookup();
412    sv=gb_number(10);
413    if (gb_char()!='\n') panic(syntax_error+9);
414      /* bad syntax in game score line */
415    if (u!=NULL && v!=NULL && today>=first_day && today<=last_day)
416      @<Enter a new edge@>;
417    gb_newline();
418  }
419}
420
421@ @<Change the current...@>=
422{@+register char c=gb_char(); /* month code */
423  register long d; /* day of football season */
424  switch(c) {
425  case 'A': d=-26;@+break; /* August */
426  case 'S': d=5;@+break; /* thirty days hath September */
427  case 'O': d=35;@+break; /* October */
428  case 'N': d=66;@+break; /* November */
429  case 'D': d=96;@+break; /* December */
430  case 'J': d=127;@+break; /* January */
431  default: d=1000;
432  }
433  d+=gb_number(10);
434  if (d<0 || d>MAX_DAY) panic(syntax_error-1); /* date was clobbered */
435  today=d;
436  gb_newline(); /* now ready to read a non-date line */
437}
438
439@ @<Private f...@>=
440static Vertex *team_lookup() /* read and decode an abbreviation */
441{@+register char *q=str_buf; /* position in |str_buf| */
442  register long h=0; /* hash code */
443  register node *p; /* position in hash list */
444  while (gb_digit(10)<0) {
445   *q=gb_char();
446   h=(h+h+*q)%HASH_PRIME;
447   q++;
448  }
449  gb_backup(); /* prepare to re-scan the digit following the abbreviation */
450  *q='\0'; /* null-terminate the abbreviation just scanned */
451  for (p=hash_block[h];p;p=p->hash_link)
452    if (strcmp(p->abb,str_buf)==0) return p->vert;
453  return NULL; /* not found */
454}
455
456@ We retain the convention of {\sc GB\_\,GRAPH} that the arc from |v| to |u|
457appears immediately after a matching arc from |u| to |v| when |u<v|.
458
459@<Enter a new edge@>=
460{@+register Arc *a;
461  if (u>v) {@+register Vertex *w; register long sw;
462    w=u;@+u=v;@+v=w;
463    sw=su;@+su=sv;@+sv=sw;
464    ven=HOME+AWAY-ven;
465  }
466  gb_new_arc(u,v,su);
467  gb_new_arc(v,u,sv);
468  a=u->arcs; /* a pointer to the new arc */
469  if (v->arcs!=a+1) panic (impossible+9); /* can't happen */
470  a->venue=ven;@+(a+1)->venue=HOME+AWAY-ven;
471  a->date=(a+1)->date=today;
472}
473
474@* Index. As usual, we close with an index that
475shows where the identifiers of \\{gb\_games} are defined and used.
476