1% This file is part of the Stanford GraphBase (c) Stanford University 1993 2@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES! 3@i gb_types.w 4 5\def\title{GB\_\,GAMES} 6 7\prerequisites{GB\_\,GRAPH}{GB\_\,IO} 8@* Introduction. This GraphBase module contains the |games| subroutine, 9which creates a family of undirected graphs based on college football 10scores. An example of the use of this procedure can be 11found in the demo program {\sc FOOTBALL}. 12 13@(gb_games.h@>= 14extern Graph *games(); 15 16@ The subroutine call |games|(|n|, |ap0_weight|, |upi0_weight|, |ap1_weight|, 17|upi1_weight|, |first_day|, |last_day|, |seed|) 18constructs a graph based on the information in \.{games.dat}. 19Each vertex of the graph corresponds to one of 120 football teams 20at American colleges and universities (more precisely, to the 106 college 21football teams of division I-A together with the 14 division I-AA teams 22of the Ivy League and the Patriot League). 23Each edge of the graph corresponds to one of the 638 games played 24between those teams during the 1990 season. 25 26An arc from vertex~|u| to vertex~|v| is assigned a length representing 27the number of points scored by |u| when playing~|v|. Thus the graph 28isn't really ``undirected,'' although it is true that its arcs are 29paired (i.e., that |u| played~|v| if and only if |v| played~|u|). 30A truly undirected graph with the same vertices and edges can be obtained 31by applying the |complement| routine of {\sc GB\_\,BASIC}. 32 33The constructed graph will have $\min(n,120)$ vertices. If |n| is less 34than 120, the |n| teams will be selected by assigning a weight to 35each team and choosing the |n| with largest weight, using random 36numbers to break ties in case of equal weights. Weights are computed 37by the formula 38$$ |ap0_weight|\cdot|ap0|+|upi0_weight|\cdot|upi0| 39 +|ap1_weight|\cdot|ap1|+|upi1_weight|\cdot|upi1|, $$ 40where |ap0| and |upi0| are the point scores given to a team in the 41Associated Press and United Press International polls at the beginning 42of the season, and |ap1| and |upi1| are the similar scores given at 43the end of the season. (The \\{ap} scores were obtained by asking 60 44sportswriters to choose and rank the top 25 teams, assigning 25 points 45to a team ranked 1st and 1 point to a team ranked 25th; thus the 46total of each of the \\{ap} scores, summed over all teams, 47is 19500. The \\{upi} scores were 48obtained by asking football coaches to choose and rank the top 15 49teams, assigning 15 points to a team ranked 1st and 1 point to a team 50ranked 15th. In the case of \\{upi0}, there were 48 coaches voting, 51making 5760 points altogether; but in the case of \\{upi1}, 59 coaches 52were polled, yielding a total of 7080 points. The coaches agreed not 53to vote for any team that was on probation for violating NCAA rules, 54but the sportswriters had no such policy.) 55 56Parameters |first_day| and |last_day| can be used to vary the number of 57edges; only games played between |first_day| and |last_day|, inclusive, 58will be included in the constructed graph. Day~0 was August~26, 1990, 59when Colorado and Tennessee competed in the Disneyland Pigskin Classic. 60Day~128 was January~1, 1991, when the final end-of-season bowl games 61were played. About half of each team's games were played between day~0 and 62day~50. If |last_day=0|, the value of |last_day| is automatically 63increased to~128. 64 65As usual in GraphBase routines, you can set |n=0| to get the default 66situation where |n| has its maximum value. For example, either 67|games(0,0,0,0,0,0,0,0)| or |games(120,0,0,0,0,0,0,0)| produces the full graph; 68|games(0,0,0,0,0,50,0,0)| or |games(120,0,0,0,0,50,0,0)| 69or |games(120,0,0,0,0,50,128,0)| produces the graph for the last half 70of the season. One way to select a subgraph containing the 7130 ``best'' teams is to ask for |games(30,0,0,1,2,0,0,0)|, which adds 72the votes of the sportswriters to the votes of the coaches 73(considering that a coach's first choice is worth 30 points 74while a sportswriter's first choice is worth only 25). It turns out 75that 67 of the teams did not receive votes in any of the four polls; 76the subroutine call |games(53,1,1,1,1,0,0,0)| will pick out the 53 teams 77that were selected at least once by some sportswriter or coach, and 78|games(67,-1,-1,-1,-1,0,0,0)| will pick out the 67 that were not. 79A~random selection of 60 teams can be obtained by calling 80|games(60,0,0,0,0,0,0,s)|. Different choices of the seed number~|s| 81will produce different selections in a system-independent manner; 82any value of |s| between 0 and $2^{31}-1$ is permissible. 83If you ask for |games(120,0,0,0,0,0,0,s)| with different choices of~|s|, 84you always get the full graph, but the vertices will appear in different 85(random) orderings depending on~|s|. 86 87Parameters |ap0_weight|, |upi0_weight|, |ap1_weight|, and |upi1_weight| must be 88at most $2^{17}=131072$ in absolute value. 89 90@d MAX_N 120 91@d MAX_DAY 128 92@d MAX_WEIGHT 131072 93@d ap u.I /* Associated Press scores: |(ap0<<16)+ap1| */ 94@d upi v.I /* United Press International scores |(upi0<<16)+upi1| */ 95 96@ Most of the teams belong to a ``conference,'' and they play against 97almost every other team that belongs to the same conference. For 98example, Stanford and nine other teams belong to the 99Pacific Ten conference. Eight of Stanford's eleven games were against 100other teams of the Pacific Ten; the other three were played against 101Colorado (from the Big Eight), San Jos\'e State (from the Big West) 102and Notre Dame (which is independent). The graphs produced by |games| 103therefore illustrate ``cliquey'' patterns of social interaction. 104 105Eleven different conferences are included in \.{games.dat}. Utility 106field |z.S| of a vertex is set to the name of a team's conference, or to |NULL| 107if that team is independent. (Exactly 24 of the I-A football teams 108were independent in 1990.) Two teams |u| and |v| belong to the same 109conference if and only if |u->conference==v->conference| and 110|u->conference!=NULL|. 111 112@d conference z.S 113 114@ Each team has a nickname, which is recorded in utility field |y.S|. 115For example, Georgia Tech's team is called the Yellow Jackets. 116Six teams (Auburn, Clemson, Memphis State, Missouri, Pacific, and 117Princeton) are called the Tigers, and five teams 118(Fresno State, Georgia, Louisiana Tech, Mississippi State, 119Yale) are called the Bulldogs. But most of the teams have a unique 120nickname, and 94 distinct nicknames exist. 121 122A shorthand code for team names is also provided, in the |abbr| field. 123 124@d nickname y.S 125@d abbr x.S 126 127@ If |a| points to an arc from |u| to |v|, utility field |a->a.I| contains 128the value 3 if |u| was the home team, 1 if |v| was the home team, and 2 if both 129teams played on neutral territory. The date of that game, represented 130as a integer number of days after August~26, 1990, appears in utility 131field |a->b.I|. The arcs in each vertex list |v->arcs| appear in reverse order 132of their dates: last game first and first game last. 133 134@d HOME 1 135@d NEUTRAL 2 /* this value is halfway between |HOME| and |AWAY| */ 136@d AWAY 3 137@d venue a.I 138@d date b.I 139 140@(gb_games.h@>= 141#define ap @[u.I@] /* repeat the definitions in the header file */ 142#define upi @[v.I@] 143#define abbr @[x.S@] 144#define nickname @[y.S@] 145#define conference @[z.S@] 146#define HOME 1 147#define NEUTRAL 2 148#define AWAY 3 149#define venue @[a.I@] 150#define date @[b.I@] 151 152@ If the |games| routine encounters a problem, it returns |NULL| 153(\.{NULL}), after putting a code number into the external variable 154|panic_code|. This code number identifies the type of failure. 155Otherwise |games| returns a pointer to the newly created graph, which 156will be represented with the data structures explained in {\sc GB\_\,GRAPH}. 157(The external variable |panic_code| is itself defined in {\sc GB\_\,GRAPH}.) 158 159@d panic(c) @+{@+panic_code=c;@+gb_trouble_code=0;@+return NULL;@+} 160 161@ The \CEE/ file \.{gb\_games.c} has the following overall shape: 162 163@p 164#include "gb_io.h" /* we will use the {\sc GB\_\,IO} routines for input */ 165#include "gb_flip.h" 166 /* we will use the {\sc GB\_\,FLIP} routines for random numbers */ 167#include "gb_graph.h" /* we will use the {\sc GB\_\,GRAPH} data structures */ 168#include "gb_sort.h" /* and |gb_linksort| for sorting */ 169@h@# 170@<Type declarations@>@; 171@<Private variables@>@; 172@<Private functions@>@; 173@# 174Graph *games(n,ap0_weight,upi0_weight,ap1_weight,upi1_weight, 175 first_day,last_day,seed) 176 unsigned long n; /* number of vertices desired */ 177 long ap0_weight; /* coefficient of |ap0| in the weight function */ 178 long ap1_weight; /* coefficient of |ap1| in the weight function */ 179 long upi0_weight; /* coefficient of |upi0| in the weight function */ 180 long upi1_weight; /* coefficient of |upi1| in the weight function */ 181 long first_day; /* lower cutoff for games to be considered */ 182 long last_day; /* upper cutoff for games to be considered */ 183 long seed; /* random number seed */ 184{@+@<Local variables@>@;@# 185 gb_init_rand(seed); 186 @<Check that the parameters are valid@>; 187 @<Set up a graph with |n| vertices@>; 188 @<Read the first part of \.{games.dat} and compute team weights@>; 189 @<Determine the |n| teams to use in the graph@>; 190 @<Put the appropriate edges into the graph@>; 191 if (gb_close()!=0) 192 panic(late_data_fault); 193 /* something's wrong with |"games.dat"|; see |io_errors| */ 194 gb_free(working_storage); 195 if (gb_trouble_code) { 196 gb_recycle(new_graph); 197 panic(alloc_fault); /* oops, we ran out of memory somewhere back there */ 198 } 199 return new_graph; 200} 201 202@ @<Local var...@>= 203Graph *new_graph; /* the graph constructed by |games| */ 204register long j,k; /* all-purpose indices */ 205 206@ @<Check that the parameters are valid@>= 207if (n==0 || n>MAX_N) n=MAX_N; 208if (ap0_weight>MAX_WEIGHT || ap0_weight<-MAX_WEIGHT || 209 upi0_weight>MAX_WEIGHT || upi0_weight<-MAX_WEIGHT ||@| 210 ap1_weight>MAX_WEIGHT || ap1_weight<-MAX_WEIGHT || 211 upi1_weight>MAX_WEIGHT || upi1_weight<-MAX_WEIGHT) 212 panic(bad_specs); /* the magnitude of at least one weight is too big */ 213if (first_day<0) first_day=0; 214if (last_day==0 || last_day>MAX_DAY) last_day=MAX_DAY; 215 216@ @<Set up a graph with |n| vertices@>= 217new_graph=gb_new_graph(n); 218if (new_graph==NULL) 219 panic(no_room); /* out of memory before we're even started */ 220sprintf(new_graph->id,"games(%lu,%ld,%ld,%ld,%ld,%ld,%ld,%ld)", 221 n,ap0_weight,upi0_weight,ap1_weight,upi1_weight,first_day,last_day,seed); 222strcpy(new_graph->util_types,"IIZSSSIIZZZZZZ"); 223 224@* Vertices. 225As we read in the data, we construct a list of nodes, each of which contains 226a team's name, nickname, conference, and weight. After this list 227has been sorted by weight, the top |n| entries will be the vertices of the 228new graph. 229 230@<Type decl...@>= 231typedef struct node_struct { /* records to be sorted by |gb_linksort| */ 232 long key; /* the nonnegative sort key (weight plus $2^{30}$) */ 233 struct node_struct *link; /* pointer to next record */ 234 char name[24]; /* |"College Name"| */ 235 char nick[22]; /* |"Team Nickname"| */ 236 char abb[6]; /* |"ABBR"| */ 237 long a0,u0,a1,u1; /* team scores in press polls */ 238 char *conf; /* pointer to conference name */ 239 struct node_struct *hash_link; /* pointer to next \.{ABBR} in hash list */ 240 Vertex *vert; /* vertex corresponding to this team */ 241} node; 242 243@ The data in \.{games.dat} appears in two parts. The first 120 lines 244have the form 245$$\hbox{\tt ABBR College Name(Team Nickname)Conference;a0,u0;a1,u1}$$ 246and they give basic information about the teams. An internal abbreviation code 247\.{ABBR} is used to identify each team in the second part of the data. 248 249The second part presents scores of the games, and it 250contains two kinds of lines. If the first character of a line is 251`\.>', it means ``change the current date,'' and the remaining 252characters specify a date as a one-letter month code followed by the day 253of the month. Otherwise the line gives scores of a game, using the 254\.{ABBR} codes for two teams. The scores are separated by `\.@@' if 255the second team was the home team and by `\.,' if both teams were on 256neutral territory. 257 258For example, two games were played on December 8, namely the annual Army-Navy 259game and the California Raisin Bowl game. These are recorded in three lines 260of \.{games.dat} as follows: 261$$\vbox{\halign{\tt#\hfil\cr 262>D8\cr 263NAVY20@@ARMY30\cr 264SJSU48,CMICH24\cr}}$$ 265We deduce that Navy played at Army's home stadium, losing 20 to~30; 266moreover, San Jos\'e State played Central Michigan on neutral territory and 267won, 48 to~24. (The California Raisin Bowl is traditionally a playoff between 268the champions of the Big West and Mid-American conferences.) 269 270@ In order to map \.{ABBR} codes to team names, we use a simple 271hash coding scheme. Two abbreviations with the same hash address are 272linked together via the |hash_link| address in their node. 273 274The constants defined here are taken from the specific data in \.{games.dat}, 275because this routine is not intended to be perfectly general. 276 277@d HASH_PRIME 1009 278 279@<Private v...@>= 280static long ma0=1451,mu0=666,ma1=1475,mu1=847; 281 /* maximum poll values in the data */ 282static node *node_block; /* array of nodes holding team info */ 283static node **hash_block; /* array of heads of hash code lists */ 284static Area working_storage; /* memory needed only while |games| is working */ 285static char **conf_block; /* array of conference names */ 286static long m; /* the number of conference names known so far */ 287 288@ @<Read the first part of \.{games.dat} and compute team weights@>= 289node_block=gb_typed_alloc(MAX_N+2,node,working_storage); 290 /* leave room for string overflow */ 291hash_block=gb_typed_alloc(HASH_PRIME,node*,working_storage); 292conf_block=gb_typed_alloc(MAX_N,char*,working_storage); 293m=0; 294if (gb_trouble_code) { 295 gb_free(working_storage); 296 panic(no_room+1); /* nowhere to copy the data */ 297} 298if (gb_open("games.dat")!=0) 299 panic(early_data_fault); /* couldn't open |"games.dat"| using 300 GraphBase conventions; |io_errors| tells why */ 301for (k=0; k<MAX_N; k++) @<Read and store data for team |k|@>; 302 303@ @<Read and store...@>= 304{@+register node *p; 305 register char *q; 306 p=node_block+k; 307 if (k) p->link=p-1; 308 q=gb_string(p->abb,' '); 309 if (q>&p->abb[6] || gb_char()!=' ') 310 panic(syntax_error); /* out of sync in \.{games.dat} */ 311 @<Enter |p->abb| in the hash table@>; 312 q=gb_string(p->name,'('); 313 if (q>&p->name[24] || gb_char()!='(') 314 panic(syntax_error+1); /* team name too long */ 315 q=gb_string(p->nick,')'); 316 if (q>&p->nick[22] || gb_char()!=')') 317 panic(syntax_error+2); /* team nickname too long */ 318 @<Read the conference name for |p|@>; 319 @<Read the press poll scores for |p| and compute |p->key|@>; 320 gb_newline(); 321} 322 323@ @<Enter |p->abb| in the hash table@>= 324{@+long h=0; /* the hash code */ 325 for (q=p->abb;*q;q++) 326 h=(h+h+*q)%HASH_PRIME; 327 p->hash_link=hash_block[h]; 328 hash_block[h]=p; 329} 330 331@ @<Read the conference name for |p|@>= 332{ 333 gb_string(str_buf,';'); 334 if (gb_char()!=';') panic(syntax_error+3); /* conference name clobbered */ 335 if (strcmp(str_buf,"Independent")!=0) { 336 for (j=0;j<m;j++) 337 if (strcmp(str_buf,conf_block[j])==0) goto found; 338 conf_block[m++]=gb_save_string(str_buf); 339 found:p->conf=conf_block[j]; 340 } 341} 342 343@ The key value computed here will be between 0 and~$2^{31}$, because of 344the bound we've imposed on the weight parameters. 345 346@<Read the press poll scores for |p| and compute |p->key|@>= 347p->a0=gb_number(10); 348if (p->a0>ma0 || gb_char()!=',') panic(syntax_error+4); 349 /* first AP score clobbered */ 350p->u0=gb_number(10); 351if (p->u0>mu0 || gb_char()!=';') panic(syntax_error+5); 352 /* first UPI score clobbered */ 353p->a1=gb_number(10); 354if (p->a1>ma1 || gb_char()!=',') panic(syntax_error+6); 355 /* second AP score clobbered */ 356p->u1=gb_number(10); 357if (p->u1>mu1 || gb_char()!='\n') panic(syntax_error+7); 358 /* second UPI score clobbered */ 359p->key=ap0_weight*(p->a0)+upi0_weight*(p->u0) 360 +ap1_weight*(p->a1)+upi1_weight*(p->u1)+0x40000000; 361 362@ Once all the nodes have been set up, we can use the |gb_linksort| 363routine to sort them into the desired order. It builds 128 364lists from which the desired nodes are readily accessed in decreasing 365order of weight, using random numbers to break ties. 366 367We set the abbreviation code to zero in every team that isn't chosen. Then 368games involving that team will be excluded when edges are generated below. 369 370@<Determine the |n| teams to use in the graph@>= 371{@+register node *p; /* the current node being considered */ 372 register Vertex *v=new_graph->vertices; /* the next vertex to use */ 373 gb_linksort(node_block+MAX_N-1); 374 for (j=127; j>=0; j--) 375 for (p=(node*)gb_sorted[j]; p; p=p->link) { 376 if (v<new_graph->vertices+n) @<Add team |p| to the graph@>@; 377 else p->abb[0]='\0'; /* this team is not being used */ 378 } 379} 380 381@ @<Add team |p| to the graph@>= 382{ 383 v->ap=((long)(p->a0)<<16)+p->a1; 384 v->upi=((long)(p->u0)<<16)+p->u1; 385 v->abbr=gb_save_string(p->abb); 386 v->nickname=gb_save_string(p->nick); 387 v->conference=p->conf; 388 v->name=gb_save_string(p->name); 389 p->vert=v++; 390} 391 392@* Arcs. 393Finally, we read through the rest of \.{games.dat}, adding a pair of 394arcs for each game that belongs to the selected time interval 395and was played by two of the selected teams. 396 397@<Put the appropriate edges into the graph@>= 398{@+register Vertex *u,*v; 399 register long today=0; /* current day of play */ 400 long su,sv; /* points scored by each team */ 401 long ven; /* |HOME| if |v| is home team, |NEUTRAL| if on neutral ground */ 402 while (!gb_eof()) { 403 if (gb_char()=='>') @<Change the current date@>@; 404 else gb_backup(); 405 u=team_lookup(); 406 su=gb_number(10); 407 ven=gb_char(); 408 if (ven=='@@') ven=HOME; 409 else if (ven==',') ven=NEUTRAL; 410 else panic(syntax_error+8); /* bad syntax in game score line */ 411 v=team_lookup(); 412 sv=gb_number(10); 413 if (gb_char()!='\n') panic(syntax_error+9); 414 /* bad syntax in game score line */ 415 if (u!=NULL && v!=NULL && today>=first_day && today<=last_day) 416 @<Enter a new edge@>; 417 gb_newline(); 418 } 419} 420 421@ @<Change the current...@>= 422{@+register char c=gb_char(); /* month code */ 423 register long d; /* day of football season */ 424 switch(c) { 425 case 'A': d=-26;@+break; /* August */ 426 case 'S': d=5;@+break; /* thirty days hath September */ 427 case 'O': d=35;@+break; /* October */ 428 case 'N': d=66;@+break; /* November */ 429 case 'D': d=96;@+break; /* December */ 430 case 'J': d=127;@+break; /* January */ 431 default: d=1000; 432 } 433 d+=gb_number(10); 434 if (d<0 || d>MAX_DAY) panic(syntax_error-1); /* date was clobbered */ 435 today=d; 436 gb_newline(); /* now ready to read a non-date line */ 437} 438 439@ @<Private f...@>= 440static Vertex *team_lookup() /* read and decode an abbreviation */ 441{@+register char *q=str_buf; /* position in |str_buf| */ 442 register long h=0; /* hash code */ 443 register node *p; /* position in hash list */ 444 while (gb_digit(10)<0) { 445 *q=gb_char(); 446 h=(h+h+*q)%HASH_PRIME; 447 q++; 448 } 449 gb_backup(); /* prepare to re-scan the digit following the abbreviation */ 450 *q='\0'; /* null-terminate the abbreviation just scanned */ 451 for (p=hash_block[h];p;p=p->hash_link) 452 if (strcmp(p->abb,str_buf)==0) return p->vert; 453 return NULL; /* not found */ 454} 455 456@ We retain the convention of {\sc GB\_\,GRAPH} that the arc from |v| to |u| 457appears immediately after a matching arc from |u| to |v| when |u<v|. 458 459@<Enter a new edge@>= 460{@+register Arc *a; 461 if (u>v) {@+register Vertex *w; register long sw; 462 w=u;@+u=v;@+v=w; 463 sw=su;@+su=sv;@+sv=sw; 464 ven=HOME+AWAY-ven; 465 } 466 gb_new_arc(u,v,su); 467 gb_new_arc(v,u,sv); 468 a=u->arcs; /* a pointer to the new arc */ 469 if (v->arcs!=a+1) panic (impossible+9); /* can't happen */ 470 a->venue=ven;@+(a+1)->venue=HOME+AWAY-ven; 471 a->date=(a+1)->date=today; 472} 473 474@* Index. As usual, we close with an index that 475shows where the identifiers of \\{gb\_games} are defined and used. 476