1% This file is part of the Stanford GraphBase (c) Stanford University 1993
2@i boilerplate.w %<< legal stuff: PLEASE READ IT BEFORE MAKING ANY CHANGES!
3@i gb_types.w
4
5\def\title{GB\_\,MILES}
6
7\prerequisites{GB\_\,GRAPH}{GB\_\,IO}
8@* Introduction. This GraphBase module contains the |miles| subroutine,
9which creates a family of undirected graphs based on highway mileage data
10between North American cities. Examples of the use of this procedure can be
11found in the demo programs {\sc MILES\_\,SPAN} and {\sc GB\_\,PLANE}.
12
13@(gb_miles.h@>=
14extern Graph *miles();
15
16@ The subroutine call {\advance\thinmuskip 0mu plus 4mu
17|miles(n,north_weight,west_weight,pop_weight,max_distance,max_degree,seed)|}
18constructs a graph based on the information in \.{miles.dat}.
19Each vertex of the graph corresponds to one of the 128 cities whose
20name is alphabetically greater than or equal to `Ravenna, Ohio' in
21the 1949 edition of Rand McNally {\char`\&} Company's {\sl Standard Highway
22Mileage Guide}. Edges between vertices are assigned lengths representing
23distances between cities, in miles. In most cases these mileages come
24from the Rand McNally Guide, but several dozen entries needed to be changed
25drastically because they were obviously too large or too small; in such cases
26an educated guess was made. Furthermore, about 5\% of the entries were
27adjusted slightly in order to
28ensure that all distances satisfy the ``triangle inequality'': The
29graph generated by |miles| has the property that the
30distance from |u| to~|v| plus the distance from |v| to~|w| always exceeds
31or equals the distance from |u| to~|w|.
32
33The constructed graph will have $\min(n,128)$ vertices; the default value
34|n=128| is substituted if |n=0|. If |n| is less
35than 128, the |n| cities will be selected by assigning a weight to
36each city and choosing the |n| with largest weight, using random
37numbers to break ties in case of equal weights. Weights are computed
38by the formula
39$$ |north_weight|\cdot|lat|+|west_weight|\cdot|lon|+|pop_weight|\cdot|pop|, $$
40where |lat| is latitude north of the equator, |lon| is longitude
41west of Greenwich, and |pop| is the population in 1980. Both |lat| and |lon|
42are given in ``centidegrees'' (hundredths of degrees). For example,
43San Francisco has |lat=3778|, |lon=12242|, and |pop=678974|;
44this means that, before the recent earthquake, it was located at
45$37.78^\circ$ north latitude and $122.42^\circ$ west longitude, and that it had
46678,974 residents in the 1980 census. The weight parameters must satisfy
47$$ \vert|north_weight|\vert\le100{,}000,\quad
48   \vert|west_weight|\vert\le100{,}000,\quad
49   \vert|pop_weight|\vert\le100.$$
50
51The constructed graph will be ``complete''---that is, it will have
52edges between every pair of vertices---unless special values are given to
53the parameters
54|max_distance| or |max_degree|. If |max_distance!=0|, edges with more
55than |max_distance| miles will not appear; if |max_degree!=0|, each
56vertex will be limited to at most |max_degree| of its shortest edges.
57
58Vertices of the graph will appear in order of decreasing weight.
59The |seed| parameter defines the pseudo-random numbers used wherever
60a ``random'' choice between equal-weight vertices or equal-length edges
61needs to be made.
62
63@d MAX_N 128
64
65@(gb_miles.h@>=
66#define MAX_N 128 /* maximum and default number of cities */
67
68@ Examples: The call |miles(100,0,0,1,0,0,0)| will construct a
69complete graph on 100 vertices, representing the 100 most populous
70cities in the database.  It turns out that San Diego, with a
71population of 875,538, is the winning city by this criterion, followed
72by San Antonio (population 786,023), San Francisco (678,974), and
73Washington D.C. (638,432).
74
75To get |n| cities in the western United States and Canada, you can say
76$|miles|(n,0,1,0,\ldots\,)$; to get |n| cities in the Northeast, use a
77call like $|miles|(n,1,-1,0,\ldots\,)$. A parameter setting like
78$(50,-500,0,1,\ldots\,)$ produces mostly Southern cities, except for a
79few large metropolises in the north.
80
81If you ask for |miles(n,a,b,c,0,1,0)|, you get an edge between cities if
82and only if each city is the nearest to the other, among the |n| cities
83selected. (The graph is always undirected: There is an arc from |u| to~|v|
84if and only if there's an arc of the same length from |v| to~|u|.)
85
86A random selection of cities can be obtained by calling
87|miles(n,0,0,0,m,d,s)|.  Different choices of the seed number |s| will
88produce different selections, in a system-independent manner;
89identical results will be obtained on all computers when identical
90parameters have been specified.  Equivalent experiments on algorithms
91for graph manipulation can therefore be performed by researchers in
92different parts of the world. Any value of |s| between 0 and
93$2^{31}-1$ is permissible.
94
95@ If the |miles| routine encounters a problem, it returns |NULL|
96(\.{NULL}), after putting a code number into the external variable
97|panic_code|. This code number identifies the type of failure.
98Otherwise |miles| returns a pointer to the newly created graph, which
99will be represented with the data structures explained in {\sc GB\_\,GRAPH}.
100(The external variable |panic_code| is itself defined in {\sc GB\_\,GRAPH}.)
101
102@d panic(c) @+{@+panic_code=c;@+gb_trouble_code=0;@+return NULL;@+}
103
104@ The \CEE/ file \.{gb\_miles.c} has the following overall shape:
105
106@p
107#include "gb_io.h" /* we will use the {\sc GB\_\,IO} routines for input */
108#include "gb_flip.h"
109 /* we will use the {\sc GB\_\,FLIP} routines for random numbers */
110#include "gb_graph.h" /* we will use the {\sc GB\_\,GRAPH} data structures */
111#include "gb_sort.h" /* and the linksort routine */
112@h@#
113@<Type declarations@>@;
114@<Private variables@>@;
115@#
116Graph *miles(n,north_weight,west_weight,pop_weight,
117    max_distance,max_degree,seed)
118  unsigned long n; /* number of vertices desired */
119  long north_weight; /* coefficient of latitude in the weight function */
120  long west_weight; /* coefficient of longitude in the weight function */
121  long pop_weight; /* coefficient of population in the weight function */
122  unsigned long max_distance; /* maximum distance in an edge, if nonzero */
123  unsigned long max_degree;
124       /* maximum number of edges per vertex, if nonzero */
125  long seed; /* random number seed */
126{@+@<Local variables@>@;@#
127  gb_init_rand(seed);
128  @<Check that the parameters are valid@>;
129  @<Set up a graph with |n| vertices@>;
130  @<Read the data file \.{miles.dat} and compute city weights@>;
131  @<Determine the |n| cities to use in the graph@>;
132  @<Put the appropriate edges into the graph@>;
133  if (gb_trouble_code) {
134    gb_recycle(new_graph);
135    panic(alloc_fault); /* oops, we ran out of memory somewhere back there */
136  }
137  return new_graph;
138}
139
140@ @<Local var...@>=
141Graph *new_graph; /* the graph constructed by |miles| */
142register long j,k; /* all-purpose indices */
143
144@ @<Check that the parameters are valid@>=
145if (n==0 || n>MAX_N) n=MAX_N;
146if (max_degree==0 || max_degree>=n) max_degree=n-1;
147if (north_weight>100000 || west_weight>100000 || pop_weight>100 @|
148 || north_weight<-100000 || west_weight<-100000 || pop_weight<-100)
149  panic(bad_specs); /* the magnitude of at least one weight is too big */
150
151@ @<Set up a graph with |n| vertices@>=
152new_graph=gb_new_graph(n);
153if (new_graph==NULL)
154  panic(no_room); /* out of memory before we're even started */
155sprintf(new_graph->id,"miles(%lu,%ld,%ld,%ld,%lu,%lu,%ld)",
156  n,north_weight,west_weight,pop_weight,max_distance,max_degree,seed);
157strcpy(new_graph->util_types,"ZZIIIIZZZZZZZZ");
158
159@* Vertices.  As we read in the data, we construct a list of nodes,
160each of which contains a city's name, latitude, longitude, population,
161and weight. These nodes conform to the specifications stipulated in
162the {\sc GB\_\,SORT} module. After the list has been sorted by weight, the
163top |n| entries will be the vertices of the new graph.
164
165@<Type decl...@>=
166typedef struct node_struct { /* records to be sorted by |gb_linksort| */
167  long key; /* the nonnegative sort key (weight plus $2^{30}$) */
168  struct node_struct *link; /* pointer to next record */
169  long kk; /* index of city in the original database */
170  long lat,lon,pop; /* latitude, longitude, population */
171  char name[30]; /* |"City Name, ST"| */
172} node;
173
174@ The constants defined here are taken from the specific data in \.{miles.dat},
175because this routine is not intended to be perfectly general.
176
177@<Private...@>=
178static long min_lat=2672, max_lat=5042, min_lon=7180, max_lon=12312,
179 min_pop=2521, max_pop=875538; /* tight bounds on data entries */
180static node *node_block; /* array of nodes holding city info */
181static long *distance; /* array of distances */
182
183@ The data in \.{miles.dat} appears in 128 groups of lines, one for each
184city, in reverse alphabetical order. These groups have the general form
185$$\vcenter{\halign{\tt#\hfil\cr
186City Name, ST[lat,lon]pop\cr
187d1 d2 d3 d4 d5 d6 ... (possibly several lines' worth)\cr
188}}$$
189where \.{City Name} is the name of the city (possibly including spaces);
190\.{ST} is the two-letter state code; \.{lat} and \.{lon} are latitude
191and longitude in hundredths of degrees; \.{pop} is the population; and
192the remaining numbers \.{d1}, \.{d2}, \dots\ are distances to the
193previously named cities in reverse order. Each distance is separated
194from the previous item by either a blank space or a newline character.
195For example, the line
196$$\hbox{\tt San Francisco, CA[3778,12242]678974}$$
197specifies the data about San Francisco that was mentioned earlier.
198From the first few groups
199$$\vcenter{\halign{\tt#\hfil\cr
200Youngstown, OH[4110,8065]115436\cr
201Yankton, SD[4288,9739]12011\cr
202966\cr
203Yakima, WA[4660,12051]49826\cr
2041513 2410\cr
205Worcester, MA[4227,7180]161799\cr
2062964 1520 604\cr
207}}$$
208we learn that the distance from Worcester, Massachusetts, to Yakima,
209Washington, is 2964 miles; from Worcester to Youngstown it is 604 miles.
210
211The following two-letter ``state codes'' are used for Canadian provinces:
212$\.{BC}=\null$British Columbia,
213$\.{MB}=\null$Manitoba,
214$\.{ON}=\null$Ontario,
215$\.{SK}=\null$Saskatchewan.
216
217@<Read the data file \.{miles.dat} and compute city weights@>=
218node_block=gb_typed_alloc(MAX_N,node,new_graph->aux_data);
219distance=gb_typed_alloc(MAX_N*MAX_N,long,new_graph->aux_data);
220if (gb_trouble_code) {
221  gb_free(new_graph->aux_data);
222  panic(no_room+1); /* no room to copy the data */
223}
224if (gb_open("miles.dat")!=0)
225  panic(early_data_fault);
226    /* couldn't open |"miles.dat"| using GraphBase conventions;
227                 |io_errors| tells why */
228for (k=MAX_N-1; k>=0; k--) @<Read and store data for city |k|@>;
229if (gb_close()!=0)
230  panic(late_data_fault);
231    /* something's wrong with |"miles.dat"|; see |io_errors| */
232
233@ The bounds we've imposed on |north_weight|, |west_weight|, and |pop_weight|
234guarantee that the key value computed here will be between 0 and~$2^{31}$.
235
236@<Read and store...@>=
237{@+register node *p;
238  p=node_block+k;
239  p->kk=k;
240  if (k) p->link=p-1;
241  gb_string(p->name,'[');
242  if (gb_char()!='[') panic(syntax_error); /* out of sync in \.{miles.dat} */
243  p->lat=gb_number(10);
244  if (p->lat<min_lat || p->lat>max_lat || gb_char()!=',')
245    panic(syntax_error+1); /* latitude data was clobbered */
246  p->lon=gb_number(10);
247  if (p->lon<min_lon || p->lon>max_lon || gb_char()!=']')
248    panic(syntax_error+2); /* longitude data was clobbered */
249  p->pop=gb_number(10);
250  if (p->pop<min_pop || p->pop>max_pop)
251    panic(syntax_error+3); /* population data was clobbered */
252  p->key=north_weight*(p->lat-min_lat)
253   +west_weight*(p->lon-min_lon)
254   +pop_weight*(p->pop-min_pop)+0x40000000;
255  @<Read the mileage data for city |k|@>;
256  gb_newline();
257}
258
259@ @d d(j,k) *(distance+(MAX_N*j+k))
260
261@<Read the mileage...@>=
262{
263  for (j=k+1; j<MAX_N; j++) {
264    if (gb_char()!=' ')
265      gb_newline();
266    d(j,k)=d(k,j)=gb_number(10);
267  }
268}
269
270@ Once all the nodes have been set up, we can use the |gb_linksort| routine
271to sort them into the desired order. This routine, which is part of
272the \\{GB\_\,SORT} module, builds 128 lists from which the desired nodes
273are readily accessed in decreasing order of weight, using random numbers
274to break ties.
275
276We set the population to zero in every city that isn't chosen. Then
277that city will be excluded when edges are examined later.
278
279@<Determine the |n| cities to use in the graph@>=
280{@+register node *p; /* the current node being considered */
281  register Vertex *v=new_graph->vertices; /* the first unfilled vertex */
282  gb_linksort(node_block+MAX_N-1);
283  for (j=127; j>=0; j--)
284    for (p=(node*)gb_sorted[j]; p; p=p->link) {
285      if (v<new_graph->vertices+n) @<Add city |p->kk| to the graph@>@;
286      else p->pop=0; /* this city is not being used */
287    }
288}
289
290@ Utility fields |x| and |y| for each vertex are set to coordinates that
291can be used in geometric computations; these coordinates are obtained by
292simple linear transformations of latitude and longitude (not by any
293kind of sophisticated polyconic projection). We will have
294$$0\le x\le5132, \qquad 0\le y\le 3555.$$
295Utility field~|z| is set to the city's index number (0 to 127) in the
296original database. Utility field~|w| is set to the city's population.
297
298The coordinates computed here are compatible with those in the \TEX/ file
299\.{cities.texmap}. Users might want to incorporate edited copies of that file
300into documents that display results obtained with |miles| graphs.
301@.cities.texmap@>
302
303@d x_coord x.I
304@d y_coord y.I
305@d index_no z.I
306@d people w.I
307
308@<Add city |p->kk| to the graph@>=
309{
310  v->x_coord=max_lon-p->lon; /* |x| coordinate is complement of longitude */
311  v->y_coord=p->lat-min_lat;
312  v->y_coord+=(v->y_coord)>>1; /* |y| coordinate is 1.5 times latitude */
313  v->index_no=p->kk;
314  v->people=p->pop;
315  v->name=gb_save_string(p->name);
316  v++;
317}
318
319@ @(gb_miles.h@>=
320#define x_coord @t\quad@> x.I
321 /* utility field definitions for the header file */
322#define y_coord @t\quad@> y.I
323#define index_no @t\quad@> z.I
324#define people @t\quad@> w.I
325
326@* Arcs.  We make the distance negative in the matrix entry for an arc
327that is not to be included.  Nothing needs to be done in this regard
328unless the user has specified a maximum degree or a maximum edge length.
329
330@<Put the approp...@>=
331if (max_distance>0 || max_degree>0)
332  @<Prune unwanted edges by negating their distances@>;
333{@+register Vertex *u,*v;
334  for (u=new_graph->vertices;u<new_graph->vertices+n;u++) {
335    j=u->index_no;
336    for (v=u+1;v<new_graph->vertices+n;v++) {
337      k=v->index_no;
338      if (d(j,k)>0 && d(k,j)>0)
339        gb_new_edge(u,v,d(j,k));
340    }
341  }
342}
343
344@ @<Prune...@>=
345{@+register node *p;
346  if (max_degree==0) max_degree=MAX_N;
347  if (max_distance==0) max_distance=30000;
348  for (p=node_block; p<node_block+MAX_N; p++)
349    if (p->pop) { /* this city not deleted */
350      k=p->kk;
351      @<Blank out all undesired edges from city |k|@>;
352    }
353}
354
355@ Here we reuse the key fields of the nodes, storing complementary distances
356there instead of weights. We also let the sorting routine change the
357link fields. The other fields, however---especially |pop|---remain
358unchanged. Yes, the author knows this is a wee bit tricky, but why~not?
359
360@<Blank...@>=
361{@+register node *q;
362  register node*s=NULL; /* list of nodes containing edges from city |k| */
363  for (q=node_block; q<node_block+MAX_N; q++)
364    if (q->pop && q!=p) { /* another city not deleted */
365      j=d(k,q->kk); /* distance from |p| to |q| */
366      if (j>max_distance)
367        d(k,q->kk)=-j;
368      else {
369        q->key=max_distance-j;
370        q->link=s;
371        s=q;
372      }
373    }
374  gb_linksort(s);
375  /* now all the surviving edges from |p| are in the list |gb_sorted[0]| */
376  j=0; /* |j| counts how many edges have been accepted */
377  for (q=(node*)gb_sorted[0]; q; q=q->link)
378    if (++j>max_degree)
379      d(k,q->kk)=-d(k,q->kk);
380}
381
382@ Random access to the distance matrix is provided to users via
383the external function |miles_distance|. Caution: This function can be
384used only with the graph most recently made by |miles|, and only when
385the graph's |aux_data| has not been recycled, and only when the
386|z| utility fields have not been used for another purpose.
387
388The result might be negative when an edge has been suppressed. Moreover,
389we can in fact have |miles_distance(u,v)<0| when |miles_distance(v,u)>0|,
390if the distance in question was suppressed by the |max_degree| constraint
391on~|u| but not on~|v|.
392
393@p long miles_distance(u,v)
394  Vertex *u,*v;
395{
396  return d(u->index_no,v->index_no);
397}
398
399@ @(gb_miles.h@>=
400extern long miles_distance();
401
402@* Index. As usual, we close with an index that
403shows where the identifiers of \\{gb\_miles} are defined and used.
404