1#!/usr/local/bin/perl -w
2
3#
4# TODO:
5#
6# Apply oid filters on table2
7#
8
9use Pg;
10
11$VERBOSE = 0;
12
13sub usage
14{
15	local($me) = `basename $0`;
16	chop($me);
17	print STDERR "$me [-v] [-vacuum] <table1> <table2> [<col1>] [<col2>]\n";
18}
19
20$TABLE1='';
21$TABLE2='';
22$COLUMN1='';
23$COLUMN2='';
24for ($i=0; $i<@ARGV; $i++)
25{
26	if ( $ARGV[$i] =~ m/^-/ )
27	{
28		if ( $ARGV[$i] eq '-v' )
29		{
30			$VERBOSE++;
31		}
32		elsif ( $ARGV[$i] eq '-vacuum' )
33		{
34			$VACUUM=1;
35		}
36		else
37		{
38			print STDERR "Unknown option $ARGV[$i]:\n";
39			usage();
40			exit(1);
41		}
42	}
43	elsif ( ! $TABLE1 )
44	{
45		$TABLE1 = $ARGV[$i];
46	}
47	elsif ( ! $TABLE2 )
48	{
49		$TABLE2 = $ARGV[$i];
50	}
51	elsif ( ! $COLUMN1 )
52	{
53		$COLUMN1 = $ARGV[$i];
54	}
55	elsif ( ! $COLUMN2 )
56	{
57		$COLUMN2 = $ARGV[$i];
58	}
59	else
60	{
61		print STDERR "Too many options:\n";
62		usage();
63		exit(1);
64	}
65}
66
67if ( ! $TABLE1 || ! $TABLE2 )
68{
69	usage();
70	exit 1;
71}
72
73
74$SCHEMA1 = 'public';
75$COLUMN1 = 'the_geom' if ( $COLUMN1 eq '' );
76if ( $TABLE1 =~ /(.*)\.(.*)/ )
77{
78	$SCHEMA1 = $1;
79	$TABLE1 = $2;
80}
81$SCHEMA2 = 'public';
82$COLUMN2 = 'the_geom' if ( $COLUMN2 eq '' );
83if ( $TABLE2 =~ /(.*)\.(.*)/ )
84{
85	$SCHEMA2 = $1;
86	$TABLE2 = $2;
87}
88
89#connect
90$conn = Pg::connectdb("");
91if ( $conn->status != PGRES_CONNECTION_OK ) {
92        print STDERR $conn->errorMessage;
93	exit(1);
94}
95
96if ( $VERBOSE )
97{
98	print "Table1: \"$SCHEMA1\".\"$TABLE1\".\"$COLUMN1\"\n";
99	print "Table2: \"$SCHEMA2\".\"$TABLE2\".\"$COLUMN2\"\n";
100}
101
102# Get geometry types
103
104#$TYPE1 = get_geometry_type($SCHEMA1, $TABLE1, $COLUMN1);
105#$TYPE2 = get_geometry_type($SCHEMA2, $TABLE2, $COLUMN2);
106
107# vacuum analyze table
108if ( $VACUUM )
109{
110	print "VACUUM ANALYZE\n";
111	vacuum($SCHEMA1, $TABLE1);
112	vacuum($SCHEMA2, $TABLE2);
113}
114
115# get number of features from pg_class.ntuples
116# (correct if vacuum have been run after last insertion/deletions)
117$NROWS1 = get_num_rows($SCHEMA1, $TABLE1);
118$NROWS2 = get_num_rows($SCHEMA2, $TABLE2);
119$TOTROWS = $NROWS1*$NROWS2;
120
121
122# Test join selectivity
123
124($est,$real) = test_join();
125$delta = $est-$real;
126$error = $delta/$TOTROWS;
127$error = int(($error)*10000)/100;
128
129#print "      Types: $TYPE1 - $TYPE2\n";
130print "       Rows: $NROWS1 x $NROWS2 = $TOTROWS\n";
131print "  Estimated: ".$est."\n";
132print "       Real: ".$real."\n";
133print "  Error: ".$error."%\n";
134
135##################################################################
136
137sub print_extent
138{
139	local($ext) = shift;
140	local($s);
141
142	$s = $ext->{'xmin'}." ".$ext->{'ymin'}."  ";
143	$s .= $ext->{'xmax'}." ".$ext->{'ymax'};
144
145	return $s;
146}
147
148sub split_extent
149{
150	local($ext) = shift;
151	local($bps) = shift;
152
153	local($width, $height, $cell_width, $cell_height);
154	local($x,$y);
155	local(@stack);
156
157	$width = $ext->{'xmax'} - $ext->{'xmin'};
158	$height = $ext->{'ymax'} - $ext->{'ymin'};
159	$cell_width = $width / $bps;
160	$cell_height = $height / $bps;
161
162	if ($VERBOSE)
163	{
164		print "cell_w: $cell_width\n";
165		print "cell_h: $cell_height\n";
166	}
167
168	@stack = ();
169	for ($x=0; $x<$bps; $x++)
170	{
171		for($y=0; $y<$bps; $y++)
172		{
173			local(%cell);
174			$cell{'xmin'} = $ext->{'xmin'}+$x*$cell_width;
175			$cell{'ymin'} = $ext->{'ymin'}+$y*$cell_height;
176			$cell{'xmax'} = $ext->{'xmin'}+($x+1)*$cell_width;
177			$cell{'ymax'} = $ext->{'ymin'}+($y+1)*$cell_height;
178			print "cell: ".print_extent(\%cell)."\n" if ($VERBOSE);
179			push(@stack, \%cell);
180		}
181	}
182	return @stack;
183}
184
185sub test_join
186{
187	local($ext) = shift;
188
189	# Test whole extent query
190	$query = 'explain analyze select count(1) from "'.
191		$SCHEMA1.'"."'.$TABLE1.'" t1, "'.$SCHEMA2.'"."'.$TABLE2.
192		'" t2 WHERE t1."'.$COLUMN1.'" && '.
193		' t2."'.$COLUMN2.'"';
194	print $query."\n";
195	$res = $conn->exec($query);
196	if ( $res->resultStatus != PGRES_TUPLES_OK )  {
197		print STDERR $conn->errorMessage;
198		exit(1);
199	}
200
201	while ( ($row=$res->fetchrow) )
202	{
203		if ( ($row =~ /.* rows=([0-9]+) .* rows=([0-9]+) /) && ! ($row =~ /.*Aggregate.*/) )
204		{
205			$est = $1;
206			$real = $2;
207			last;
208		}
209	}
210
211	return ($est,$real);
212}
213
214sub get_geometry_type
215{
216	my $schema = shift;
217	my $table = shift;
218	my $col = shift;
219	my $query = 'select distinct geometrytype("'.$col.'") from "'.$schema.'"."'.$table.'"';
220	my $res = $conn->exec($query);
221	if ( $res->resultStatus != PGRES_TUPLES_OK )  {
222		print STDERR $conn->errorMessage;
223		exit(1);
224	}
225	if ( $res->ntuples() > 1 ) {
226		print STDERR "Mixed geometry types in \"$SCHEMA1\".\"$TABLE1\".\"$COLUMN1\"\n";
227		exit(1);
228	}
229	return $res->getvalue(0, 0);
230}
231
232sub vacuum
233{
234	my $SCHEMA = shift;
235	my $TABLE = shift;
236	my $query = 'vacuum analyze "'.$SCHEMA.'"."'.$TABLE.'"';
237	my $res = $conn->exec($query);
238	if ( $res->resultStatus != PGRES_COMMAND_OK )  {
239		print STDERR $conn->errorMessage;
240		exit(1);
241	}
242}
243
244sub get_num_rows
245{
246	my $SCHEMA = shift;
247	my $TABLE = shift;
248	my $query = 'SELECT c.reltuples FROM pg_class c, pg_namespace n '.
249		"WHERE c.relnamespace = n.oid AND n.nspname = '$SCHEMA' ".
250		" AND c.relname = '$TABLE'";
251	my $res = $conn->exec($query);
252	if ( $res->resultStatus != PGRES_TUPLES_OK )  {
253		print STDERR $conn->errorMessage;
254		exit(1);
255	}
256	return $res->getvalue(0, 0);
257}
258
259#
260# $Log$
261# Revision 1.3  2005/04/18 13:50:14  strk
262# Fixed bug in table2 schema parsing.
263#
264# Revision 1.2  2004/12/23 14:48:25  strk
265# Fixed help string, and added a TODO item
266#
267# Revision 1.1  2004/12/22 17:02:17  strk
268# initial revision
269#
270# Revision 1.8  2004/03/08 17:21:57  strk
271# changed error computation code to delta/totrows
272#
273# Revision 1.7  2004/03/06 18:02:48  strk
274# Comma-separated bps values accepted
275#
276# Revision 1.6  2004/03/05 21:06:04  strk
277# Added -vacuum switch
278#
279# Revision 1.5  2004/03/05 21:03:18  strk
280# Made the -bps switch specify the exact level(s) at which to run the test
281#
282# Revision 1.4  2004/03/05 16:40:30  strk
283# rewritten split_extent to be more datatype-conservative
284#
285# Revision 1.3  2004/03/05 16:01:02  strk
286# added -bps switch to set maximun query level. reworked command line parsing
287#
288# Revision 1.2  2004/03/05 15:29:35  strk
289# more verbose output
290#
291# Revision 1.1  2004/03/05 11:52:24  strk
292# initial import
293#
294#
295