1#!/usr/local/bin/perl
2
3#
4# TODO:
5#
6# Apply oid filters on table2
7#
8
9use warnings;
10use Pg;
11
12$VERBOSE = 0;
13
14sub usage
15{
16	local($me) = `basename $0`;
17	chop($me);
18	print STDERR "$me [-v] [-vacuum] <table1> <table2> [<col1>] [<col2>]\n";
19}
20
21$TABLE1='';
22$TABLE2='';
23$COLUMN1='';
24$COLUMN2='';
25for ($i=0; $i<@ARGV; $i++)
26{
27	if ( $ARGV[$i] =~ m/^-/ )
28	{
29		if ( $ARGV[$i] eq '-v' )
30		{
31			$VERBOSE++;
32		}
33		elsif ( $ARGV[$i] eq '-vacuum' )
34		{
35			$VACUUM=1;
36		}
37		else
38		{
39			print STDERR "Unknown option $ARGV[$i]:\n";
40			usage();
41			exit(1);
42		}
43	}
44	elsif ( ! $TABLE1 )
45	{
46		$TABLE1 = $ARGV[$i];
47	}
48	elsif ( ! $TABLE2 )
49	{
50		$TABLE2 = $ARGV[$i];
51	}
52	elsif ( ! $COLUMN1 )
53	{
54		$COLUMN1 = $ARGV[$i];
55	}
56	elsif ( ! $COLUMN2 )
57	{
58		$COLUMN2 = $ARGV[$i];
59	}
60	else
61	{
62		print STDERR "Too many options:\n";
63		usage();
64		exit(1);
65	}
66}
67
68if ( ! $TABLE1 || ! $TABLE2 )
69{
70	usage();
71	exit 1;
72}
73
74
75$SCHEMA1 = 'public';
76$COLUMN1 = 'the_geom' if ( $COLUMN1 eq '' );
77if ( $TABLE1 =~ /(.*)\.(.*)/ )
78{
79	$SCHEMA1 = $1;
80	$TABLE1 = $2;
81}
82$SCHEMA2 = 'public';
83$COLUMN2 = 'the_geom' if ( $COLUMN2 eq '' );
84if ( $TABLE2 =~ /(.*)\.(.*)/ )
85{
86	$SCHEMA2 = $1;
87	$TABLE2 = $2;
88}
89
90#connect
91$conn = Pg::connectdb("");
92if ( $conn->status != PGRES_CONNECTION_OK ) {
93        print STDERR $conn->errorMessage;
94	exit(1);
95}
96
97if ( $VERBOSE )
98{
99	print "Table1: \"$SCHEMA1\".\"$TABLE1\".\"$COLUMN1\"\n";
100	print "Table2: \"$SCHEMA2\".\"$TABLE2\".\"$COLUMN2\"\n";
101}
102
103# Get geometry types
104
105$TYPE1 = get_geometry_type($SCHEMA1, $TABLE1, $COLUMN1);
106$TYPE2 = get_geometry_type($SCHEMA2, $TABLE2, $COLUMN2);
107
108# vacuum analyze table
109if ( $VACUUM )
110{
111	print "VACUUM ANALYZE\n";
112	vacuum($SCHEMA1, $TABLE1);
113	vacuum($SCHEMA2, $TABLE2);
114}
115
116# get number of features from pg_class.ntuples
117# (correct if vacuum have been run after last insertion/deletions)
118$NROWS1 = get_num_rows($SCHEMA1, $TABLE1);
119$NROWS2 = get_num_rows($SCHEMA2, $TABLE2);
120$TOTROWS = $NROWS1*$NROWS2;
121
122
123# Test join selectivity
124
125($est,$real) = test_join();
126$delta = $est-$real;
127$error = $delta/$TOTROWS;
128$error = int(($error)*10000)/100;
129
130print "      Types: $TYPE1 - $TYPE2\n";
131print "       Rows: $NROWS1 x $NROWS2 = $TOTROWS\n";
132print "  Estimated: ".$est."\n";
133print "       Real: ".$real."\n";
134print "  Error: ".$error."%\n";
135
136##################################################################
137
138sub print_extent
139{
140	local($ext) = shift;
141	local($s);
142
143	$s = $ext->{'xmin'}." ".$ext->{'ymin'}."  ";
144	$s .= $ext->{'xmax'}." ".$ext->{'ymax'};
145
146	return $s;
147}
148
149sub split_extent
150{
151	local($ext) = shift;
152	local($bps) = shift;
153
154	local($width, $height, $cell_width, $cell_height);
155	local($x,$y);
156	local(@stack);
157
158	$width = $ext->{'xmax'} - $ext->{'xmin'};
159	$height = $ext->{'ymax'} - $ext->{'ymin'};
160	$cell_width = $width / $bps;
161	$cell_height = $height / $bps;
162
163	if ($VERBOSE)
164	{
165		print "cell_w: $cell_width\n";
166		print "cell_h: $cell_height\n";
167	}
168
169	@stack = ();
170	for ($x=0; $x<$bps; $x++)
171	{
172		for($y=0; $y<$bps; $y++)
173		{
174			local(%cell);
175			$cell{'xmin'} = $ext->{'xmin'}+$x*$cell_width;
176			$cell{'ymin'} = $ext->{'ymin'}+$y*$cell_height;
177			$cell{'xmax'} = $ext->{'xmin'}+($x+1)*$cell_width;
178			$cell{'ymax'} = $ext->{'ymin'}+($y+1)*$cell_height;
179			print "cell: ".print_extent(\%cell)."\n" if ($VERBOSE);
180			push(@stack, \%cell);
181		}
182	}
183	return @stack;
184}
185
186sub test_join
187{
188	local($ext) = shift;
189
190	# Test whole extent query
191	$query = 'explain analyze select * from "'.
192		$SCHEMA1.'"."'.$TABLE1.'" t1, "'.$SCHEMA2.'"."'.$TABLE2.
193		'" t2 WHERE t1."'.$COLUMN1.'" && '.
194		' t2."'.$COLUMN2.'"';
195	print $query."\n";
196	$res = $conn->exec($query);
197	if ( $res->resultStatus != PGRES_TUPLES_OK )  {
198		print STDERR $conn->errorMessage;
199		exit(1);
200	}
201	while ( ($row=$res->fetchrow()) )
202	{
203		next unless $row =~ /.* rows=([0-9]+) .* rows=([0-9]+) /;
204		$est = $1;
205		$real = $2;
206		last;
207	}
208
209	return ($est,$real);
210}
211
212sub get_geometry_type
213{
214	my $schema = shift;
215	my $table = shift;
216	my $col = shift;
217	my $query = 'select distinct geometrytype("'.$col.'") from "'.$schema.'"."'.$table.'"';
218	my $res = $conn->exec($query);
219	if ( $res->resultStatus != PGRES_TUPLES_OK )  {
220		print STDERR $conn->errorMessage;
221		exit(1);
222	}
223	if ( $res->ntuples() > 1 ) {
224		print STDERR "WARNING: Mixed geometry types in \"$SCHEMA1\".\"$TABLE1\".\"$COLUMN1\"\n";
225	}
226	return $res->getvalue(0, 0);
227}
228
229sub vacuum
230{
231	my $SCHEMA = shift;
232	my $TABLE = shift;
233	my $query = 'vacuum analyze "'.$SCHEMA.'"."'.$TABLE.'"';
234	my $res = $conn->exec($query);
235	if ( $res->resultStatus != PGRES_COMMAND_OK )  {
236		print STDERR $conn->errorMessage;
237		exit(1);
238	}
239}
240
241sub get_num_rows
242{
243	my $SCHEMA = shift;
244	my $TABLE = shift;
245	my $query = 'SELECT c.reltuples FROM pg_class c, pg_namespace n '.
246		"WHERE c.relnamespace = n.oid AND n.nspname = '$SCHEMA' ".
247		" AND c.relname = '$TABLE'";
248	my $res = $conn->exec($query);
249	if ( $res->resultStatus != PGRES_TUPLES_OK )  {
250		print STDERR $conn->errorMessage;
251		exit(1);
252	}
253	return $res->getvalue(0, 0);
254}
255
256#
257# $Log$
258# Revision 1.3  2005/04/18 13:50:14  strk
259# Fixed bug in table2 schema parsing.
260#
261# Revision 1.2  2004/12/23 14:48:25  strk
262# Fixed help string, and added a TODO item
263#
264# Revision 1.1  2004/12/22 17:02:17  strk
265# initial revision
266#
267# Revision 1.8  2004/03/08 17:21:57  strk
268# changed error computation code to delta/totrows
269#
270# Revision 1.7  2004/03/06 18:02:48  strk
271# Comma-separated bps values accepted
272#
273# Revision 1.6  2004/03/05 21:06:04  strk
274# Added -vacuum switch
275#
276# Revision 1.5  2004/03/05 21:03:18  strk
277# Made the -bps switch specify the exact level(s) at which to run the test
278#
279# Revision 1.4  2004/03/05 16:40:30  strk
280# rewritten split_extent to be more datatype-conservative
281#
282# Revision 1.3  2004/03/05 16:01:02  strk
283# added -bps switch to set maximun query level. reworked command line parsing
284#
285# Revision 1.2  2004/03/05 15:29:35  strk
286# more verbose output
287#
288# Revision 1.1  2004/03/05 11:52:24  strk
289# initial import
290#
291#
292