1#!/usr/bin/perl -w
2
3# This is a perl front end to run dot as a web service.
4# To install, set the perl path above, and configuration paths below:
5# $Tdir, $SigCommand, $GraphvizBinDir
6
7# This script takes as an argument the URL of a dot (graph) file with
8# the name of a graphviz layout server and an output type as suffixes.
9# The argument can be passed in the PATH_INFO environment variable as
10# in a typical Apache setup, or as a command line argument for manual
11# testing.
12#
13# The server must be: dot, neato, or twopi.
14# The output type must be one of dot's output types.  The server
15# returns a layout of the requested type as an HTTP stream.  The dot
16# output type is mapped to an appropriate MIME type.
17# For example, if yourhost.company.com/unix.gv is a dot graph file, try
18# webdot.cgi http://yourhost.company.com/unix.gv.dot.ps
19# webdot.cgi http://yourhost.company.com/unix.gv.neato.gif
20# webdot.cgi http://yourhost.company.com/unix.gv.twopi.pdf
21#
22# More details:
23# PDF and EPSI files are made by postprocessors.
24#
25# The server maintains a cache directory of dot files and layouts.
26# The server always pulls the dot file, but doesn't bother with layout
27# if its cache is valid.  This is checked using $SigCommand on the dot source
28# (typically md5 or at least cksum).
29#
30# The cache should be cleaned externally, for example by a cron job.
31# When testing, remember to clobber cache entries manually as needed.
32#
33# If we thought users were going to request many layouts of the same
34# graph but in different layout formats, we might just cache the layout
35# in canonical dot format, and run neato -nop for code generation.
36#
37# The first version of this script was written in tclsh by John Ellson
38# and had some additional features for tclet integration, background
39# images, and a "Graph by Webdot" logo in each image; they are not
40# included here.
41#
42# Thanks to John Linderman for perl hacking. --Stephen North
43#
44#
45
46use strict;
47use FileHandle;
48use Fcntl ':flock';
49use File::Path qw( mkpath );
50use LWP;
51
52# bugs:
53# need to test imap, ismap, svg
54# vrml requires its own subdir?
55
56# set $Tdir to the webdot cache directory.  note that this script must have
57# write permission on the directory when it is run by your web server.
58# for example apache's default httpd.conf specifies that CGI programs such
59# as this one run as user 'nobody'.  in that case the cache directory must
60# be writable by 'nobody' - either mode 0777 or chown to nobody.
61my $Tdir = '/home/north/www/webdot/tmp';
62
63# set $GraphvizBinDir to the dot/neato/twopi standalone command directory.
64# DotFontPath shouldn't be necessary, but our graphviz installation is broken.
65my $DotFontPath = '/home/north/lib/fonts/dos/windows/fonts';
66my $GraphvizBinDir = '/home/north/arch/linux.i386/bin';
67
68# set $EPSIfilter to the script that maps Postscript into epsi.
69my $EPSIfilter = '/usr/bin/ps2epsi';
70
71# set $GS to Ghostscript - must be compiled with -sDEVICE=pdfwrite enabled!
72my $GS = '/usr/bin/gs';
73
74# set $SigCommand to the path of your signature utility.  if you don't have md5,
75# you could likely use GNU cksum or just /usr/bin/sum in a pinch.
76# my $SigCommand = '/usr/local/SSLeay-0.9.0b/bin/md5';  for www.research.att.com
77my $SigCommand = '/usr/bin/cksum';
78
79# set
80
81my %KnownTypes = (
82	dot =>    'text/vnd.graphviz',
83	gv =>     'text/vnd.graphviz',
84	xdot =>   'text/vnd.graphviz',
85	gif =>    'image/gif',
86	png =>    'image/png',
87	mif =>    'application/x-mif',
88	hpgl =>   'application/x-hpgl',
89	pcl =>    'application/x-pcl',
90	vrml =>   'x-world/x-vrml',
91	vtx =>    'application/x-vtx',
92	ps =>     'application/postscript',
93	epsi =>   'application/postscript',
94	pdf =>    'application/pdf',
95	map =>    'text/plain',
96	cmapx =>  'text/plain',
97	txt =>    'text/plain',
98	src =>    'text/plain',
99	svg =>    'image/svg+xml',
100);
101
102my %KnownServers = ( 'dot' => 1, 'neato' => 1, 'twopi' => 1, 'circo' => 1, 'fdp' => 1 );
103
104# What content type is returned.  Usually $KnownTypes{$tag},
105# but not always.
106my $ContentType = 'text/plain';
107
108# What is returned.  In good times, the results of running dot,
109# (and maybe a postprocessor), in bad times, an apologetic message.
110my $TheGoods = 'Server Error, profound apologies';
111
112# Arrange to return an error message
113sub trouble {
114    $TheGoods = shift;
115    $ContentType = 'text/plain';
116}
117
118
119sub run_under_lock {
120    my ($fh, $cmd) = @_;
121    my $rc;
122
123    flock($fh, LOCK_EX);	# Upgrade to exclusive lock
124    truncate($fh, 0);		# Make sure file is empty
125    $rc = system($cmd);		# Run command to load file
126    unless ($rc == 0) {
127	trouble("Server error: Non-zero exit $rc from $cmd\n");
128	return;
129    }
130    flock($fh, LOCK_SH);	# Downgrade to shared lock
131    return 1;
132}
133
134sub up_doc {
135    my ($base, $url, $layouter, $tag) = @_;
136    my $dotdir = "$Tdir/$layouter/$base";
137    my $dotfile = "$dotdir/source";
138    my $tagfile = "$dotdir/$tag";
139    my $dotfh = new FileHandle;
140    my $tagfh = new FileHandle;
141    my $fh = new FileHandle;
142    my ($size, $mtime, $cmd, $webdoc, $content);
143    my ($ttime, $rc);
144    my $now = time();
145    my ($oldsig, $newsig);
146
147    unless (-d $dotdir) {
148	unless (mkpath( [ $dotdir ], 0, 02775)) {
149	    trouble("Server error: Unable to make directory $dotdir: $!");
150	    return;
151	}
152    }
153    unless (open($dotfh, "+>> $dotfile")) {
154	trouble("Server error: Open failed on $dotfile: $!");
155	return;
156    }
157    flock($dotfh, LOCK_SH);
158    ($size, $mtime) = (stat($dotfh))[7,9];
159    # if($size > 0) { $oldsig = `$SigCommand $dotfile`; }
160    $oldsig = ($size > 0? `$SigCommand $dotfile` : 0);
161
162    my $browser = LWP::UserAgent->new();   ## Create a virtual browser
163    $browser->agent("Kipper Browser");     ## Name it
164    ## Do a GET request on the URL with the fake browser
165    $webdoc = $browser->request(HTTP::Request->new(GET => $url));
166    if($webdoc->is_success){ ## found it
167	$content = $webdoc->content();
168	flock($dotfh, LOCK_EX);
169	truncate($dotfh, 0);
170	print $dotfh $content;
171	$dotfh->autoflush();
172	flock($dotfh, LOCK_SH);
173	($size, $mtime) = (stat($dotfh))[7,9];
174    } else {                 ## did not find it
175	trouble("Server error: Could not find $url\n");
176	return;
177    }
178
179    ($size, $mtime) = (stat($dotfh))[7,9];
180    # if (($size == 0) || ((($now - $mtime)/(60*60)) > $SourceHours)) { }
181    unless ($size) {
182	trouble("Empty dot source\n");
183	return;
184    }
185    unless (open($tagfh, "+>> $tagfile")) {
186	trouble("Server error: Open failed on $tagfile: $!");
187	return;
188    }
189    flock($tagfh, LOCK_SH);
190    ($size, $ttime) = (stat($tagfh))[7,9];
191    $newsig = `$SigCommand $dotfile`;
192    if (($size == 0) || ($oldsig ne $newsig)) {
193	my $dottag = $tag;
194	my $tmpfile;
195	my $tmpfh;
196	if (($tag eq 'epsi') || ($tag eq 'pdf')) {
197	    $dottag = 'ps';
198	    $tmpfile = "$dotdir/ps";
199	    $tmpfh = new FileHandle;
200	    unless (open($tmpfh, "+>> $tmpfile")) {
201		trouble("Server error: Open failed on $tmpfile: $!");
202		return;
203	    }
204	} else {
205	    $tmpfile = $tagfile;
206	    $tmpfh = $tagfh;
207	}
208	$cmd = "DOTFONTPATH=\"$DotFontPath\" $GraphvizBinDir/$layouter -T$dottag < $dotfile > $tmpfile";
209	return unless (run_under_lock($tmpfh, $cmd));
210	## might have to postprocess ps into epsi or pdf
211	if ($tag eq 'epsi') {
212		$cmd = "$EPSIfilter < $tmpfile > $tagfile";
213		return unless (run_under_lock($tagfh, $cmd));
214	} elsif ($tag eq 'pdf') {
215		# need BoundingBox
216		my @box;
217		open(EPS, "<$tmpfile") or
218			trouble "webdot: Cannot open $tmpfile for reading", return;
219		while(<EPS>) {
220			if(/^%%BoundingBox:\s*(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s*$/) {
221				@box = ($1, $2, $3, $4);
222				last;
223			}
224		}
225		unless( @box ) {
226			trouble "webdot: I didn't find a valid boundingbox in $tmpfile";
227			return;
228		}
229		$cmd = "$GS -dDEVICEWIDTHPOINTS=$box[2] -dDEVICEHEIGHTPOINTS=$box[3] -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sOutputFile=$tagfile $tmpfile";
230		return unless (run_under_lock($tagfh, $cmd));
231	}
232    }
233    seek($tagfh,0,0);
234    {
235	local($/);	# slurp mode
236	$TheGoods = <$tagfh>;
237    }
238    1;
239}
240
241
242sub get_dot {
243    my $urltag = shift;
244    my ($url, $base, $layouter, $tag);
245
246    if ($urltag =~ m%^/%) {
247	my $serverport;
248	if ($serverport = $ENV{'SERVER_NAME'}) {
249	    unless (80 == $ENV{'SERVER_PORT'}) {
250		$serverport .= ":$ENV{'SERVER_PORT'}";
251	    }
252	} else {
253	    $serverport = 'localhost';
254	}
255	$urltag = "http://$serverport$urltag";
256    }
257
258    # if ($urltag =~ /^(.+)[.]([^.]+)$/) {
259    if ($urltag =~ /^(.+)[.]([^.]+)[.]([^.]+)$/) {
260	($url, $layouter, $tag) = ($1, $2, $3);
261	unless ($KnownServers{$layouter}) {
262	    trouble("Unknown layout service $layouter from $url\n");
263	    return;
264	}
265	unless ($ContentType = $KnownTypes{$tag}) {
266	    trouble("Unknown tag type $tag from $url\n");
267	    return;
268	}
269	$base = $url;
270	$base =~ s%[/:]%-%g;	# remember to make safe for PC's
271	# trouble("I see: '$base' '$url' '$layouter' '$tag' \n"); return;
272	up_doc($base, $url, $layouter, $tag);
273    } else {
274	trouble("Unknown url format: $urltag\n");
275    }
276}
277
278
279sub show_results {
280    my $size = length($TheGoods);
281
282    print <<EOF ;
283Content-type: $ContentType
284Content-length: $size
285Pragma: no-cache
286
287EOF
288    print($TheGoods);
289}
290
291
292sub main {
293    my $arg;
294    if ($arg = ($ENV{'PATH_INFO'})) {
295	    $arg =~ s%^/([^:]+:/)%$1%;		# strip initial slash before fully-qualified URLs
296	    $arg =~ s%^([^:]+:/)([^/])%$1/$2%;	# reinstate double slash before hostname if web server removed it
297	}
298	else  {
299		$arg = $ARGV[0];
300	}
301    get_dot($arg);
302    show_results();
303}
304main();
305