xref: /netbsd/distrib/sets/join.awk (revision 24d9b0a6)
1*24d9b0a6Srhialto#	$NetBSD: join.awk,v 1.7 2019/10/24 16:52:11 rhialto Exp $
282871120Slukem#
382871120Slukem# Copyright (c) 2002 The NetBSD Foundation, Inc.
482871120Slukem# All rights reserved.
582871120Slukem#
682871120Slukem# This code is derived from software contributed to The NetBSD Foundation
782871120Slukem# by Luke Mewburn of Wasabi Systems.
882871120Slukem#
982871120Slukem# Redistribution and use in source and binary forms, with or without
1082871120Slukem# modification, are permitted provided that the following conditions
1182871120Slukem# are met:
1282871120Slukem# 1. Redistributions of source code must retain the above copyright
1382871120Slukem#    notice, this list of conditions and the following disclaimer.
1482871120Slukem# 2. Redistributions in binary form must reproduce the above copyright
1582871120Slukem#    notice, this list of conditions and the following disclaimer in the
1682871120Slukem#    documentation and/or other materials provided with the distribution.
1782871120Slukem#
1882871120Slukem# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
1982871120Slukem# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
2082871120Slukem# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2182871120Slukem# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
2282871120Slukem# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2382871120Slukem# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2482871120Slukem# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2582871120Slukem# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2682871120Slukem# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2782871120Slukem# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2882871120Slukem# POSSIBILITY OF SUCH DAMAGE.
2982871120Slukem#
3082871120Slukem# join.awk F1 F2
3182871120Slukem#	Similar to join(1), this reads a list of words from F1
3282871120Slukem#	and outputs lines in F2 with a first word that is in F1.
#	For purposes of matching the first word, both instances are
#	canonicalised via unvis(word); the matching line from F1 is
#	printed, followed by the remaining fields of the F2 line.
#	Neither file needs to be sorted.
3682871120Slukem
# unvis(s)
#	Return s with the vis-style escapes understood here decoded:
#	  \\	-> a single backslash
#	  \ooo	-> the character whose code is the three octal digits ooo
#	Any other backslash sequence is reported on stderr and the
#	backslash is kept literally.  A lone backslash at the very end
#	of s is copied through unchanged.
#
#	"result" and "pos" are local scratch variables, declared as extra
#	parameters so that calling unvis() does not touch any global
#	state; callers pass only s.
#
#	XXX: We don't handle the complete range of vis encodings
function unvis(s,    result, pos) \
{
	result = ""
	while (length(s) > 0) {
		# find the next backslash that is followed by a character
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# no more escapes: the remainder is literal text
			result = result "" s
			s = ""
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use strtonum() if that is available
			result = result "" sprintf("%c", \
				(substr(s, 2, 1) * 64) + \
				(substr(s, 3, 1) * 8) + \
				(substr(s, 4, 1) + 0))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			# NOTE: FILENAME and FNR describe awk's main input, so
			# they do not identify a file being read via getline.
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], (FILENAME ? FILENAME : "stdin"), FNR, \
				substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	return result
}
762bd53f24Sapb
# Load the word list from F1 (ARGV[1]) before any main input is read.
# Each F1 line is stored in words[], keyed by the unvis()-canonicalised
# form of its first field.  F1 is then removed from ARGV so that only
# F2 is processed by the main rules.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	# getline yields 1 per line read, 0 at end of file, -1 on error;
	# keep the status so an unreadable F1 is not mistaken for an
	# empty one.
	while ( (f1_status = (getline < ARGV[1])) > 0) {
		f1 = unvis($1)
		words[f1] = $0
	}
	if (f1_status < 0) {
		printf("%s: cannot read %s\n", ARGV[0], ARGV[1]) >"/dev/stderr"
		exit 1
	}
	# F1 is fully loaded; release the stream before dropping the name
	close(ARGV[1])
	delete ARGV[1]
}
8982871120Slukem
# For every line of F2: canonicalise the first word and, if that word
# was listed in F1, print the stored F1 line followed by the rest of
# the F2 line.  Blanking $1 makes awk rebuild $0 with OFS, so the
# remainder starts with the output field separator.
{
	f1 = unvis($1)
	if (f1 in words) {
		$1 = ""
		print (words[f1] $0)
	}
}
97