# $NetBSD: join.awk,v 1.7 2019/10/24 16:52:11 rhialto Exp $
#
# Copyright (c) 2002 The NetBSD Foundation, Inc.
# All rights reserved.
#
# This code is derived from software contributed to The NetBSD Foundation
# by Luke Mewburn of Wasabi Systems.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# join.awk F1 F2
#	Similar to join(1), this reads a list of words from F1
#	and outputs lines in F2 with a first word that is in F1.
#	For purposes of matching the first word, both instances are
#	canonicalised via unvis(word).
#	Each match is printed as the stored F1 line followed by the F2
#	line with its first field blanked.
#	NOTE(review): earlier wording said "the version from F2 is printed",
#	but words[] holds the F1 line; confirm which is intended.
#	Neither file needs to be sorted.
#	Exits with status 1 on a usage error or if F1 cannot be read.

# unvis(s [, file, line])
#	Return s with the backslash escapes this script understands decoded:
#	"\\" becomes a single backslash and "\ooo" (exactly three octal
#	digits) becomes the character with that octal code.  Any other
#	backslash sequence is reported on stderr and its backslash is kept
#	literally.  A lone backslash at the very end of s is copied as is.
#
#	file and line are optional and only label the diagnostic.  When file
#	is omitted or empty, FILENAME (or "stdin") and FNR are used, which is
#	only meaningful while awk is reading its main input.
#	result and pos are function-local scratch variables.
function unvis(s, file, line,    result, pos) \
{
	# XXX: We don't handle the complete range of vis encodings
	if (file == "") {
		file = (FILENAME ? FILENAME : "stdin")
		line = FNR
	}
	result = ""
	while (length(s) > 0) {
		pos = match(s, "\\\\.")
		if (pos == 0) {
			# no further escapes: keep the remainder verbatim
			result = result "" s
			break
		}
		# copy the part before the next backslash
		result = result "" substr(s, 1, pos - 1)
		s = substr(s, pos)
		# process the backslash and next few chars
		if (substr(s, 1, 2) == "\\\\") {
			# double backslash -> single backslash
			result = result "\\"
			s = substr(s, 3)
		} else if (match(s, "\\\\[0-7][0-7][0-7]") == 1) {
			# \ooo with three octal digits.
			# XXX: use strtonum() if that is available
			result = result "" sprintf("%c", \
				0+substr(s, 2, 1) * 64 + \
				0+substr(s, 3, 1) * 8 + \
				0+substr(s, 4, 1))
			s = substr(s, 5)
		} else {
			# unrecognised escape: keep the literal backslash
			printf "%s: %s:%s: unrecognised escape %s\n", \
				ARGV[0], file, line, substr(s, 1, 2) \
				>"/dev/stderr"
			result = result "" substr(s, 1, 1)
			s = substr(s, 2)
		}
	}
	return result
}

# Load F1: index every line by the decoded form of its first word.
BEGIN \
{
	if (ARGC != 3) {
		printf("Usage: join file1 file2\n") >"/dev/stderr"
		exit 1
	}
	f1name = ARGV[1]
	f1line = 0
	# getline <file does not update FILENAME or FNR, so count lines here
	# and hand the location to unvis() for its diagnostics.
	while ((f1status = (getline < f1name)) > 0) {
		f1line++
		f1 = unvis($1, f1name, f1line)
		words[f1] = $0
	}
	# getline returns -1 on error; do not treat that as an empty F1.
	if (f1status < 0) {
		printf("%s: cannot read %s\n", ARGV[0], f1name) >"/dev/stderr"
		exit 1
	}
	close(f1name)
	# drop F1 from the argument list so only F2 is read as main input
	delete ARGV[1]
}

# For every F2 line, compute the decoded first word used for the lookup.
{ f1 = unvis($1) }

# Emit the stored F1 line followed by the F2 line minus its first field.
f1 in words \
{
	$1=""
	print words[f1] $0
}