1# Written by Aleksey Cheusov <vle@gmx.net>, public domain
2#
3# This awk module is a part of RunAWK distribution,
4#        http://sourceforge.net/projects/runawk
5#
6############################################################
7
8# =head2 tokenre.awk
9#
10# By default AWK splits input lines into tokens according to regular
11# expression that defines "spaces" between tokens using special
12# variable FS. In many situations it is more useful to define regular
13# expressions for tokens themselves. This is what this module does.
14#
15# =over 2
16#
17# =item I<tokenre(STRING, REGEXP)>
18#
19# extracts substrings from STRING
20# according to REGEXP from the left to the right and assigns $1, $2
21# etc. and NF variable.
22#
23# =item I<tokenre0(REGEXP)>
24#
# Does the same as `tokenre' but splits $0 instead.
26#
27# =item I<splitre(STRING, ARR, REGEXP)>
28#
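# The same as `tokenre' but the tokens are assigned to ARR[1], ARR[2]...
# instead of $1, $2... The number of extracted tokens is returned.
#
# =item I<splitre0(ARR, REGEXP)>
#
# Does the same as `splitre' but splits $0 instead.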
31#
32# =item I<TRE>
33#
34# global variable. If it is set to non-empty string, all input
35# lines are split automatically.
36#
37# =back
38#
39
# See example/demo_tokenre for a usage example
41
# tokenre(s, re) -- extract the substrings of S that match RE, from left
# to right, and store them in $1, $2, ...; NF is set to their count.
#
#   s   string to tokenize (a scalar, so the caller's value is not modified)
#   re  regular expression describing one token
#
# NF is reset to 0 before scanning, so the previous fields are dropped.
# No value is returned.  RSTART and RLENGTH are overwritten by match().
#
# Each search runs on the not-yet-consumed remainder of S, so an anchor
# such as ^ refers to the start of that remainder.
#
# Zero-length matches (possible with regexps like "[a-z]*") never yield a
# token: the scan just steps over one character and continues.  Without
# this the loop would never advance and would not terminate.
function tokenre (s, re){
	NF = 0
	while (match(s, re)){
		if (RLENGTH == 0){
			# Empty match at the very end: nothing left to scan.
			if (RSTART > length(s))
				break
			# Skip the character at RSTART to guarantee progress.
			s = substr(s, RSTART + 1)
			continue
		}
		++NF
		$NF = substr(s, RSTART, RLENGTH)
		s = substr(s, RSTART + RLENGTH)
	}
}
50
# tokenre0(re) -- tokenize the current input record.
#
# Passes $0 to tokenre() together with RE; tokenre() stores the
# substrings matching RE in $1, $2, ... and sets NF.
# No value is returned.
function tokenre0 (re){
	tokenre( $0, re )
}
54
# splitre(s, arr, re) -- extract the substrings of S that match RE, from
# left to right, into ARR[1], ARR[2], ...
#
#   s    string to tokenize (a scalar, so the caller's value is not modified)
#   arr  array receiving the tokens; only indices 1..N are assigned,
#        elements already present under other indices are left untouched
#   re   regular expression describing one token
#   cnt  local variable, not to be passed by callers
#
# Returns N, the number of tokens stored.  RSTART and RLENGTH are
# overwritten by match().
#
# Each search runs on the not-yet-consumed remainder of S, so an anchor
# such as ^ refers to the start of that remainder.
#
# Zero-length matches (possible with regexps like "[a-z]*") never yield a
# token: the scan just steps over one character and continues.  Without
# this the loop would never advance and would not terminate.
function splitre (s, arr, re,             cnt){
	cnt = 0
	while (match(s, re)){
		if (RLENGTH == 0){
			# Empty match at the very end: nothing left to scan.
			if (RSTART > length(s))
				break
			# Skip the character at RSTART to guarantee progress.
			s = substr(s, RSTART + 1)
			continue
		}
		++cnt
		arr [cnt] = substr(s, RSTART, RLENGTH)
		s = substr(s, RSTART + RLENGTH)
	}
	return cnt
}
64
# splitre0(arr, re) -- tokenize the current input record into ARR.
#
# Passes $0, ARR and RE to splitre() and returns whatever splitre()
# returns, i.e. the number of tokens it stored in ARR.
function splitre0 (arr, re){
	return splitre( $0, arr, re )
}
68
# Automatic mode: for every input record, if the global variable TRE is
# a non-empty string, re-split the record with tokenre0(TRE) so that the
# fields hold the tokens matching TRE.
TRE != "" {
	tokenre0(TRE)
}
74