1<?php
2/**
3 * Utility Class for Horde_Ldap
4 *
 * This class provides some functionality to the other classes of Horde_Ldap but
6 * most of the methods can be used separately as well.
7 *
8 * Copyright 2009 Benedikt Hallinger
9 * Copyright 2010-2017 Horde LLC (http://www.horde.org/)
10 *
11 * @category  Horde
12 * @package   Ldap
13 * @author    Benedikt Hallinger <beni@php.net>
14 * @author    Jan Schneider <jan@horde.org>
15 * @license   http://www.gnu.org/licenses/lgpl-3.0.html LGPL-3.0
16 */
class Horde_Ldap_Util
{
    /**
     * Explodes the given DN into its elements.
     *
     * {@link http://www.ietf.org/rfc/rfc2253.txt RFC 2253} says, a
     * Distinguished Name is a sequence of Relative Distinguished Names (RDNs),
     * which themselves are sets of Attributes. Each single-valued RDN is
     * returned as a string, each multivalued RDN as an array of its parts.
     *
     * For example, the DN 'OU=Sales+CN=J. Smith,DC=example,DC=net' is exploded
     * to (the parts of the multivalued RDN are sorted by canonicalDN()):
     * <code>
     * array(array('CN=J. Smith', 'OU=Sales'),
     *       'DC=example',
     *       'DC=net')
     * </code>
     *
     * [NOT IMPLEMENTED] DNs might also contain values, which are the bytes of
     * the BER encoding of the X.500 AttributeValue rather than some LDAP
     * string syntax. These values are hex-encoded and prefixed with a #. To
     * distinguish such BER values, explodeDN would use references to the
     * actual values, e.g. '1.3.6.1.4.1.1466.0=#04024869,DC=example,DC=com'
     * would be exploded to:
     * <code>
     * array(array('1.3.6.1.4.1.1466.0' => "\004\002Hi"),
     *       array('DC' => 'example'),
     *       array('DC' => 'com'))
     * </code>
     * See {@link http://www.vijaymukhi.com/vmis/berldap.htm} for more
     * information on BER.
     *
     * It also performs the following operations on the given DN:
     * - The DN is first normalized through canonicalDN().
     * - Unescapes "\" followed by ",", "+", """, "\", "<", ">", ";", "#", "="
     *   or a hexpair (see unescapeDNValue()).
     * - Removes the leading 'OID.' characters if the type is an OID instead of
     *   a name.
     * - If an RDN contains multiple parts, the parts are re-ordered
     *   alphabetically.
     *
     * $options is a list of name/value pairs, valid options are:
     * - casefold:   Controls case folding of attribute types names.
     *               Attribute values are not affected by this option.
     *               The default is to uppercase. Valid values are:
     *               - lower: Lowercase attribute types names.
     *               - upper: Uppercase attribute type names. This is the
     *                        default.
     *               - none:  Do not change attribute type names.
     * - reverse:    If true, the RDN sequence is reversed.
     * - onlyvalues: If true, then only attributes values are returned ('foo'
     *               instead of 'cn=foo')
     *
     * @todo implement BER
     *
     * @param string $dn      The DN that should be exploded.
     * @param array  $options Options to use.
     *
     * @return array   Parts of the exploded DN.
     * @throws Horde_Ldap_Exception if canonicalDN() rejects an RDN.
     */
    public static function explodeDN($dn, array $options = array())
    {
        $options = array_merge(
            array(
                'casefold' => 'upper',
                'onlyvalues' => false,
                'reverse' => false,
            ),
            $options
        );

        // Escaping of DN and stripping of "OID.".
        $dn = self::canonicalDN($dn, array('casefold' => $options['casefold']));

        // Split at every comma that is not preceded by a backslash.
        $dn_array = preg_split('/(?<!\\\\),/', $dn);

        // Construct subarrays for multivalued RDNs, unescape the values and
        // apply casefolding.
        foreach ($dn_array as $key => $value) {
            $value_u = self::unescapeDNValue($value);
            $rdns    = self::splitRDNMultivalue($value_u[0]);
            if (count($rdns) > 1) {
                // Multivalued RDN: every part is formatted on its own.
                foreach ($rdns as $subrdn_k => $subrdn_v) {
                    $rdns[$subrdn_k] = self::_formatRDN($subrdn_v, $options);
                }
                $dn_array[$key] = $rdns;
            } else {
                // Singlevalued RDN: format the still escaped value.
                $dn_array[$key] = self::_formatRDN($value, $options);
            }
        }

        return $options['reverse'] ? array_reverse($dn_array) : $dn_array;
    }

    /**
     * Applies casefolding, optional value extraction and unescaping to a
     * single "type=value" RDN part on behalf of explodeDN().
     *
     * @param string $rdn     A single RDN part.
     * @param array  $options The merged explodeDN() options; 'casefold' and
     *                        'onlyvalues' are read.
     *
     * @return string  The formatted RDN part, or only its value if
     *                 'onlyvalues' is set. An RDN without a value yields an
     *                 empty string in that case.
     */
    protected static function _formatRDN($rdn, array $options)
    {
        $fold = $options['casefold'];
        if ($fold === 'upper' || $fold === 'lower') {
            // Only the leading "type=" prefix is folded, never the value.
            $rdn = preg_replace_callback(
                '/^(\w+=)/',
                function ($match) use ($fold) {
                    return $fold === 'upper'
                        ? Horde_String::upper($match[1])
                        : Horde_String::lower($match[1]);
                },
                $rdn
            );
        }

        if ($options['onlyvalues']) {
            // Keep everything after the first unescaped "=". If there is no
            // value at all the pattern does not match; fall back to an empty
            // string instead of reading an undefined match group.
            $rdn = preg_match('/(.+?)(?<!\\\\)=(.+)/', $rdn, $matches)
                ? $matches[2]
                : '';
        }

        $unescaped = self::unescapeDNValue($rdn);
        return $unescaped[0];
    }

    /**
     * Escapes DN values according to RFC 2253.
     *
     * Escapes the given VALUES according to RFC 2253 so that they can be
     * safely used in LDAP DNs.  The characters ",", "+", """, "\", "<", ">",
     * ";", "#", "=" with a special meaning in RFC 2253 are preceded by a
     * backslash. Control characters with an ASCII code < 32 are represented as
     * \hexpair.  Finally all leading and trailing spaces are converted to
     * sequences of \20; this includes values consisting of spaces only.
     *
     * @param string|array $values  DN values that should be escaped.
     *
     * @return array  The escaped values.
     */
    public static function escapeDNValue($values)
    {
        // Parameter validation.
        if (!is_array($values)) {
            $values = array($values);
        }

        // Turns a run of whitespace into the same number of "\20" sequences.
        $pad = function ($spaces) {
            return str_repeat('\20', strlen($spaces[0]));
        };

        foreach ($values as $key => $val) {
            // Escaping of DN meta characters.
            $val = addcslashes($val, '\\,+"<>;#=');

            // ASCII < 32 escaping.
            $val = self::asc2hex32($val);

            // Convert all leading and trailing spaces to sequences of \20.
            // Both ends are anchored separately so that a value made up of
            // spaces only is escaped completely.
            $val = preg_replace_callback('/^\s+|\s+$/', $pad, $val);

            if (null === $val) {
                // Guard kept from the original implementation; $val can only
                // be null here if one of the PCRE calls above failed.
                $val = '\0';
            }

            $values[$key] = $val;
        }

        return $values;
    }

    /**
     * Unescapes DN values according to RFC 2253.
     *
     * Reverts the conversion done by escapeDNValue().
     *
     * Any escape sequence starting with a backslash - hexpair or special
     * character - will be transformed back to the corresponding character.
     * The special characters are replaced one after another in the order
     * listed below, then the hexpairs are decoded.
     *
     * @param string|array $values  DN values.
     *
     * @return array  Unescaped DN values.
     */
    public static function unescapeDNValue($values)
    {
        // Parameter validation.
        if (!is_array($values)) {
            $values = array($values);
        }

        $escaped = array('\\\\', '\,', '\+', '\"', '\<', '\>', '\;', '\#', '\=');
        $plain   = array('\\', ',', '+', '"', '<', '>', ';', '#', '=');

        foreach ($values as $key => $val) {
            // Strip slashes from special chars, then translate hex codes
            // into ascii.
            $values[$key] = self::hex2asc(str_replace($escaped, $plain, $val));
        }

        return $values;
    }

    /**
     * Converts a DN into a canonical form.
     *
     * DN can either be a string or an array as returned by explodeDN(),
     * which is useful when constructing a DN.  The DN array may be indexed
     * (each array value is a OCL=VALUE pair) or associative (array key is OCL
     * and value is VALUE).
     *
     * It performs the following operations on the given DN:
     * - Removes the leading 'OID.' characters if the type is an OID instead of
     *   a name.
     * - Escapes all RFC 2253 special characters (",", "+", """, "\", "<", ">",
     *   ";", "#", "="), slashes ("/"), and any other character where the ASCII
     *   code is < 32 as \hexpair.
     * - Converts all leading and trailing spaces in values to be \20.
     * - If an RDN contains multiple parts, the parts are re-ordered
     *   alphabetically.
     *
     * $options is a list of name/value pairs, valid options are:
     *
     * - casefold:  Controls case folding of attribute type names. Attribute
     *              values are not affected by this option. The default is to
     *              uppercase. Valid values are:
     *              - lower: Lowercase attribute type names.
     *              - upper: Uppercase attribute type names.
     *              - none:  Do not change attribute type names.
     * - reverse:   If true, the RDN sequence is reversed.
     * - separator: Separator to use between RDNs. Defaults to comma (',').
     *              It is used both for splitting a string DN and for joining
     *              the result.
     *
     * The empty string "" is a valid DN and is returned unchanged.
     *
     * Note that values which are already escaped get escaped a second time.
     *
     * @param array|string $dn      The DN.
     * @param array        $options Options to use.
     *
     * @return string  The canonical DN.
     * @throws Horde_Ldap_Exception if an RDN is not in "type=value" format.
     */
    public static function canonicalDN($dn, array $options = array())
    {
        if ($dn === '') {
            // Empty DN is valid.
            return $dn;
        }

        // Options check.
        $options = array_merge(
            array(
                'casefold' => 'upper',
                'reverse' => false,
                'separator' => ',',
            ),
            $options
        );

        if (!is_array($dn)) {
            // Split at every unescaped separator. The separator is quoted
            // including the pattern delimiter, so that "/" works as well.
            $dn = preg_split(
                '/(?<!\\\\)' . preg_quote($options['separator'], '/') . '/',
                $dn
            );

            // Clear wrong splitting (possibly we have split too much).
            $dn = self::_correctDNSplitting($dn, $options['separator']);
        } else {
            // Is array, check if the array is indexed or associative.
            $assoc = false;
            foreach (array_keys($dn) as $dn_key) {
                if (!is_int($dn_key)) {
                    $assoc = true;
                    break;
                }
            }

            // Convert to indexed, if associative array detected.
            if ($assoc) {
                $newdn = array();
                foreach ($dn as $dn_key => $dn_part) {
                    if (is_array($dn_part)) {
                        // We assume here that the RDN parts are also
                        // associative.
                        ksort($dn_part, SORT_STRING);
                        // Copy array as-is, so we can resolve it later.
                        $newdn[] = $dn_part;
                    } else {
                        $newdn[] = $dn_key . '=' . $dn_part;
                    }
                }
                $dn = $newdn;
            }
        }

        // Escaping and casefolding.
        foreach ($dn as $pos => $dnval) {
            if (is_array($dnval)) {
                // Subarray detected, this means most probably that we had a
                // multivalued DN part, which must be resolved.
                $parts = array();
                foreach ($dnval as $subkey => $subval) {
                    // Build RDN part.
                    if (!is_int($subkey)) {
                        $subval = $subkey . '=' . $subval;
                    }
                    $parts[] = self::canonicalDN($subval, $options);
                }
                $dn[$pos] = implode('+', $parts);
                continue;
            }

            // Try to split multivalued RDNs into array.
            $rdns = self::splitRDNMultivalue($dnval);
            if (count($rdns) > 1) {
                // Multivalued RDN was detected. The RDN value is expected to
                // be correctly split by splitRDNMultivalue(). Sort the parts
                // alphabetically and canonicalize each of them.
                sort($rdns, SORT_STRING);
                $parts = array();
                foreach ($rdns as $rdn) {
                    $parts[] = self::canonicalDN($rdn, $options);
                }
                $dn[$pos] = implode('+', $parts);
                continue;
            }

            // No multivalued RDN. Split at first unescaped "=".
            $dn_comp = self::splitAttributeString($rdns[0]);
            if (count($dn_comp) != 2) {
                throw new Horde_Ldap_Exception('Invalid RDN: ' . $rdns[0]);
            }
            // Trim left whitespaces because of "cn=foo, l=bar" syntax
            // (whitespace after comma).
            $ocl = ltrim($dn_comp[0]);
            $val = $dn_comp[1];

            // Strip 'OID.', otherwise apply casefolding and escaping.
            if (substr(Horde_String::lower($ocl), 0, 4) === 'oid.') {
                $ocl = substr($ocl, 4);
            } else {
                if ($options['casefold'] === 'upper') {
                    $ocl = Horde_String::upper($ocl);
                } elseif ($options['casefold'] === 'lower') {
                    $ocl = Horde_String::lower($ocl);
                }
                $ocl = self::escapeDNValue(array($ocl));
                $ocl = $ocl[0];
            }

            // Escaping of DN value.
            // TODO: if the value is already correctly escaped, we get
            //       double escaping.
            $val = self::escapeDNValue(array($val));
            $val = str_replace('/', '\/', $val[0]);

            $dn[$pos] = $ocl . '=' . $val;
        }

        if ($options['reverse']) {
            $dn = array_reverse($dn);
        }

        return implode($options['separator'], $dn);
    }

    /**
     * Escapes the given values according to RFC 2254 so that they can be
     * safely used in LDAP filters.
     *
     * Any control characters with an ASCII code < 32 as well as the characters
     * with special meaning in LDAP filters "*", "(", ")", and "\" (the
     * backslash) are converted into the representation of a backslash followed
     * by two hex digits representing the hexadecimal value of the character.
     *
     * @param array|string $values Values to escape.
     *
     * @return array Escaped values.
     */
    public static function escapeFilterValue($values)
    {
        // Parameter validation.
        if (!is_array($values)) {
            $values = array($values);
        }

        foreach ($values as $key => $val) {
            // Escaping of filter meta characters. The backslash has to come
            // first, otherwise the backslashes added for the other characters
            // would be escaped again.
            $val = str_replace(array('\\', '*', '(', ')'),
                               array('\5c', '\2a', '\28', '\29'),
                               $val);

            // ASCII < 32 escaping.
            $val = self::asc2hex32($val);

            if (null === $val) {
                // Guard kept from the original implementation; $val can only
                // be null here if the PCRE call in asc2hex32() failed.
                $val = '\0';
            }

            $values[$key] = $val;
        }

        return $values;
    }

    /**
     * Unescapes the given values according to RFC 2254.
     *
     * Reverses the conversion done by {@link escapeFilterValue()}.
     *
     * Converts any sequences of a backslash followed by two hex digits into
     * the corresponding character.
     *
     * @param array|string $values Values to unescape.
     *
     * @return array Unescaped values.
     */
    public static function unescapeFilterValue($values = array())
    {
        // Parameter validation.
        if (!is_array($values)) {
            $values = array($values);
        }

        foreach ($values as $key => $value) {
            // Translate hex code into ascii.
            $values[$key] = self::hex2asc($value);
        }

        return $values;
    }

    /**
     * Converts all ASCII chars < 32 to "\HEX".
     *
     * Every byte with a value below 32 is replaced by a backslash followed by
     * its two-digit lowercase hexadecimal code. The string is converted in a
     * single pass, so every control character is escaped exactly once no
     * matter how many different ones the string contains.
     *
     * @param string $string String to convert.
     *
     * @return string $string with all control characters hex-escaped.
     */
    public static function asc2hex32($string)
    {
        return preg_replace_callback(
            '/[\x00-\x1f]/',
            function ($char) {
                return sprintf('\\%02x', ord($char[0]));
            },
            $string
        );
    }

    /**
     * Converts all hexadecimal expressions ("\HEX") to their original ASCII
     * characters.
     *
     * @author beni@php.net, heavily based on work from DavidSmith@byu.net
     *
     * @param string $string String to convert.
     *
     * @return string ASCII representation of $string.
     */
    public static function hex2asc($string)
    {
        // A backslash followed by exactly two hex digits is one encoded byte.
        return preg_replace_callback(
            '/\\\\([0-9A-Fa-f]{2})/',
            function ($hex) {
                return chr(hexdec($hex[1]));
            },
            $string
        );
    }

    /**
     * Splits a multivalued RDN value into an array.
     *
     * A RDN can contain multiple values, separated by a plus sign. This method
     * returns each separate ocl=value pair of the RDN part.
     *
     * If no multivalued RDN is detected, an array containing only the original
     * RDN part is returned.
     *
     * For example, the multivalued RDN 'OU=Sales+CN=J. Smith' is exploded to:
     * <kbd>array([0] => 'OU=Sales', [1] => 'CN=J. Smith')</kbd>
     *
     * The method tries to be smart if it encounters unescaped "+" characters,
     * but may fail, so better ensure escaped "+" in attribute names and
     * values.
     *
     * [BUG] If you have a multivalued RDN with unescaped plus characters and
     *       there is an unescaped plus sign at the end of a value followed by
     *       an attribute name containing an unescaped plus, then you will get
     *       wrong splitting:
     *         $rdn = 'OU=Sales+C+N=J. Smith';
     *       returns:
     *         array('OU=Sales+C', 'N=J. Smith');
     *       The "C+" is treated as the value of the first pair instead of as
     *       the attribute name of the second pair. To prevent this, escape
     *       correctly.
     *
     * @param string $rdn Part of a (multivalued) escaped RDN (e.g. ou=foo or
     *                    ou=foo+cn=bar)
     *
     * @return array The components of the multivalued RDN.
     */
    public static function splitRDNMultivalue($rdn)
    {
        // Split at every "+" that is not preceded by a backslash, then glue
        // fragments that are no "type=value" pairs back together.
        $parts = preg_split('/(?<!\\\\)\+/', $rdn);
        return array_values(self::_correctDNSplitting($parts, '+'));
    }

    /**
     * Splits a attribute=value syntax into an array.
     *
     * The split will occur at the first unescaped '=' character.
     *
     * @param string $attr An attribute-value string.
     *
     * @return array Indexed array: 0=attribute name, 1=attribute value. The
     *               array has only one element if $attr contains no
     *               unescaped '='.
     */
    public static function splitAttributeString($attr)
    {
        return preg_split('/(?<!\\\\)=/', $attr, 2);
    }

    /**
     * Corrects splitting of DN parts.
     *
     * Elements that are not in "attr=value" format result from an unescaped
     * separator inside an attribute name or value. We assume that it was the
     * attribute value, so such a fragment is appended to the preceding
     * element. Fragments in front of the first valid element are prepended to
     * the element that follows them. Several fragments in a row are all
     * merged into the same element.
     *
     * TODO: To solve this properly, we might ask the schema. The
     *       Horde_Ldap_Util class must remain independent from the other
     *       classes or connections though.
     *
     * @param array  $dn        Raw DN array.
     * @param string $separator Separator that was used when splitting.
     *
     * @return array Corrected array, indexed from 0.
     */
    protected static function _correctDNSplitting($dn = array(),
                                                  $separator = ',')
    {
        $corrected = array();
        // Fragments seen before any valid element; null means "none".
        $carry = null;

        foreach ($dn as $part) {
            if ($carry !== null) {
                // Leading fragment: prepend to the current element and
                // re-check the merged string below.
                $part  = $carry . $separator . $part;
                $carry = null;
            }

            if (preg_match('/.+(?<!\\\\)=.+/', $part)) {
                $corrected[] = $part;
            } elseif ($corrected) {
                // Append to the last accepted element.
                $corrected[count($corrected) - 1] .= $separator . $part;
            } else {
                $carry = $part;
            }
        }

        if ($carry !== null) {
            // Nothing to merge with, keep the fragment as it is.
            $corrected[] = $carry;
        }

        return $corrected;
    }
}
620