1/*  Part of SWI-Prolog
2
3    Author:        Matt Lilley
4    E-mail:        thetrime@gmail.com
5    WWW:           http://www.swi-prolog.org
6    Copyright (c)  2004-2016, SWI-Prolog Foundation
7                              VU University Amsterdam
8    All rights reserved.
9
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions
12    are met:
13
14    1. Redistributions of source code must retain the above copyright
15       notice, this list of conditions and the following disclaimer.
16
17    2. Redistributions in binary form must reproduce the above copyright
18       notice, this list of conditions and the following disclaimer in
19       the documentation and/or other materials provided with the
20       distribution.
21
22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33    POSSIBILITY OF SUCH DAMAGE.
34*/
35
36
37:-module(xmlenc,
38         [ decrypt_xml/4,   % +EncryptedXML, -DecryptedXML, :KeyCallback, +Options
39           load_certificate_from_base64_string/2 % +Base64String, -Certificate
40         ]).
41:- autoload(library(base64),[base64/2]).
42:- autoload(library(crypto),
43	    [crypto_data_decrypt/6,rsa_private_decrypt/4,hex_bytes/2]).
44:- autoload(library(error),[existence_error/2,domain_error/2]).
45:- autoload(library(lists),[append/3]).
46:- autoload(library(sgml),[load_structure/3]).
47:- autoload(library(ssl),[load_certificate/2]).
48:- autoload(library(uri),[uri_components/2]).
49:- autoload(library(http/http_open),[http_open/3]).
50
51:- meta_predicate
52    decrypt_xml(+, -, 3, +).
53
54/** <module> XML encryption library
55
56This library is a partial implementation of the XML encryption standard.
57It implements the _decryption_ part, which is needed by SAML clients.
58
59@see https://www.w3.org/TR/xmlenc-core1/
60@see https://en.wikipedia.org/wiki/Security_Assertion_Markup_Language
61*/
62
63% These are the 4 mandatory block cipher algorithms
64% (actually aes-192-cbc is not mandatory, but it is easy to support)
65ssl_algorithm('http://www.w3.org/2001/04/xmlenc#tripledes-cbc', 'des3',         8).
66ssl_algorithm('http://www.w3.org/2001/04/xmlenc#aes128-cbc',    'aes-128-cbc', 16).
67ssl_algorithm('http://www.w3.org/2001/04/xmlenc#aes256-cbc',    'aes-256-cbc', 32).
68ssl_algorithm('http://www.w3.org/2001/04/xmlenc#aes192-cbc',    'aes-192-cbc', 24).
69
70%!  decrypt_xml(+DOMIn, -DOMOut, :KeyCallback, +Options) is det.
71%
72%   @arg KeyCallback may be called as follows:
73%           - call(KeyCallback, name,        KeyName,         Key)
74%           - call(KeyCallback, public_key,  public_key(RSA), Key)
75%           - call(KeyCallback, certificate, Certificate,     Key)
76
77decrypt_xml([], [], _, _):- !.
78decrypt_xml([element(ns(_, 'http://www.w3.org/2001/04/xmlenc#'):'EncryptedData',
79                     Attributes, EncryptedData)|Siblings],
80            [Decrypted|NewSiblings], KeyCallback, Options) :-
81    !,
82    decrypt_element(Attributes, EncryptedData, Decrypted, KeyCallback, Options),
83    decrypt_xml(Siblings, NewSiblings, KeyCallback, Options).
84
85decrypt_xml([element(Tag, Attributes, Children)|Siblings],
86            [element(Tag, Attributes, NewChildren)|NewSiblings], KeyCallback, Options) :-
87    !,
88    decrypt_xml(Children, NewChildren, KeyCallback, Options),
89    decrypt_xml(Siblings, NewSiblings, KeyCallback, Options).
90decrypt_xml([Other|Siblings], [Other|NewSiblings], KeyCallback, Options):-
91    decrypt_xml(Siblings, NewSiblings, KeyCallback, Options).
92
93%!   decrypt_element(+Attributes,
94%!                   +EncryptedData,
95%!                   -DecryptedElement,
96%!                   +Options).
97%
98%    Decrypt an EncryptedData element  with   Attributes  and  child
99%    EncryptedData DecryptedElement will either be an element/3 term
100%    or a string as dictacted by   the Type attribute in Attributes.
101%    If Attributes does not contain a  Type attribute then we assume
102%    it is a string
103
104:-meta_predicate(decrypt_element(+, +, -, 3, +)).
105
106decrypt_element(Attributes, EncryptedData, Decrypted, KeyCallback, Options):-
107    XENC = ns(_, 'http://www.w3.org/2001/04/xmlenc#'),
108    (  memberchk(element(XENC:'CipherData', _, CipherData), EncryptedData)
109    -> true
110    ;  existence_error(cipher_data, EncryptedData)
111    ),
112    % The Type attribute is not mandatory. However, 3.1 states that
113    % "Without this information, the decryptor will be unable to automatically restore the XML document to its original cleartext form."
114    (  memberchk('Type'=Type, Attributes)
115    -> true
116    ;  Type = 'http://www.w3.org/2001/04/xmlenc#Content'
117    ),
118
119    % First of all, determine the algorithm used to encrypt the data
120    determine_encryption_algorithm(EncryptedData, Algorithm, IVSize),
121
122    % There are now two tasks remaining, and they seem like they ought to be quite simple, but unfortunately they are not
123    % First, we must determine the key used to encrypt the message
124    determine_key(EncryptedData, Key, KeyCallback, Options),
125
126    % Then, we must determine what the encrypted data even IS
127    % If the message includes a CipherValue then this is straightfoward - the encrypted data is the base64-encoded child
128    % of this element.
129    (  memberchk(element(XENC:'CipherValue', _, CipherValueElement), CipherData)
130    -> base64_element(CipherValueElement, CipherValueWithIV),
131           string_codes(CipherValueWithIV, CipherValueWithIVCodes),
132           length(IVCodes, IVSize),
133           append(IVCodes, CipherCodes, CipherValueWithIVCodes),
134           string_codes(IV, IVCodes),
135           string_codes(CipherText, CipherCodes),
136           length(CipherValueWithIVCodes, _),
137           crypto_data_decrypt(CipherText, Algorithm, Key, IV, DecryptedStringWithPadding, [padding(none), encoding(octet)])
138    ;  memberchk(element(XENC:'CipherReference', CipherReferenceAttributes, CipherReference), CipherData)->
139           % However, it is allowed to include CipherReference instead. This is an arbitrary URI and a list of transforms to convert the
140           % data identified by that URI into the raw octets that represent the encrypted data
141           % The URI attribute of the CipherReference element is mandatory
142           memberchk('URI'=CipherURI, CipherReferenceAttributes),
143           % The transforms attribute is optional, though.
144           (  memberchk(element('Transforms', _, Transforms), CipherReference)
145           -> true
146           ;  Transforms = []
147           ),
148           uri_components(CipherURI, uri_components(Scheme, _, _, _, _)),
149           (  ( Scheme == 'http' ; Scheme == 'https')
150              % FIXME: URI may not be an *absolute* URL
151           ->  with_output_to(string(RawCipherValue),
152                          setup_call_cleanup(http_open(CipherURI, HTTPStream, []),
153                                             copy_stream_data(HTTPStream, current_output),
154                                             close(HTTPStream)))
155           ;  domain_error(resolvable_uri, CipherURI)
156           ),
157           apply_ciphertext_transforms(RawCipherValue, Transforms, CipherValue),
158           sub_string(CipherValue, 0, IVSize, _, IV),
159           sub_string(CipherValue, IVSize, _, 0, CipherText),
160           crypto_data_decrypt(CipherText, Algorithm, Key, IV, DecryptedStringWithPadding, [padding(none), encoding(octet)])
161    ),
162    % The XML-ENC padding scheme does not comply with RFC-1423. This has been noted a few times by people trying to write
163    % XML-ENC decryptors backed by OpenSSL, which insists on compliance. The only recourse we have is to disable padding entirely
164    % and do it in our application
165    xmlenc_padding(DecryptedStringWithPadding, DecryptedString),
166    % Now that we have the decrypted data, we can decide whether to turn it into an element or leave it as
167    % content
168    (  Type == 'http://www.w3.org/2001/04/xmlenc#Element'
169    -> setup_call_cleanup(open_string(DecryptedString, StringStream),
170                          load_structure(StringStream, [Decrypted], [dialect(xmlns), keep_prefix(true)]),
171                          close(StringStream))
172    ;  Decrypted = DecryptedString
173    ).
174
175xmlenc_padding(DecryptedStringWithPadding, DecryptedString):-
176    string_length(DecryptedStringWithPadding, _),
177    string_codes(DecryptedStringWithPadding, Codes),
178    append(_, [LastCode], Codes),
179    length(Padding, LastCode),
180    append(DecryptedCodes, Padding, Codes),
181    !,
182    string_codes(DecryptedString, DecryptedCodes).
183
184apply_ciphertext_transforms(CipherValue, [], CipherValue):- !.
185apply_ciphertext_transforms(_, [_AnythingElse|_], _):-
186    % FIXME: Not implemented
187    throw(error(implementation_missing('CipherReference transforms are not implemented', _))).
188
189:- meta_predicate determine_key(+,-,3,+).
190determine_key(EncryptedData, Key, KeyCallback, Options):-
191    DS = ns(_, 'http://www.w3.org/2000/09/xmldsig#'),
192    (  memberchk(element(DS:'KeyInfo', _, KeyInfo), EncryptedData)
193    -> true
194    ;  % Technically the KeyInfo is not mandatory. However, without a key we cannot decrypt
195           % so raise an error. In the future Options could contain a key if it is agreed upon
196           % by some other channel
197           existence_error(key_info, EncryptedData)
198    ),
199    resolve_key(KeyInfo, Key, KeyCallback, Options).
200
201:- meta_predicate resolve_key(+,-,3,+).
202
203resolve_key(Info, Key, KeyCallback, Options):-
204    % EncryptedKey
205    XENC = 'http://www.w3.org/2001/04/xmlenc#',
206    memberchk(element(ns(_, XENC):'EncryptedKey', _KeyAttributes, EncryptedKey), Info),
207    !,
208    % The EncryptedKey is slightly different to EncryptedData. For a start, the algorithms used to decrypt the
209    % key are orthogonal to those used for EncryptedData. However we can recursively search for the keys then
210    % decrypt them using the different algorithms as needed
211    memberchk(element(ns(_, XENC):'EncryptionMethod', MethodAttributes, EncryptionMethod), EncryptedKey),
212    memberchk('Algorithm'=Algorithm, MethodAttributes),
213
214    % Now find the KeyInfo
215    determine_key(EncryptedKey, PrivateKey, KeyCallback, Options),
216
217    memberchk(element(ns(_, XENC):'CipherData', _, CipherData), EncryptedKey),
218    memberchk(element(ns(_, XENC):'CipherValue', _, CipherValueElement), CipherData),
219    base64_element(CipherValueElement, CipherValue),
220    (  Algorithm == 'http://www.w3.org/2001/04/xmlenc#rsa-oaep-mgf1p'
221    -> rsa_private_decrypt(PrivateKey, CipherValue, Key, [encoding(octet), padding(pkcs1_oaep)])
222    ;  Algorithm == 'http://www.w3.org/2009/xmlenc11#rsa-oaep',
223           memberchk(element(ns(_, 'http://www.w3.org/2009/xmlenc11#'):'MGF', MGFAttributes, _), EncryptionMethod),
224           memberchk('Algorithm'='http://www.w3.org/2009/xmlenc11#mgf1sha1', MGFAttributes)   % This is just the same as rsa-oaep-mgf1p!
225    -> rsa_private_decrypt(PrivateKey, CipherValue, Key, [encoding(octet), padding(pkcs1_oaep)])
226    ;  Algorithm == 'http://www.w3.org/2001/04/xmlenc#rsa-1_5'
227    -> rsa_private_decrypt(PrivateKey, CipherValue, Key, [encoding(octet), padding(pkcs1)])
228    ;  domain_error(key_transport, Algorithm)
229    ).
230resolve_key(KeyInfo, _Key, _KeyCallback, _Options):-
231    % AgreementMethod. FIXME: Not implemented
232    XENC = ns(_, 'http://www.w3.org/2001/04/xmlenc#'),
233    memberchk(element(XENC:'AgreementMethod', _KeyAttributes, _AgreementMethod), KeyInfo),
234    !,
235    throw(not_implemented).
236% Additionally, we are allowed to use any elements from XML-DSIG
237resolve_key(KeyInfo, Key, KeyCallback, _Options):-
238    % KeyName. Use the callback with type=name and hint=KeyName
239    DS = ns(_, 'http://www.w3.org/2000/09/xmldsig#'),
240    memberchk(element(DS:'KeyName', _KeyAttributes, [KeyName]), KeyInfo),
241    !,
242    call(KeyCallback, name, KeyName, Key).
243resolve_key(KeyInfo, _Key, _KeyCallback, _Options):-
244    % RetrievalMethod. FIXME: Not implemented
245    DS = ns(_, 'http://www.w3.org/2000/09/xmldsig#'),
246    memberchk(element(DS:'RetrievalMethod', _KeyAttributes, _RetrievalMethod), KeyInfo),
247    !,
248    throw(not_implemented).
249resolve_key(KeyInfo, Key, KeyCallback, _Options):-
250    % KeyValue.
251    DS = ns(_, 'http://www.w3.org/2000/09/xmldsig#'),
252    memberchk(element(DS:'KeyValue', _KeyAttributes, KeyValue), KeyInfo),
253    !,
254    (  memberchk(element(DS:'RSAKeyValue', _, RSAKeyValue), KeyInfo)
255    -> memberchk(element(DS:'Modulus', _, [ModulusBase64]), RSAKeyValue),
256           memberchk(element(DS:'Exponent', _, [ExponentBase64]), RSAKeyValue),
257           base64_to_hex(ModulusBase64, Modulus),
258           base64_to_hex(ExponentBase64, Exponent),
259           call(KeyCallback, public_key, public_key(rsa(Modulus, Exponent, -, -, -, -, -, -)), Key)
260    ;  memberchk(element(DS:'DSAKeyValue', _, _DSAKeyValue), KeyInfo)
261    -> throw(error(not_implemented(dsa_key), _)) % FIXME: Not implemented
262    ;  existence_error(usable_key_value, KeyValue)
263    ).
264resolve_key(KeyInfo, Key, KeyCallback, _Options):-
265    % X509Data.
266    DS = ns(_, 'http://www.w3.org/2000/09/xmldsig#'),
267    memberchk(element(DS:'X509Data', _, X509Data), KeyInfo),
268    memberchk(element(DS:'X509Certificate', _, [X509Certificate]), X509Data),
269    !,
270    load_certificate_from_base64_string(X509Certificate, Certificate),
271    call(KeyCallback, certificate, Certificate, Key).
272resolve_key(KeyInfo, _Key, _KeyCallback, _Options):-
273    % PGPData. FIXME: Not implemented
274    DS = ns(_, 'http://www.w3.org/2000/09/xmldsig#'),
275    memberchk(element(DS:'PGPData', _KeyAttributes, _PGPData), KeyInfo),
276    !,
277    throw(not_implemented).
278resolve_key(KeyInfo, _Key, _KeyCallback, _Options):-
279    % SPKIData. FIXME: Not implemented
280    DS = ns(_, 'http://www.w3.org/2000/09/xmldsig#'),
281    memberchk(element(DS:'SPKIData', _KeyAttributes, _SPKIData), KeyInfo),
282    !,
283    throw(not_implemented).
284resolve_key(KeyInfo, _Key, _KeyCallback, _Options):-
285    % MgmtData. FIXME: Not implemented
286    DS = ns(_, 'http://www.w3.org/2000/09/xmldsig#'),
287    memberchk(element(DS:'MgmtData', _KeyAttributes, _SPKIData), KeyInfo),
288    !,
289    throw(not_implemented).
290resolve_key(Info, _, _, _):-
291    % The XML-ENC standard allows for arbitrary other means of transmitting keys in application-specific
292    % protocols. This is not supported here, though. In the future a callback could be provided in Options
293    % to obtain the key information from a KeyInfo structure.
294    existence_error(usable_key, Info).
295
296
297base64_to_hex(Base64, Hex):-
298    base64(Raw, Base64),
299    atom_codes(Raw, Codes),
300    hex_bytes(Hex0, Codes),
301    string_upper(Hex0, Hex).
302
303
304determine_encryption_algorithm(EncryptedData, Algorithm, IVSize):-
305    XENC = ns(_, 'http://www.w3.org/2001/04/xmlenc#'),
306    (  memberchk(element(XENC:'EncryptionMethod', EncryptionMethodAttributes, _), EncryptedData)
307    -> % This is a mandatory attribute
308           memberchk('Algorithm'=XMLAlgorithm, EncryptionMethodAttributes),
309           (  ssl_algorithm(XMLAlgorithm, Algorithm, IVSize)
310           -> true
311           ; domain_error(block_cipher, XMLAlgorithm)
312           )
313        % In theory the EncryptionMethod is optional. In pracitse though, if the method is not supplied we
314        % cannot decrypt the data. In the future we could support encryption_algorithm/1 as an option to
315        % decrypt_element/3 but for now raise an exception
316    ; existence_error(encryption_method, EncryptedData)
317    ).
318
319base64_element([CipherValueElement], CipherValue):-
320    atom_codes(CipherValueElement, Base64Codes),
321    delete_newlines(Base64Codes, TrimmedCodes),
322    string_codes(Trimmed, TrimmedCodes),
323    base64(CipherValue, Trimmed).
324
325delete_newlines([], []):- !.
326delete_newlines([13|As], B):- !, delete_newlines(As, B).
327delete_newlines([10|As], B):- !, delete_newlines(As, B).
328delete_newlines([A|As], [A|B]):- !, delete_newlines(As, B).
329
330
331
332%!	load_certificate_from_base64_string(+String, -Certificate) is det.
333%
334%	Loads a certificate from a string, adding newlines and header
335%       where appropriate so that OpenSSL 1.0.1+ will be able to parse it
336
337load_certificate_from_base64_string(UnnormalizedData, Certificate):-
338    normalize_space(codes(Codes), UnnormalizedData),
339    % Break into 64-byte chunks
340    chunk_certificate(Codes, Chunks),
341    atomics_to_string(["-----BEGIN CERTIFICATE-----"|Chunks], '\n', CompleteCertificate),
342    setup_call_cleanup(open_string(CompleteCertificate, StringStream),
343                       load_certificate(StringStream, Certificate),
344                       close(StringStream)).
345
346chunk_certificate(Codes, [Chunk|Chunks]):-
347    length(ChunkCodes, 64),
348    append(ChunkCodes, Rest, Codes),
349    !,
350    string_codes(Chunk, ChunkCodes),
351    chunk_certificate(Rest, Chunks).
352chunk_certificate([], ["-----END CERTIFICATE-----\n"]):- !.
353chunk_certificate(LastCodes, [LastChunk, "-----END CERTIFICATE-----\n"]):-
354    string_codes(LastChunk, LastCodes).
355