1#
2#  Chaffing.py : chaffing & winnowing support
3#
4# Part of the Python Cryptography Toolkit
5#
6# Written by Andrew M. Kuchling, Barry A. Warsaw, and others
7#
8# ===================================================================
9# The contents of this file are dedicated to the public domain.  To
10# the extent that dedication to the public domain is not available,
11# everyone is granted a worldwide, perpetual, royalty-free,
12# non-exclusive license to exercise all rights associated with the
13# contents of this file for any purpose whatsoever.
14# No rights are reserved.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23# SOFTWARE.
24# ===================================================================
25#
26"""This file implements the chaffing algorithm.
27
28Winnowing and chaffing is a technique for enhancing privacy without requiring
29strong encryption.  In short, the technique takes a set of authenticated
30message blocks (the wheat) and adds a number of chaff blocks which have
31randomly chosen data and MAC fields.  This means that to an adversary, the
32chaff blocks look as valid as the wheat blocks, and so the authentication
33would have to be performed on every block.  By tailoring the number of chaff
34blocks added to the message, the sender can make breaking the message
35computationally infeasible.  There are many other interesting properties of
36the winnow/chaff technique.
37
38For example, say Alice is sending a message to Bob.  She packetizes the
39message and performs an all-or-nothing transformation on the packets.  Then
40she authenticates each packet with a message authentication code (MAC).  The
41MAC is a hash of the data packet, and there is a secret key which she must
42share with Bob (key distribution is an exercise left to the reader).  She then
43adds a serial number to each packet, and sends the packets to Bob.
44
45Bob receives the packets, and using the shared secret authentication key,
46authenticates the MACs for each packet.  Those packets that have bad MACs are
47simply discarded.  The remainder are sorted by serial number, and passed
48through the reverse all-or-nothing transform.  The transform means that an
49eavesdropper (say Eve) must acquire all the packets before any of the data can
50be read.  If even one packet is missing, the data is useless.
51
52There's one twist: by adding chaff packets, Alice and Bob can make Eve's job
53much harder, since Eve now has to break the shared secret key, or try every
54combination of wheat and chaff packet to read any of the message.  The cool
55thing is that Bob doesn't need to add any additional code; the chaff packets
56are already filtered out because their MACs don't match (in all likelihood --
57since the data and MACs for the chaff packets are randomly chosen it is
58possible, but very unlikely that a chaff MAC will match the chaff data).  And
59Alice need not even be the party adding the chaff!  She could be completely
60unaware that a third party, say Charles, is adding chaff packets to her
61messages as they are transmitted.
62
63For more information on winnowing and chaffing see this paper:
64
65Ronald L. Rivest, "Chaffing and Winnowing: Confidentiality without Encryption"
66http://theory.lcs.mit.edu/~rivest/chaffing.txt
67
68"""
69
70__revision__ = "$Id$"
71
72from Crypto.Util.number import bytes_to_long
73
74class Chaff:
75    """Class implementing the chaff adding algorithm.
76
77    Methods for subclasses:
78
79            _randnum(size):
80                Returns a randomly generated number with a byte-length equal
81                to size.  Subclasses can use this to implement better random
82                data and MAC generating algorithms.  The default algorithm is
83                probably not very cryptographically secure.  It is most
84                important that the chaff data does not contain any patterns
85                that can be used to discern it from wheat data without running
86                the MAC.
87
88    """
89
90    def __init__(self, factor=1.0, blocksper=1):
91        """Chaff(factor:float, blocksper:int)
92
93        factor is the number of message blocks to add chaff to,
94        expressed as a percentage between 0.0 and 1.0.  blocksper is
95        the number of chaff blocks to include for each block being
96        chaffed.  Thus the defaults add one chaff block to every
97        message block.  By changing the defaults, you can adjust how
98        computationally difficult it could be for an adversary to
99        brute-force crack the message.  The difficulty is expressed
100        as:
101
102            pow(blocksper, int(factor * number-of-blocks))
103
104        For ease of implementation, when factor < 1.0, only the first
105        int(factor*number-of-blocks) message blocks are chaffed.
106        """
107
108        if not (0.0<=factor<=1.0):
109            raise ValueError, "'factor' must be between 0.0 and 1.0"
110        if blocksper < 0:
111            raise ValueError, "'blocksper' must be zero or more"
112
113        self.__factor = factor
114        self.__blocksper = blocksper
115
116
117    def chaff(self, blocks):
118        """chaff( [(serial-number:int, data:string, MAC:string)] )
119        : [(int, string, string)]
120
121        Add chaff to message blocks.  blocks is a list of 3-tuples of the
122        form (serial-number, data, MAC).
123
124        Chaff is created by choosing a random number of the same
125        byte-length as data, and another random number of the same
126        byte-length as MAC.  The message block's serial number is
127        placed on the chaff block and all the packet's chaff blocks
128        are randomly interspersed with the single wheat block.  This
129        method then returns a list of 3-tuples of the same form.
130        Chaffed blocks will contain multiple instances of 3-tuples
131        with the same serial number, but the only way to figure out
132        which blocks are wheat and which are chaff is to perform the
133        MAC hash and compare values.
134        """
135
136        chaffedblocks = []
137
138        # count is the number of blocks to add chaff to.  blocksper is the
139        # number of chaff blocks to add per message block that is being
140        # chaffed.
141        count = len(blocks) * self.__factor
142        blocksper = range(self.__blocksper)
143        for i, wheat in zip(range(len(blocks)), blocks):
144            # it shouldn't matter which of the n blocks we add chaff to, so for
145            # ease of implementation, we'll just add them to the first count
146            # blocks
147            if i < count:
148                serial, data, mac = wheat
149                datasize = len(data)
150                macsize = len(mac)
151                addwheat = 1
152                # add chaff to this block
153                for j in blocksper:
154                    import sys
155                    chaffdata = self._randnum(datasize)
156                    chaffmac = self._randnum(macsize)
157                    chaff = (serial, chaffdata, chaffmac)
158                    # mix up the order, if the 5th bit is on then put the
159                    # wheat on the list
160                    if addwheat and bytes_to_long(self._randnum(16)) & 0x40:
161                        chaffedblocks.append(wheat)
162                        addwheat = 0
163                    chaffedblocks.append(chaff)
164                if addwheat:
165                    chaffedblocks.append(wheat)
166            else:
167                # just add the wheat
168                chaffedblocks.append(wheat)
169        return chaffedblocks
170
171    def _randnum(self, size):
172        from Crypto import Random
173        return Random.new().read(size)
174
175
176if __name__ == '__main__':
177    text = """\
178We hold these truths to be self-evident, that all men are created equal, that
179they are endowed by their Creator with certain unalienable Rights, that among
180these are Life, Liberty, and the pursuit of Happiness. That to secure these
181rights, Governments are instituted among Men, deriving their just powers from
182the consent of the governed. That whenever any Form of Government becomes
183destructive of these ends, it is the Right of the People to alter or to
184abolish it, and to institute new Government, laying its foundation on such
185principles and organizing its powers in such form, as to them shall seem most
186likely to effect their Safety and Happiness.
187"""
188    print 'Original text:\n=========='
189    print text
190    print '=========='
191
192    # first transform the text into packets
193    blocks = [] ; size = 40
194    for i in range(0, len(text), size):
195        blocks.append( text[i:i+size] )
196
197    # now get MACs for all the text blocks.  The key is obvious...
198    print 'Calculating MACs...'
199    from Crypto.Hash import HMAC, SHA
200    key = 'Jefferson'
201    macs = [HMAC.new(key, block, digestmod=SHA).digest()
202            for block in blocks]
203
204    assert len(blocks) == len(macs)
205
206    # put these into a form acceptable as input to the chaffing procedure
207    source = []
208    m = zip(range(len(blocks)), blocks, macs)
209    print m
210    for i, data, mac in m:
211        source.append((i, data, mac))
212
213    # now chaff these
214    print 'Adding chaff...'
215    c = Chaff(factor=0.5, blocksper=2)
216    chaffed = c.chaff(source)
217
218    from base64 import encodestring
219
220    # print the chaffed message blocks.  meanwhile, separate the wheat from
221    # the chaff
222
223    wheat = []
224    print 'chaffed message blocks:'
225    for i, data, mac in chaffed:
226        # do the authentication
227        h = HMAC.new(key, data, digestmod=SHA)
228        pmac = h.digest()
229        if pmac == mac:
230            tag = '-->'
231            wheat.append(data)
232        else:
233            tag = '   '
234        # base64 adds a trailing newline
235        print tag, '%3d' % i, \
236              repr(data), encodestring(mac)[:-1]
237
238    # now decode the message packets and check it against the original text
239    print 'Undigesting wheat...'
240    # PY3K: This is meant to be text, do not change to bytes (data)
241    newtext = "".join(wheat)
242    if newtext == text:
243        print 'They match!'
244    else:
245        print 'They differ!'
246