1#! /usr/bin/env python3
2
3'''
4Provide information about a multideterminant wavefunction stored in HDF5 format.
5Can be used as a library or as CLI tool
6
7determinants_tools.py ./tests/molecules/C2_pp/C2.h5    # Summary excitation info only
8determinants_tools.py -v ./tests/molecules/C2_pp/C2.h5 # Verbose. Gives details of each determinant
9'''
10
11from __future__ import print_function
12from builtins import zip
13
14from functools import partial
15
16# A detspin is a list of integers that represent the alpha or beta part of determinant
17# A determinant is a pair of detspin alpha and detspin beta
18# A CSF is represented as a pair of list of integers coding respectively for the double and single occupied state.
19
20# A bitmask is a `n_state` length binary representation of a list of integers encoded in `bit_kind_size` bits.
21#   Note that this representation is 'reversed'. This allows the first bit to represent the first orbital, and so on.
22#   For performance and convenience, if `n_state` < `bit_kind_size` * `n_integer`,
23#      we assume that the bits coding for excessive state of the last integer are set to 0.
24#      To be clearer, if we have `n_state` = 1 with one alpha electron and `bit_kind_size` = 64
25#      the only valid integer representation of this determinants is ( (1,), (0,) )
26
27# Assume that each integer is unsigned. If not they should be converted before using the sanitize function.
28
29from collections import namedtuple
30Determinants = namedtuple('Determinants', ['alpha', 'beta'])
31CSF = namedtuple('CSF', ['double', 'single'])
32
33def sanitize(det_spin, bit_kind_size):
34     '''
35     This function transform signed numpy variable to native "positive" python number
36     In the case of :
37        -positive Numpy number, we just cast it to Python data-type
38        -negative Numpy number, we do the two-compelement trick.
39                                Because this value cannot fit on the initial Numpy datatype,
40                                it will silently casted to int.
41
42     >>> import numpy as np
43     >>> sanitize([np.int8(1)], 8)
44     [1]
45     >>> sanitize([np.int8(-1)], 8)
46     [255]
47     '''
48     return [ (s + (1 << bit_kind_size)) if s < 0 else int(s) for s in det_spin]
49
50def int_to_bitmask(s, bit_kind_size):
51    '''
52    >>> int_to_bitmask(1, 3)
53    '100'
54    >>> int_to_bitmask(2, 3)
55    '010'
56    >>> int_to_bitmask(3, 3)
57    '110'
58    >>> int_to_bitmask(7, 3)
59    '111'
60    '''
61    return '{s:0{width}b}'.format(s=s, width=bit_kind_size)[::-1]
62
63
64def detspin_to_bitmask(detspin, bit_kind_size, size=None):
65    '''
66    >>> detspin_to_bitmask([1,7], 3,6)
67    '100111'
68    >>> detspin_to_bitmask([1,2], 3,6)
69    '100010'
70    >>> detspin_to_bitmask([1,2], 3,5)
71    '10001'
72    '''
73    return ''.join(int_to_bitmask(i, bit_kind_size) for i in detspin)[:size]
74
75def det_excitation_degree(det_a, det_b):
76    '''
77    >>> det_excitation_degree( Determinants(alpha=[1],beta=[0] ),
78    ...                        Determinants(alpha=[1],beta=[0] ) )
79    0
80    >>> det_excitation_degree( Determinants(alpha=[1],beta=[0] ),
81    ...                        Determinants(alpha=[2],beta=[0] ) )
82    1
83    >>> det_excitation_degree( Determinants(alpha=[1,2],beta=[0] ),
84    ...                        Determinants(alpha=[2,1],beta=[0] ) )
85    2
86    >>> det_excitation_degree( Determinants(alpha=[7,0],beta=[0] ),
87    ...                        Determinants(alpha=[0,7],beta=[0] ))
88    3
89    '''
90    or_and_popcount = lambda det : sum(bin(i ^ j).count("1") for i, j in zip(*det))
91    return sum(map(or_and_popcount,zip(det_a, det_b))) // 2
92
93def det_to_csf(det):
94    '''
95    >>> det_to_csf( ( ([1,0]),([2,0]) ) )
96    CSF(double=(0, 0), single=(3, 0))
97    '''
98    # This function assume that determinant are zero-padded.
99    d = tuple(a & b for a, b in zip(*det))
100    s = tuple(a ^ b for a, b in zip(*det))
101    return CSF(d, s)
102
103
104def csf_to_string(csf, bit_kind_size, size):
105    '''
106    >>> csf_to_string( CSF(double=[1], single=[0]), 3, 3)
107    '200'
108    >>> csf_to_string( CSF(double=[0], single=[1]),  3, 3)
109    '100'
110    >>> csf_to_string( CSF(double=[1], single=[2]),  3, 3)
111    '210'
112    '''
113    f = partial(detspin_to_bitmask, bit_kind_size=bit_kind_size, size=size)
114    d_str, s_str = map(f, csf)
115
116    r = ''
117    for d, s in zip(d_str, s_str):
118        if d != '0':
119            r += '2'
120        elif s != '0':
121            r += '1'
122        else:
123            r += '0'
124    return r
125
126if __name__ == '__main__':
127    import h5py
128    import argparse
129
130    from collections import Counter
131    from itertools import chain
132
133    parser = argparse.ArgumentParser(
134        description='Provide information about a multideterminant wavefunction stored in HDF5 format.')
135
136    parser.add_argument("h5path", type=str,
137        help="Path to a h5file")
138
139    parser.add_argument("-v", "--verbose",
140        help="For each determinant, print its bitmask representation, associated CSF and excitation degree",
141        action="store_true")
142
143    parser.add_argument("-s", "--silent",
144        help="Disable check", action="store_true")
145
146    args = parser.parse_args()
147
148    # Run the unit test if needed
149    if not args.silent:
150        import doctest
151        r = doctest.testmod(verbose=False)
152        if r.failed:
153            import sys
154            sys.exit(1)
155
156    with h5py.File(args.h5path, 'r') as f:
157
158        bit_kind_size = f['MultiDet/CI_Alpha'].dtype.itemsize * 8
159
160        # We sanitize the determinants. Aka we transform any negative integers, into the proper unsigned one.
161        i_alpha= (sanitize(d,bit_kind_size) for d in f['MultiDet/CI_Alpha'])
162        i_beta= (sanitize(d,bit_kind_size) for d in f['MultiDet/CI_Beta'])
163        i_det = zip(i_alpha, i_beta)
164
165        # Getting the reference determinant to compute the excitation degree.
166        hf_det = next(i_det)
167        i_det = chain([hf_det], i_det) # Put back the original
168
169        # Gathering information only needed in verbose mode
170        if args.verbose:
171            nstate = f['MultiDet/nstate'][0]
172            to_string = partial(detspin_to_bitmask, bit_kind_size=bit_kind_size, size=nstate)
173
174        # Aggregate the result and display curent information about the determinant
175        c_e = Counter(); c_csf = Counter()
176        for i, det in enumerate(i_det, 1):
177            e = det_excitation_degree(det, hf_det); c_e[e] += 1
178            c = det_to_csf(det); c_csf[c] += 1
179
180            if args.verbose:
181                str_alpha, str_beta = map(to_string, det)
182                print(i)
183                print('alpha ', str_alpha)
184                print('beta  ', str_beta)
185                print('scf   ', csf_to_string(c, bit_kind_size, nstate))
186                print('excitation degree ', e)
187                print('')
188
189        # Display computed data
190        n_det = sum(c_e.values()); n_csf = len(c_csf)
191
192        print('Summary:')
193        for e, i in sorted(c_e.items()):
194            print('excitation degree', e, 'count:', i)
195
196        print('')
197        print('n_det', n_det)
198        print('n_csf', n_csf)
199