1(* $Id: outbase.ml,v 5.21 2007-01-19 01:53:16 ddr Exp $ *)
2(* Copyright (c) 2006-2007 INRIA *)
3
4open Dbdisk;
5open Dutil;
6open Def;
7open Mutil;
8
9value load_ascends_array base = base.data.ascends.load_array ();
10value load_unions_array base = base.data.unions.load_array ();
11value load_couples_array base = base.data.couples.load_array ();
12value load_descends_array base = base.data.descends.load_array ();
13value load_strings_array base = base.data.strings.load_array ();
14value close_base base = base.func.cleanup ();
15
16value save_mem = ref False;
17
18value trace s =
19  if verbose.val then do { Printf.eprintf "*** %s\n" s; flush stderr }
20  else ()
21;
22
23value count_error computed found =
24  do {
25    Printf.eprintf "Count error. Computed %d. Found %d.\n" computed found;
26    flush stderr;
27    exit 2
28  }
29;
30
31value just_copy bname what oc oc_acc =
32  do {
33    Printf.eprintf "*** copying %s\n" what;
34    flush stderr;
35    let ic =
36      let ic = Secure.open_in_bin (Filename.concat bname "base") in
37      do { check_magic ic; ic }
38    in
39    let ic_acc = Secure.open_in_bin (Filename.concat bname "base.acc") in
40    let persons_len = input_binary_int ic in
41    let families_len = input_binary_int ic in
42    let strings_len = input_binary_int ic in
43    let persons_array_pos = input_binary_int ic in
44    let ascends_array_pos = input_binary_int ic in
45    let unions_array_pos = input_binary_int ic in
46    let families_array_pos = input_binary_int ic in
47    let couples_array_pos = input_binary_int ic in
48    let descends_array_pos = input_binary_int ic in
49    let strings_array_pos = input_binary_int ic in
50    let _ (*norigin_file*) = input_value ic in
51    let (beg_pos, end_pos, beg_acc_pos, array_len) =
52      match what with
53      [ "persons" ->
54          let pos = 0 in
55          (persons_array_pos, ascends_array_pos, pos, persons_len)
56      | "ascends" ->
57          let pos = persons_len * Iovalue.sizeof_long in
58          (ascends_array_pos, unions_array_pos, pos, persons_len)
59      | "unions" ->
60          let pos = 2 * persons_len * Iovalue.sizeof_long in
61          (unions_array_pos, families_array_pos, pos, persons_len)
62      | "families" ->
63          let pos = 3 * persons_len * Iovalue.sizeof_long in
64          (families_array_pos, couples_array_pos, pos, families_len)
65      | "couples" ->
66          let pos = (3 * persons_len + families_len) * Iovalue.sizeof_long in
67          (couples_array_pos, descends_array_pos, pos, families_len)
68      | "descends" ->
69          let pos =
70            (3 * persons_len + 2 * families_len) * Iovalue.sizeof_long
71          in
72          (descends_array_pos, strings_array_pos, pos, families_len)
73      | "strings" ->
74          let pos =
75            (3 * persons_len + 3 * families_len) * Iovalue.sizeof_long
76          in
77          (strings_array_pos, in_channel_length ic, pos, strings_len)
78      | _ -> failwith ("just copy " ^ what) ]
79    in
80    let shift = pos_out oc - beg_pos in
81    seek_in ic beg_pos;
82    let rec loop pos =
83      if pos = end_pos then close_in ic
84      else do { output_char oc (input_char ic); loop (pos + 1) }
85    in
86    loop beg_pos;
87    seek_in ic_acc beg_acc_pos;
88    let rec loop len =
89      if len = array_len then close_in ic_acc
90      else do {
91        output_binary_int oc_acc (input_binary_int ic_acc + shift);
92        loop (len + 1)
93      }
94    in
95    loop 0;
96  }
97;
98
99value make_name_index base =
100  let t = Array.make table_size [| |] in
101  let add_name key valu =
102    let key = Name.crush (Name.abbrev key) in
103    let i = Hashtbl.hash key mod Array.length t in
104    if array_mem valu t.(i) then ()
105    else t.(i) := Array.append [| valu |] t.(i)
106  in
107  let rec add_names ip =
108    fun
109    [ [] -> ()
110    | [n :: nl] -> do { add_name n ip; add_names ip nl } ]
111  in
112  do {
113    for i = 0 to base.data.persons.len - 1 do {
114      let p = base.data.persons.get i in
115      let first_name = p_first_name base p in
116      let surname = p_surname base p in
117      if first_name <> "?" && surname <> "?" then
118        let names =
119          [Name.lower (first_name ^ " " ^ surname) ::
120           Dutil.dsk_person_misc_names base p (fun p -> p.titles)]
121        in
122        add_names p.key_index names
123      else ();
124    };
125    t
126  }
127;
128
129value create_name_index oc_inx oc_inx_acc base =
130  let ni = make_name_index base in
131  let bpos = pos_out oc_inx in
132  do {
133    output_value_no_sharing oc_inx (ni : name_index_data);
134    let epos =
135      Iovalue.output_array_access oc_inx_acc (Array.get ni) (Array.length ni)
136        bpos
137    in
138    if epos <> pos_out oc_inx then count_error epos (pos_out oc_inx)
139    else ()
140  }
141;
142
143value add_name t key valu =
144  let key = Name.crush_lower key in
145  let i = Hashtbl.hash key mod Array.length t in
146  if array_mem valu t.(i) then ()
147  else t.(i) := Array.append [| valu |] t.(i)
148;
149
150value make_strings_of_fsname base =
151  let t = Array.make table_size [| |] in
152  do {
153    for i = 0 to base.data.persons.len - 1 do {
154      let p = poi base (Adef.iper_of_int i) in
155      let first_name = p_first_name base p in
156      let surname = p_surname base p in
157      if first_name <> "?" then add_name t first_name p.first_name else ();
158      if surname <> "?" then do {
159        add_name t surname p.surname;
160        List.iter (fun sp -> add_name t sp p.surname)
161          (surnames_pieces surname);
162      }
163      else ();
164    };
165    t
166  }
167;
168
169value create_strings_of_fsname oc_inx oc_inx_acc base =
170  let t = make_strings_of_fsname base in
171  let bpos = pos_out oc_inx in
172  do {
173    output_value_no_sharing oc_inx (t : strings_of_fsname);
174    let epos =
175      Iovalue.output_array_access oc_inx_acc (Array.get t) (Array.length t)
176        bpos
177    in
178    if epos <> pos_out oc_inx then count_error epos (pos_out oc_inx)
179    else ()
180  }
181;
182
183value is_prime a =
184  loop 2 where rec loop b =
185    if a / b < b then True else if a mod b = 0 then False else loop (b + 1)
186;
187
188value rec prime_after n = if is_prime n then n else prime_after (n + 1);
189
190value output_strings_hash oc2 base =
191  let () = base.data.strings.load_array () in
192  let strings_array = base.data.strings in
193  let taba =
194    Array.make
195      (min Sys.max_array_length
196        (prime_after (max 2 (10 * strings_array.len))))
197      (-1)
198  in
199  let tabl = Array.make strings_array.len (-1) in
200  do {
201    for i = 0 to strings_array.len - 1 do {
202      let ia = Hashtbl.hash (base.data.strings.get i) mod Array.length taba in
203      tabl.(i) := taba.(ia);
204      taba.(ia) := i;
205    };
206    output_binary_int oc2 (Array.length taba);
207    output_binary_int oc2 0;
208    output_binary_int oc2 0;
209    for i = 0 to Array.length taba - 1 do {
210      output_binary_int oc2 taba.(i)
211    };
212    for i = 0 to Array.length tabl - 1 do {
213      output_binary_int oc2 tabl.(i)
214    };
215  }
216;
217
218value output_surname_index oc2 base tmp_snames_inx tmp_snames_dat =
219  let module IstrTree =
220    Btree.Make
221      (struct
222         type t = dsk_istr;
223         value compare = compare_istr_fun base.data;
224       end)
225  in
226  let bt = ref IstrTree.empty in
227  do {
228    for i = 0 to base.data.persons.len - 1 do {
229      let p = poi base (Adef.iper_of_int i) in
230      let a =
231        try IstrTree.find p.surname bt.val with [ Not_found -> [] ]
232      in
233      bt.val := IstrTree.add p.surname [p.key_index :: a] bt.val
234    };
235    (* obsolete table: saved by compatibility with GeneWeb versions <= 4.09,
236       i.e. the created database can be still read by these versions but this
237       table will not be used in versions >= 4.10 *)
238    output_value_no_sharing oc2 (bt.val : IstrTree.t (list iper));
239    (* new table created from version >= 4.10 *)
240    let oc_sn_dat = Secure.open_out_bin tmp_snames_dat in
241    let bt2 =
242      IstrTree.map
243        (fun ipl ->
244           let i = pos_out oc_sn_dat in
245           do {
246             output_binary_int oc_sn_dat (List.length ipl);
247             List.iter
248               (fun ip -> output_binary_int oc_sn_dat (Adef.int_of_iper ip))
249               ipl;
250             i
251           })
252        bt.val
253    in
254    close_out oc_sn_dat;
255    let oc_sn_inx = Secure.open_out_bin tmp_snames_inx in
256    output_value_no_sharing oc_sn_inx (bt2 : IstrTree.t int);
257    close_out oc_sn_inx;
258  }
259;
260
261value output_first_name_index oc2 base tmp_fnames_inx tmp_fnames_dat =
262  let module IstrTree =
263    Btree.Make
264      (struct
265         type t = dsk_istr;
266         value compare = compare_istr_fun base.data;
267       end)
268  in
269  let bt = ref IstrTree.empty in
270  do {
271    for i = 0 to base.data.persons.len - 1 do {
272      let p = poi base (Adef.iper_of_int i) in
273      let a =
274        try IstrTree.find p.first_name bt.val with [ Not_found -> [] ]
275      in
276      bt.val := IstrTree.add p.first_name [p.key_index :: a] bt.val
277    };
278    (* obsolete table: saved by compatibility with GeneWeb versions <= 4.09,
279       i.e. the created database can be still read by these versions but this
280       table will not be used in versions >= 4.10 *)
281    output_value_no_sharing oc2 (bt.val : IstrTree.t (list iper));
282    (* new table created from version >= 4.10 *)
283    let oc_fn_dat = Secure.open_out_bin tmp_fnames_dat in
284    let bt2 =
285      IstrTree.map
286        (fun ipl ->
287           let i = pos_out oc_fn_dat in
288           do {
289             output_binary_int oc_fn_dat (List.length ipl);
290             List.iter
291               (fun ip -> output_binary_int oc_fn_dat (Adef.int_of_iper ip))
292               ipl;
293             i
294           })
295        bt.val
296    in
297    close_out oc_fn_dat;
298    let oc_fn_inx = Secure.open_out_bin tmp_fnames_inx in
299    output_value_no_sharing oc_fn_inx (bt2 : IstrTree.t int);
300    close_out oc_fn_inx;
301  }
302;
303
304value gen_output no_patches bname base =
305  let bname =
306    if Filename.check_suffix bname ".gwb" then bname else bname ^ ".gwb"
307  in
308  do {
309    try Unix.mkdir bname 0o755 with _ -> ();
310    let tmp_base = Filename.concat bname "1base" in
311    let tmp_base_acc = Filename.concat bname "1base.acc" in
312    let tmp_names_inx = Filename.concat bname "1names.inx" in
313    let tmp_names_acc = Filename.concat bname "1names.acc" in
314    let tmp_snames_inx = Filename.concat bname "1snames.inx" in
315    let tmp_snames_dat = Filename.concat bname "1snames.dat" in
316    let tmp_fnames_inx = Filename.concat bname "1fnames.inx" in
317    let tmp_fnames_dat = Filename.concat bname "1fnames.dat" in
318    let tmp_strings_inx = Filename.concat bname "1strings.inx" in
319    let tmp_notes = Filename.concat bname "1notes" in
320    let tmp_notes_d = Filename.concat bname "1notes_d" in
321    if not no_patches then do {
322      load_ascends_array base;
323      load_unions_array base;
324      load_couples_array base;
325      load_descends_array base;
326      load_strings_array base;
327    }
328    else ();
329    let oc = Secure.open_out_bin tmp_base in
330    let oc_acc = Secure.open_out_bin tmp_base_acc in
331    let output_array arrname arr =
332      let bpos = pos_out oc in
333      do {
334        Printf.eprintf "*** saving %s array\n" arrname;
335        flush stderr;
336        arr.output_array oc;
337        let epos = Iovalue.output_array_access oc_acc arr.get arr.len bpos in
338        if epos <> pos_out oc then count_error epos (pos_out oc) else ()
339      }
340    in
341    try
342      do {
343        output_string oc
344          (if utf_8_db.val then magic_gwb else magic_gwb_iso_8859_1);
345        output_binary_int oc base.data.persons.len;
346        output_binary_int oc base.data.families.len;
347        output_binary_int oc base.data.strings.len;
348        let array_start_indexes = pos_out oc in
349        output_binary_int oc 0;
350        output_binary_int oc 0;
351        output_binary_int oc 0;
352        output_binary_int oc 0;
353        output_binary_int oc 0;
354        output_binary_int oc 0;
355        output_binary_int oc 0;
356        output_value_no_sharing oc (base.data.bnotes.norigin_file : string);
357        let persons_array_pos = pos_out oc in
358        if not no_patches then output_array "persons" base.data.persons
359        else just_copy bname "persons" oc oc_acc;
360        let ascends_array_pos = pos_out oc in
361        if not no_patches then () else trace "saving ascends";
362        output_array "ascends" base.data.ascends;
363        let unions_array_pos = pos_out oc in
364        if not no_patches then output_array "unions" base.data.unions
365        else just_copy bname "unions" oc oc_acc;
366        let families_array_pos = pos_out oc in
367        if not no_patches then output_array "families" base.data.families
368        else just_copy bname "families" oc oc_acc;
369        let couples_array_pos = pos_out oc in
370        if not no_patches then output_array "couples" base.data.couples
371        else just_copy bname "couples" oc oc_acc;
372        let descends_array_pos = pos_out oc in
373        if not no_patches then output_array "descends" base.data.descends
374        else just_copy bname "descends" oc oc_acc;
375        let strings_array_pos = pos_out oc in
376        if not no_patches then output_array "strings" base.data.strings
377        else just_copy bname "strings" oc oc_acc;
378        seek_out oc array_start_indexes;
379        output_binary_int oc persons_array_pos;
380        output_binary_int oc ascends_array_pos;
381        output_binary_int oc unions_array_pos;
382        output_binary_int oc families_array_pos;
383        output_binary_int oc couples_array_pos;
384        output_binary_int oc descends_array_pos;
385        output_binary_int oc strings_array_pos;
386        base.data.families.clear_array ();
387        base.data.descends.clear_array ();
388        close_out oc;
389        close_out oc_acc;
390        if not no_patches then
391          let oc_inx = Secure.open_out_bin tmp_names_inx in
392          let oc_inx_acc = Secure.open_out_bin tmp_names_acc in
393          let oc2 = Secure.open_out_bin tmp_strings_inx in
394          try
395            do {
396              trace "create name index";
397              output_binary_int oc_inx 0;
398              create_name_index oc_inx oc_inx_acc base;
399              base.data.ascends.clear_array ();
400              base.data.unions.clear_array ();
401              base.data.couples.clear_array ();
402              if save_mem.val then do { trace "compacting"; Gc.compact () }
403              else ();
404              let surname_or_first_name_pos = pos_out oc_inx in
405              trace "create strings of fsname";
406              create_strings_of_fsname oc_inx oc_inx_acc base;
407              seek_out oc_inx 0;
408              output_binary_int oc_inx surname_or_first_name_pos;
409              close_out oc_inx;
410              close_out oc_inx_acc;
411              if save_mem.val then do { trace "compacting"; Gc.compact () }
412              else ();
413              trace "create string index";
414              output_strings_hash oc2 base;
415              if save_mem.val then do { trace "compacting"; Gc.compact () }
416              else ();
417              let surname_pos = pos_out oc2 in
418              trace "create surname index";
419              output_surname_index oc2 base tmp_snames_inx tmp_snames_dat;
420              if save_mem.val then do {
421                trace "compacting"; Gc.compact ()
422              }
423              else ();
424              let first_name_pos = pos_out oc2 in
425              trace "create first name index";
426              output_first_name_index oc2 base tmp_fnames_inx tmp_fnames_dat;
427              seek_out oc2 int_size;
428              output_binary_int oc2 surname_pos;
429              output_binary_int oc2 first_name_pos;
430              let s = base.data.bnotes.nread "" RnAll in
431              if s = "" then ()
432              else do {
433                let oc_not = Secure.open_out tmp_notes in
434                output_string oc_not s;
435                close_out oc_not;
436              };
437              close_out oc2;
438              List.iter
439                (fun f ->
440                   let s = base.data.bnotes.nread f RnAll in
441                   let fname = Filename.concat tmp_notes_d (f ^ ".txt") in
442                   do {
443                     mkdir_p (Filename.dirname fname);
444                     let oc = open_out fname in
445                     output_string oc s;
446                     close_out oc;
447                   })
448                (List.rev (base.data.bnotes.efiles ()));
449            }
450          with e ->
451            do {
452              try close_out oc_inx with _ -> ();
453              try close_out oc_inx_acc with _ -> ();
454              try close_out oc2 with _ -> ();
455              raise e
456            }
457        else ();
458        trace "ok";
459      }
460    with e ->
461      do {
462        try close_out oc with _ -> ();
463        try close_out oc_acc with _ -> ();
464        remove_file tmp_base;
465        remove_file tmp_base_acc;
466        if not no_patches then do {
467          remove_file tmp_names_inx;
468          remove_file tmp_names_acc;
469          remove_file tmp_strings_inx;
470          remove_dir tmp_notes_d;
471        }
472        else ();
473        raise e
474      };
475    close_base base;
476    remove_file (Filename.concat bname "base");
477    Sys.rename tmp_base (Filename.concat bname "base");
478    remove_file (Filename.concat bname "base.acc");
479    Sys.rename tmp_base_acc (Filename.concat bname "base.acc");
480    if not no_patches then do {
481      remove_file (Filename.concat bname "names.inx");
482      Sys.rename tmp_names_inx (Filename.concat bname "names.inx");
483      remove_file (Filename.concat bname "names.acc");
484      Sys.rename tmp_names_acc (Filename.concat bname "names.acc");
485      remove_file (Filename.concat bname "snames.dat");
486      Sys.rename tmp_snames_dat (Filename.concat bname "snames.dat");
487      remove_file (Filename.concat bname "snames.inx");
488      Sys.rename tmp_snames_inx (Filename.concat bname "snames.inx");
489      remove_file (Filename.concat bname "fnames.dat");
490      Sys.rename tmp_fnames_dat (Filename.concat bname "fnames.dat");
491      remove_file (Filename.concat bname "fnames.inx");
492      Sys.rename tmp_fnames_inx (Filename.concat bname "fnames.inx");
493      remove_file (Filename.concat bname "strings.inx");
494      Sys.rename tmp_strings_inx (Filename.concat bname "strings.inx");
495      remove_file (Filename.concat bname "notes");
496      if Sys.file_exists tmp_notes then
497        Sys.rename tmp_notes (Filename.concat bname "notes")
498      else ();
499      if Sys.file_exists tmp_notes_d then do {
500        let notes_d = Filename.concat bname "notes_d" in
501        remove_dir notes_d;
502        Sys.rename tmp_notes_d notes_d;
503      }
504      else ();
505      remove_file (Filename.concat bname "patches");
506      remove_file (Filename.concat bname "patches~");
507      remove_file (Filename.concat bname "tstab");
508      remove_file (Filename.concat bname "tstab_visitor");
509      remove_file (Filename.concat bname "restrict")
510    }
511    else ();
512  }
513;
514
515value output = gen_output False;
516