1(* $Id: outbase.ml,v 5.21 2007-01-19 01:53:16 ddr Exp $ *) 2(* Copyright (c) 2006-2007 INRIA *) 3 4open Dbdisk; 5open Dutil; 6open Def; 7open Mutil; 8 9value load_ascends_array base = base.data.ascends.load_array (); 10value load_unions_array base = base.data.unions.load_array (); 11value load_couples_array base = base.data.couples.load_array (); 12value load_descends_array base = base.data.descends.load_array (); 13value load_strings_array base = base.data.strings.load_array (); 14value close_base base = base.func.cleanup (); 15 16value save_mem = ref False; 17 18value trace s = 19 if verbose.val then do { Printf.eprintf "*** %s\n" s; flush stderr } 20 else () 21; 22 23value count_error computed found = 24 do { 25 Printf.eprintf "Count error. Computed %d. Found %d.\n" computed found; 26 flush stderr; 27 exit 2 28 } 29; 30 31value just_copy bname what oc oc_acc = 32 do { 33 Printf.eprintf "*** copying %s\n" what; 34 flush stderr; 35 let ic = 36 let ic = Secure.open_in_bin (Filename.concat bname "base") in 37 do { check_magic ic; ic } 38 in 39 let ic_acc = Secure.open_in_bin (Filename.concat bname "base.acc") in 40 let persons_len = input_binary_int ic in 41 let families_len = input_binary_int ic in 42 let strings_len = input_binary_int ic in 43 let persons_array_pos = input_binary_int ic in 44 let ascends_array_pos = input_binary_int ic in 45 let unions_array_pos = input_binary_int ic in 46 let families_array_pos = input_binary_int ic in 47 let couples_array_pos = input_binary_int ic in 48 let descends_array_pos = input_binary_int ic in 49 let strings_array_pos = input_binary_int ic in 50 let _ (*norigin_file*) = input_value ic in 51 let (beg_pos, end_pos, beg_acc_pos, array_len) = 52 match what with 53 [ "persons" -> 54 let pos = 0 in 55 (persons_array_pos, ascends_array_pos, pos, persons_len) 56 | "ascends" -> 57 let pos = persons_len * Iovalue.sizeof_long in 58 (ascends_array_pos, unions_array_pos, pos, persons_len) 59 | "unions" -> 60 let pos = 2 * persons_len * Iovalue.sizeof_long in 61 (unions_array_pos, families_array_pos, pos, persons_len) 62 | "families" -> 63 let pos = 3 * persons_len * Iovalue.sizeof_long in 64 (families_array_pos, couples_array_pos, pos, families_len) 65 | "couples" -> 66 let pos = (3 * persons_len + families_len) * Iovalue.sizeof_long in 67 (couples_array_pos, descends_array_pos, pos, families_len) 68 | "descends" -> 69 let pos = 70 (3 * persons_len + 2 * families_len) * Iovalue.sizeof_long 71 in 72 (descends_array_pos, strings_array_pos, pos, families_len) 73 | "strings" -> 74 let pos = 75 (3 * persons_len + 3 * families_len) * Iovalue.sizeof_long 76 in 77 (strings_array_pos, in_channel_length ic, pos, strings_len) 78 | _ -> failwith ("just copy " ^ what) ] 79 in 80 let shift = pos_out oc - beg_pos in 81 seek_in ic beg_pos; 82 let rec loop pos = 83 if pos = end_pos then close_in ic 84 else do { output_char oc (input_char ic); loop (pos + 1) } 85 in 86 loop beg_pos; 87 seek_in ic_acc beg_acc_pos; 88 let rec loop len = 89 if len = array_len then close_in ic_acc 90 else do { 91 output_binary_int oc_acc (input_binary_int ic_acc + shift); 92 loop (len + 1) 93 } 94 in 95 loop 0; 96 } 97; 98 99value make_name_index base = 100 let t = Array.make table_size [| |] in 101 let add_name key valu = 102 let key = Name.crush (Name.abbrev key) in 103 let i = Hashtbl.hash key mod Array.length t in 104 if array_mem valu t.(i) then () 105 else t.(i) := Array.append [| valu |] t.(i) 106 in 107 let rec add_names ip = 108 fun 109 [ [] -> () 110 | [n :: nl] -> do { add_name n ip; add_names ip nl } ] 111 in 112 do { 113 for i = 0 to base.data.persons.len - 1 do { 114 let p = base.data.persons.get i in 115 let first_name = p_first_name base p in 116 let surname = p_surname base p in 117 if first_name <> "?" && surname <> "?" then 118 let names = 119 [Name.lower (first_name ^ " " ^ surname) :: 120 Dutil.dsk_person_misc_names base p (fun p -> p.titles)] 121 in 122 add_names p.key_index names 123 else (); 124 }; 125 t 126 } 127; 128 129value create_name_index oc_inx oc_inx_acc base = 130 let ni = make_name_index base in 131 let bpos = pos_out oc_inx in 132 do { 133 output_value_no_sharing oc_inx (ni : name_index_data); 134 let epos = 135 Iovalue.output_array_access oc_inx_acc (Array.get ni) (Array.length ni) 136 bpos 137 in 138 if epos <> pos_out oc_inx then count_error epos (pos_out oc_inx) 139 else () 140 } 141; 142 143value add_name t key valu = 144 let key = Name.crush_lower key in 145 let i = Hashtbl.hash key mod Array.length t in 146 if array_mem valu t.(i) then () 147 else t.(i) := Array.append [| valu |] t.(i) 148; 149 150value make_strings_of_fsname base = 151 let t = Array.make table_size [| |] in 152 do { 153 for i = 0 to base.data.persons.len - 1 do { 154 let p = poi base (Adef.iper_of_int i) in 155 let first_name = p_first_name base p in 156 let surname = p_surname base p in 157 if first_name <> "?" then add_name t first_name p.first_name else (); 158 if surname <> "?" then do { 159 add_name t surname p.surname; 160 List.iter (fun sp -> add_name t sp p.surname) 161 (surnames_pieces surname); 162 } 163 else (); 164 }; 165 t 166 } 167; 168 169value create_strings_of_fsname oc_inx oc_inx_acc base = 170 let t = make_strings_of_fsname base in 171 let bpos = pos_out oc_inx in 172 do { 173 output_value_no_sharing oc_inx (t : strings_of_fsname); 174 let epos = 175 Iovalue.output_array_access oc_inx_acc (Array.get t) (Array.length t) 176 bpos 177 in 178 if epos <> pos_out oc_inx then count_error epos (pos_out oc_inx) 179 else () 180 } 181; 182 183value is_prime a = 184 loop 2 where rec loop b = 185 if a / b < b then True else if a mod b = 0 then False else loop (b + 1) 186; 187 188value rec prime_after n = if is_prime n then n else prime_after (n + 1); 189 190value output_strings_hash oc2 base = 191 let () = base.data.strings.load_array () in 192 let strings_array = base.data.strings in 193 let taba = 194 Array.make 195 (min Sys.max_array_length 196 (prime_after (max 2 (10 * strings_array.len)))) 197 (-1) 198 in 199 let tabl = Array.make strings_array.len (-1) in 200 do { 201 for i = 0 to strings_array.len - 1 do { 202 let ia = Hashtbl.hash (base.data.strings.get i) mod Array.length taba in 203 tabl.(i) := taba.(ia); 204 taba.(ia) := i; 205 }; 206 output_binary_int oc2 (Array.length taba); 207 output_binary_int oc2 0; 208 output_binary_int oc2 0; 209 for i = 0 to Array.length taba - 1 do { 210 output_binary_int oc2 taba.(i) 211 }; 212 for i = 0 to Array.length tabl - 1 do { 213 output_binary_int oc2 tabl.(i) 214 }; 215 } 216; 217 218value output_surname_index oc2 base tmp_snames_inx tmp_snames_dat = 219 let module IstrTree = 220 Btree.Make 221 (struct 222 type t = dsk_istr; 223 value compare = compare_istr_fun base.data; 224 end) 225 in 226 let bt = ref IstrTree.empty in 227 do { 228 for i = 0 to base.data.persons.len - 1 do { 229 let p = poi base (Adef.iper_of_int i) in 230 let a = 231 try IstrTree.find p.surname bt.val with [ Not_found -> [] ] 232 in 233 bt.val := IstrTree.add p.surname [p.key_index :: a] bt.val 234 }; 235 (* obsolete table: saved by compatibility with GeneWeb versions <= 4.09, 236 i.e. the created database can be still read by these versions but this 237 table will not be used in versions >= 4.10 *) 238 output_value_no_sharing oc2 (bt.val : IstrTree.t (list iper)); 239 (* new table created from version >= 4.10 *) 240 let oc_sn_dat = Secure.open_out_bin tmp_snames_dat in 241 let bt2 = 242 IstrTree.map 243 (fun ipl -> 244 let i = pos_out oc_sn_dat in 245 do { 246 output_binary_int oc_sn_dat (List.length ipl); 247 List.iter 248 (fun ip -> output_binary_int oc_sn_dat (Adef.int_of_iper ip)) 249 ipl; 250 i 251 }) 252 bt.val 253 in 254 close_out oc_sn_dat; 255 let oc_sn_inx = Secure.open_out_bin tmp_snames_inx in 256 output_value_no_sharing oc_sn_inx (bt2 : IstrTree.t int); 257 close_out oc_sn_inx; 258 } 259; 260 261value output_first_name_index oc2 base tmp_fnames_inx tmp_fnames_dat = 262 let module IstrTree = 263 Btree.Make 264 (struct 265 type t = dsk_istr; 266 value compare = compare_istr_fun base.data; 267 end) 268 in 269 let bt = ref IstrTree.empty in 270 do { 271 for i = 0 to base.data.persons.len - 1 do { 272 let p = poi base (Adef.iper_of_int i) in 273 let a = 274 try IstrTree.find p.first_name bt.val with [ Not_found -> [] ] 275 in 276 bt.val := IstrTree.add p.first_name [p.key_index :: a] bt.val 277 }; 278 (* obsolete table: saved by compatibility with GeneWeb versions <= 4.09, 279 i.e. the created database can be still read by these versions but this 280 table will not be used in versions >= 4.10 *) 281 output_value_no_sharing oc2 (bt.val : IstrTree.t (list iper)); 282 (* new table created from version >= 4.10 *) 283 let oc_fn_dat = Secure.open_out_bin tmp_fnames_dat in 284 let bt2 = 285 IstrTree.map 286 (fun ipl -> 287 let i = pos_out oc_fn_dat in 288 do { 289 output_binary_int oc_fn_dat (List.length ipl); 290 List.iter 291 (fun ip -> output_binary_int oc_fn_dat (Adef.int_of_iper ip)) 292 ipl; 293 i 294 }) 295 bt.val 296 in 297 close_out oc_fn_dat; 298 let oc_fn_inx = Secure.open_out_bin tmp_fnames_inx in 299 output_value_no_sharing oc_fn_inx (bt2 : IstrTree.t int); 300 close_out oc_fn_inx; 301 } 302; 303 304value gen_output no_patches bname base = 305 let bname = 306 if Filename.check_suffix bname ".gwb" then bname else bname ^ ".gwb" 307 in 308 do { 309 try Unix.mkdir bname 0o755 with _ -> (); 310 let tmp_base = Filename.concat bname "1base" in 311 let tmp_base_acc = Filename.concat bname "1base.acc" in 312 let tmp_names_inx = Filename.concat bname "1names.inx" in 313 let tmp_names_acc = Filename.concat bname "1names.acc" in 314 let tmp_snames_inx = Filename.concat bname "1snames.inx" in 315 let tmp_snames_dat = Filename.concat bname "1snames.dat" in 316 let tmp_fnames_inx = Filename.concat bname "1fnames.inx" in 317 let tmp_fnames_dat = Filename.concat bname "1fnames.dat" in 318 let tmp_strings_inx = Filename.concat bname "1strings.inx" in 319 let tmp_notes = Filename.concat bname "1notes" in 320 let tmp_notes_d = Filename.concat bname "1notes_d" in 321 if not no_patches then do { 322 load_ascends_array base; 323 load_unions_array base; 324 load_couples_array base; 325 load_descends_array base; 326 load_strings_array base; 327 } 328 else (); 329 let oc = Secure.open_out_bin tmp_base in 330 let oc_acc = Secure.open_out_bin tmp_base_acc in 331 let output_array arrname arr = 332 let bpos = pos_out oc in 333 do { 334 Printf.eprintf "*** saving %s array\n" arrname; 335 flush stderr; 336 arr.output_array oc; 337 let epos = Iovalue.output_array_access oc_acc arr.get arr.len bpos in 338 if epos <> pos_out oc then count_error epos (pos_out oc) else () 339 } 340 in 341 try 342 do { 343 output_string oc 344 (if utf_8_db.val then magic_gwb else magic_gwb_iso_8859_1); 345 output_binary_int oc base.data.persons.len; 346 output_binary_int oc base.data.families.len; 347 output_binary_int oc base.data.strings.len; 348 let array_start_indexes = pos_out oc in 349 output_binary_int oc 0; 350 output_binary_int oc 0; 351 output_binary_int oc 0; 352 output_binary_int oc 0; 353 output_binary_int oc 0; 354 output_binary_int oc 0; 355 output_binary_int oc 0; 356 output_value_no_sharing oc (base.data.bnotes.norigin_file : string); 357 let persons_array_pos = pos_out oc in 358 if not no_patches then output_array "persons" base.data.persons 359 else just_copy bname "persons" oc oc_acc; 360 let ascends_array_pos = pos_out oc in 361 if not no_patches then () else trace "saving ascends"; 362 output_array "ascends" base.data.ascends; 363 let unions_array_pos = pos_out oc in 364 if not no_patches then output_array "unions" base.data.unions 365 else just_copy bname "unions" oc oc_acc; 366 let families_array_pos = pos_out oc in 367 if not no_patches then output_array "families" base.data.families 368 else just_copy bname "families" oc oc_acc; 369 let couples_array_pos = pos_out oc in 370 if not no_patches then output_array "couples" base.data.couples 371 else just_copy bname "couples" oc oc_acc; 372 let descends_array_pos = pos_out oc in 373 if not no_patches then output_array "descends" base.data.descends 374 else just_copy bname "descends" oc oc_acc; 375 let strings_array_pos = pos_out oc in 376 if not no_patches then output_array "strings" base.data.strings 377 else just_copy bname "strings" oc oc_acc; 378 seek_out oc array_start_indexes; 379 output_binary_int oc persons_array_pos; 380 output_binary_int oc ascends_array_pos; 381 output_binary_int oc unions_array_pos; 382 output_binary_int oc families_array_pos; 383 output_binary_int oc couples_array_pos; 384 output_binary_int oc descends_array_pos; 385 output_binary_int oc strings_array_pos; 386 base.data.families.clear_array (); 387 base.data.descends.clear_array (); 388 close_out oc; 389 close_out oc_acc; 390 if not no_patches then 391 let oc_inx = Secure.open_out_bin tmp_names_inx in 392 let oc_inx_acc = Secure.open_out_bin tmp_names_acc in 393 let oc2 = Secure.open_out_bin tmp_strings_inx in 394 try 395 do { 396 trace "create name index"; 397 output_binary_int oc_inx 0; 398 create_name_index oc_inx oc_inx_acc base; 399 base.data.ascends.clear_array (); 400 base.data.unions.clear_array (); 401 base.data.couples.clear_array (); 402 if save_mem.val then do { trace "compacting"; Gc.compact () } 403 else (); 404 let surname_or_first_name_pos = pos_out oc_inx in 405 trace "create strings of fsname"; 406 create_strings_of_fsname oc_inx oc_inx_acc base; 407 seek_out oc_inx 0; 408 output_binary_int oc_inx surname_or_first_name_pos; 409 close_out oc_inx; 410 close_out oc_inx_acc; 411 if save_mem.val then do { trace "compacting"; Gc.compact () } 412 else (); 413 trace "create string index"; 414 output_strings_hash oc2 base; 415 if save_mem.val then do { trace "compacting"; Gc.compact () } 416 else (); 417 let surname_pos = pos_out oc2 in 418 trace "create surname index"; 419 output_surname_index oc2 base tmp_snames_inx tmp_snames_dat; 420 if save_mem.val then do { 421 trace "compacting"; Gc.compact () 422 } 423 else (); 424 let first_name_pos = pos_out oc2 in 425 trace "create first name index"; 426 output_first_name_index oc2 base tmp_fnames_inx tmp_fnames_dat; 427 seek_out oc2 int_size; 428 output_binary_int oc2 surname_pos; 429 output_binary_int oc2 first_name_pos; 430 let s = base.data.bnotes.nread "" RnAll in 431 if s = "" then () 432 else do { 433 let oc_not = Secure.open_out tmp_notes in 434 output_string oc_not s; 435 close_out oc_not; 436 }; 437 close_out oc2; 438 List.iter 439 (fun f -> 440 let s = base.data.bnotes.nread f RnAll in 441 let fname = Filename.concat tmp_notes_d (f ^ ".txt") in 442 do { 443 mkdir_p (Filename.dirname fname); 444 let oc = open_out fname in 445 output_string oc s; 446 close_out oc; 447 }) 448 (List.rev (base.data.bnotes.efiles ())); 449 } 450 with e -> 451 do { 452 try close_out oc_inx with _ -> (); 453 try close_out oc_inx_acc with _ -> (); 454 try close_out oc2 with _ -> (); 455 raise e 456 } 457 else (); 458 trace "ok"; 459 } 460 with e -> 461 do { 462 try close_out oc with _ -> (); 463 try close_out oc_acc with _ -> (); 464 remove_file tmp_base; 465 remove_file tmp_base_acc; 466 if not no_patches then do { 467 remove_file tmp_names_inx; 468 remove_file tmp_names_acc; 469 remove_file tmp_strings_inx; 470 remove_dir tmp_notes_d; 471 } 472 else (); 473 raise e 474 }; 475 close_base base; 476 remove_file (Filename.concat bname "base"); 477 Sys.rename tmp_base (Filename.concat bname "base"); 478 remove_file (Filename.concat bname "base.acc"); 479 Sys.rename tmp_base_acc (Filename.concat bname "base.acc"); 480 if not no_patches then do { 481 remove_file (Filename.concat bname "names.inx"); 482 Sys.rename tmp_names_inx (Filename.concat bname "names.inx"); 483 remove_file (Filename.concat bname "names.acc"); 484 Sys.rename tmp_names_acc (Filename.concat bname "names.acc"); 485 remove_file (Filename.concat bname "snames.dat"); 486 Sys.rename tmp_snames_dat (Filename.concat bname "snames.dat"); 487 remove_file (Filename.concat bname "snames.inx"); 488 Sys.rename tmp_snames_inx (Filename.concat bname "snames.inx"); 489 remove_file (Filename.concat bname "fnames.dat"); 490 Sys.rename tmp_fnames_dat (Filename.concat bname "fnames.dat"); 491 remove_file (Filename.concat bname "fnames.inx"); 492 Sys.rename tmp_fnames_inx (Filename.concat bname "fnames.inx"); 493 remove_file (Filename.concat bname "strings.inx"); 494 Sys.rename tmp_strings_inx (Filename.concat bname "strings.inx"); 495 remove_file (Filename.concat bname "notes"); 496 if Sys.file_exists tmp_notes then 497 Sys.rename tmp_notes (Filename.concat bname "notes") 498 else (); 499 if Sys.file_exists tmp_notes_d then do { 500 let notes_d = Filename.concat bname "notes_d" in 501 remove_dir notes_d; 502 Sys.rename tmp_notes_d notes_d; 503 } 504 else (); 505 remove_file (Filename.concat bname "patches"); 506 remove_file (Filename.concat bname "patches~"); 507 remove_file (Filename.concat bname "tstab"); 508 remove_file (Filename.concat bname "tstab_visitor"); 509 remove_file (Filename.concat bname "restrict") 510 } 511 else (); 512 } 513; 514 515value output = gen_output False; 516