1-- HASHY McHASHFACE
2
3local M = {}
4_G.d = M
5
6
7local function setdefault(table, key)
8  local val = table[key]
9  if val == nil then
10    val = {}
11    table[key] = val
12  end
13  return val
14end
15
16function M.build_pos_hash(strings)
17  local len_buckets = {}
18  local maxlen = 0
19  for _,s in ipairs(strings) do
20    table.insert(setdefault(len_buckets, #s),s)
21    if #s > maxlen then maxlen = #s end
22  end
23
24  local len_pos_buckets = {}
25  local worst_buck_size = 0
26
27  for len = 1,maxlen do
28    local strs = len_buckets[len]
29    if strs then
30      -- the best position so far generates `best_bucket`
31      -- with `minsize` worst case collisions
32      local bestpos, minsize, best_bucket = nil, #strs*2, nil
33      for pos = 1,len do
34        local try_bucket = {}
35        for _,str in ipairs(strs) do
36          local poschar = string.sub(str, pos, pos)
37          table.insert(setdefault(try_bucket, poschar), str)
38        end
39        local maxsize = 1
40        for _,pos_strs in pairs(try_bucket) do
41          maxsize = math.max(maxsize, #pos_strs)
42        end
43        if maxsize < minsize then
44          bestpos = pos
45          minsize = maxsize
46          best_bucket = try_bucket
47        end
48      end
49      len_pos_buckets[len] = {bestpos, best_bucket}
50      worst_buck_size = math.max(worst_buck_size, minsize)
51    end
52  end
53  return len_pos_buckets, maxlen, worst_buck_size
54end
55
56function M.switcher(put, tab, maxlen, worst_buck_size)
57  local neworder = {}
58  put "  switch (len) {\n"
59  local bucky = worst_buck_size > 1
60  for len = 1,maxlen do
61    local vals = tab[len]
62    if vals then
63      put("    case "..len..": ")
64      local pos, posbuck = unpack(vals)
65      local keys = vim.tbl_keys(posbuck)
66      if #keys > 1 then
67        table.sort(keys)
68        put("switch (str["..(pos-1).."]) {\n")
69        for _,c in ipairs(keys) do
70          local buck = posbuck[c]
71          local startidx = #neworder
72          vim.list_extend(neworder, buck)
73          local endidx = #neworder
74          put("      case '"..c.."': ")
75          put("low = "..startidx.."; ")
76          if bucky then put("high = "..endidx.."; ") end
77          put "break;\n"
78        end
79        put "      default: break;\n"
80        put "    }\n  "
81      else
82          local startidx = #neworder
83          table.insert(neworder, posbuck[keys[1]][1])
84          local endidx = #neworder
85          put("low = "..startidx.."; ")
86          if bucky then put("high = "..endidx.."; ") end
87      end
88      put "  break;\n"
89    end
90  end
91  put "    default: break;\n"
92  put "  }\n"
93  return neworder
94end
95
96function M.hashy_hash(name, strings, access)
97  local stats = {}
98  local put = function(str) table.insert(stats, str) end
99  local len_pos_buckets, maxlen, worst_buck_size = M.build_pos_hash(strings)
100  put("int "..name.."_hash(const char *str, size_t len)\n{\n")
101  if worst_buck_size > 1 then
102    put("  int low = 0, high = 0;\n")
103  else
104    put("  int low = -1;\n")
105  end
106  local neworder = M.switcher(put, len_pos_buckets, maxlen, worst_buck_size)
107  if worst_buck_size > 1 then
108    error [[ not implemented yet ]] -- TODO(bfredl)
109  else
110    put [[
111  if (low < 0) {
112    return -1;
113  }
114  ]]
115    put("if(memcmp(str, "..access("low")..", len)) {\n    return -1;\n  }\n")
116    put "  return low;\n"
117    put "}\n\n"
118  end
119  return neworder, table.concat(stats)
120end
121
122return M
123