1discard """
2action: compile
3"""
4
5# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
6import tables, parseutils, strutils, threadpool
7
# Name of the input file that the top-level `echo` passes to `readChunks`.
# It is handed to `open` unchanged, so a relative name like this one is
# resolved against the current working directory.
const filename = "pagecounts-20160101-050000"
9
type
  Stats = ref object
    ## One record of the page-count data: the four whitespace-separated
    ## columns of an input line, with the numeric columns already parsed.
    projectName: string ## first column, e.g. "en"
    pageTitle: string   ## second column, e.g. "Main_Page"
    requests: int       ## third column
    contentSize: int    ## fourth column
14
proc `$`(stats: Stats): string =
  ## Renders all four fields of `stats` as a single parenthesised,
  ## comma-separated `name: value` list.
  const layout =
    "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)"
  # The numeric fields are stringified up front so that every `$#`
  # placeholder receives a plain string.
  let fields = [
    stats.projectName,
    stats.pageTitle,
    $stats.requests,
    $stats.contentSize
  ]
  result = layout % fields
19
proc parse(chunk: string): Stats =
  ## Scans every line of `chunk` and returns the record of the "en" project
  ## with the largest request count.
  ##
  ## Each line is expected to look like `en Main_Page 242332 4737756101`
  ## (project, page title, requests, content size). Lines whose third or
  ## fourth column is missing are skipped. If no "en" line has a request
  ## count above zero, the returned `Stats` holds empty strings and zeros.
  ## A numeric column that is not a valid integer makes `parseInt` raise
  ## `ValueError`.
  proc takeField(line: string, pos: var int, field: var string) =
    ## Stores the token that starts at `pos` in `field`, then advances `pos`
    ## past that token and the whitespace following it.
    field.setLen(0)
    pos += parseUntil(line, field, Whitespace, pos)
    pos += skipWhitespace(line, pos)

  var best = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)

  # Scratch strings shared by all lines to avoid re-allocating per line.
  var
    project = ""
    title = ""
    hits = ""
    size = ""

  for line in splitLines(chunk):
    var pos = 0
    takeField(line, pos, project)
    takeField(line, pos, title)
    takeField(line, pos, hits)
    takeField(line, pos, size)

    # Only lines that carry both numeric columns are considered.
    if hits.len > 0 and size.len > 0:
      let hitCount = parseInt(hits)
      if project == "en" and hitCount > best.requests:
        # The size column is converted only for a new leader.
        best = Stats(
          projectName: project,
          pageTitle: title,
          requests: hitCount,
          contentSize: parseInt(size)
        )

  return best
55
proc readChunks(filename: string, chunksize = 1000000): Stats =
  ## Reads `filename` in buffers of `chunksize` bytes, hands the complete
  ## lines of each buffer to `parse` on the thread pool, and returns the
  ## per-chunk result with the highest `requests` value.
  ##
  ## A line that straddles two reads is carried over to the next buffer, a
  ## line longer than the buffer makes the buffer grow, and a final line
  ## without a trailing line break is parsed as well. When no chunk reports
  ## a request count above zero, the returned `Stats` holds empty strings
  ## and zeros. `open` raises `IOError` if the file cannot be opened.
  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
  var file = open(filename)
  # Release the handle on every exit path, including exceptions.
  defer: file.close()

  var responses = newSeq[FlowVar[Stats]]()
  # Keep at least one byte of buffer so a non-positive `chunksize` cannot
  # produce zero-length reads that never advance.
  var buffer = newString(max(chunksize, 1))
  # Bytes at the front of `buffer` left over from the previous read, i.e.
  # the beginning of a line whose end has not been read yet.
  var oldBufferLen = 0
  while not endOfFile(file):
    let readSize = file.readChars(buffer, oldBufferLen,
                                  buffer.len - oldBufferLen) + oldBufferLen
    var chunkLen = readSize

    # Walk back to just after the last line break. The `> 0` bound stops the
    # scan before it would index `buffer[-1]`.
    while chunkLen > 0 and buffer[chunkLen - 1] notin NewLines:
      chunkLen.dec

    if chunkLen == 0:
      # No line break at all: keep everything as carry-over. If the buffer
      # is already full, enlarge it so the next read has room to progress.
      oldBufferLen = readSize
      if readSize == buffer.len:
        buffer.setLen(buffer.len * 2)
      continue

    responses.add(spawn parse(buffer[0 ..< chunkLen]))
    oldBufferLen = readSize - chunkLen
    # Move the incomplete tail to the front. Both slices have exactly
    # `oldBufferLen` bytes, so the assignment never resizes the buffer.
    buffer[0 ..< oldBufferLen] = buffer[chunkLen ..< readSize]

  if oldBufferLen > 0:
    # The file did not end with a line break: parse the final line too.
    responses.add(spawn parse(buffer[0 ..< oldBufferLen]))

  for resp in responses:
    let statistic = ^resp
    if statistic.requests > result.requests:
      result = statistic
80
81
when true:
  # Always compiled: scan `filename` and print the winning record using the
  # `$` defined for `Stats`.
  echo($readChunks(filename))
84