discard """
action: compile
"""

# See this page for info about the format https://wikitech.wikimedia.org/wiki/Analytics/Data/Pagecounts-all-sites
import tables, parseutils, strutils, threadpool

const filename = "pagecounts-20160101-050000"

type
  # One page entry: the project and page it belongs to plus its two counters.
  Stats = ref object
    projectName, pageTitle: string
    requests, contentSize: int

proc `$`(stats: Stats): string =
  ## Renders all four fields of `stats` as a single parenthesised string.
  "(projectName: $#, pageTitle: $#, requests: $#, contentSize: $#)" % [
    stats.projectName, stats.pageTitle, $stats.requests, $stats.contentSize
  ]

proc parse(chunk: string): Stats =
  ## Scans every line of `chunk` and returns the entry of project "en" with
  ## the highest request count. When no line qualifies, the returned `Stats`
  ## has empty strings and zero counters.
  ##
  ## `parseInt` raises `ValueError` if a count column is not a valid integer.
  # Each line looks like: en Main_Page 242332 4737756101
  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)

  # The four column buffers are declared once and reused for every line.
  var projectName = ""
  var pageTitle = ""
  var requests = ""
  var contentSize = ""
  for line in chunk.splitLines:
    # `i` is the read position inside `line`; each step consumes one column
    # and then the whitespace that follows it.
    var i = 0
    projectName.setLen(0)
    i.inc parseUntil(line, projectName, Whitespace, i)
    i.inc skipWhitespace(line, i)
    pageTitle.setLen(0)
    i.inc parseUntil(line, pageTitle, Whitespace, i)
    i.inc skipWhitespace(line, i)
    requests.setLen(0)
    i.inc parseUntil(line, requests, Whitespace, i)
    i.inc skipWhitespace(line, i)
    contentSize.setLen(0)
    i.inc parseUntil(line, contentSize, Whitespace, i)
    i.inc skipWhitespace(line, i)

    if requests.len == 0 or contentSize.len == 0:
      # Ignore lines with either of the params that are empty.
      continue

    let requestsInt = requests.parseInt
    if requestsInt > result.requests and projectName == "en":
      result = Stats(
        projectName: projectName,
        pageTitle: pageTitle,
        requests: requestsInt,
        contentSize: contentSize.parseInt
      )

proc readChunks(filename: string, chunksize = 1000000): Stats =
  ## Reads `filename` through a buffer of `chunksize` bytes, cuts each buffer
  ## at its last line break, spawns `parse` on that part and returns the
  ## spawned result with the highest request count.
  ##
  ## The bytes after the last line break are carried over to the front of
  ## the buffer for the next read. A line that does not fit makes the buffer
  ## grow, and whatever is left at end of file is parsed as a final chunk.
  ## `open` raises `IOError` when the file cannot be opened.
  result = Stats(projectName: "", pageTitle: "", requests: 0, contentSize: 0)
  var file = open(filename)
  # Close the handle on every exit path, including a raised exception.
  defer: file.close()

  var responses = newSeq[FlowVar[Stats]]()
  # At least one byte, so a non-positive `chunksize` still makes progress.
  var buffer = newString(max(chunksize, 1))
  # Number of carried-over bytes currently stored at the front of `buffer`.
  var oldBufferLen = 0
  while not endOfFile(file):
    let readSize = file.readChars(buffer, oldBufferLen,
                                  buffer.len - oldBufferLen) + oldBufferLen
    var chunkLen = readSize

    # `> 0` keeps `chunkLen - 1` a valid index when there is no line break.
    while chunkLen > 0 and buffer[chunkLen - 1] notin NewLines:
      # Find where the last line ends
      chunkLen.dec

    if chunkLen == 0:
      # No complete line yet: keep everything and read more, enlarging the
      # buffer first when it is already full.
      oldBufferLen = readSize
      if readSize == buffer.len:
        buffer.setLen(buffer.len * 2)
      continue

    responses.add(spawn parse(buffer[0 ..< chunkLen]))
    oldBufferLen = readSize - chunkLen
    # Move the unfinished tail to the front. Source indices are always
    # ahead of destination indices, so a forward copy is safe.
    for i in 0 ..< oldBufferLen:
      buffer[i] = buffer[chunkLen + i]

  if oldBufferLen > 0:
    # The file did not end with a line break; parse the remaining bytes too.
    responses.add(spawn parse(buffer[0 ..< oldBufferLen]))

  for resp in responses:
    # `^` blocks until the spawned `parse` call has delivered its value.
    let statistic = ^resp
    if statistic.requests > result.requests:
      result = statistic


when true:
  echo readChunks(filename)