#!/bin/sh
#
# Builds a corpus from all small json files in the source directory.
# The files are renamed to the sha1 of their content, and suffixed
# .json. The files are zipped into a flat file named corpus.zip
#
# "Small" means matched by find's `-size -4k`. The source directory is
# the parent of the directory containing this script. corpus.zip is
# written to the current working directory.

set -eu

tmp=$(mktemp -d)
# Remove the staging directory on every exit path, including aborts
# triggered by `set -e`, so failed runs do not leak temp directories.
trap 'rm -rf -- "$tmp"' EXIT

root=$(readlink -f "$(dirname "$0")/..")

# find passes the paths directly to a helper shell as arguments, so
# names containing whitespace, backslashes or newlines survive intact
# (a `find | while read` pipeline would mangle them).
# Files with identical content map to the same sha1-derived name and
# therefore collapse into a single corpus entry.
find "$root" -type f -size -4k -name '*.json' -exec sh -c '
  set -eu
  dest=$1
  shift
  for json in "$@"; do
    sum=$(sha1sum < "$json" | cut -f1 -d" ")
    cp -- "$json" "$dest/$sum.json"
  done
' sh "$tmp" {} +

# --junk-paths stores only the file names, which yields a flat archive.
# NOTE(review): zip updates an existing corpus.zip instead of replacing
# it, so entries from earlier runs are kept -- confirm that is intended.
zip --quiet --junk-paths -r corpus.zip "$tmp"