1# - Manage data files stored outside source tree
2# Use this module to unambiguously reference data files stored outside the
3# source tree and fetch them at build time from arbitrary local and remote
4# content-addressed locations.  Functions provided by this module recognize
5# arguments with the syntax "DATA{<name>}" as references to external data,
6# replace them with full paths to local copies of those data, and create build
7# rules to fetch and update the local copies.
8#
9# The DATA{} syntax is literal and the <name> is a full or relative path
10# within the source tree.  The source tree must contain either a real data
11# file at <name> or a "content link" at <name><ext> containing a hash of the
12# real file using a hash algorithm corresponding to <ext>.  For example, the
13# argument "DATA{img.png}" may be satisfied by either a real "img.png" file in
14# the current source directory or a "img.png.md5" file containing its MD5 sum.
15#
16# The 'ExternalData_Expand_Arguments' function evaluates DATA{} references
17# in its arguments and constructs a new list of arguments:
18#  ExternalData_Expand_Arguments(
19#    <target>   # Name of data management target
20#    <outVar>   # Output variable
21#    [args...]  # Input arguments, DATA{} allowed
22#    )
23# It replaces each DATA{} reference in an argument with the full path of a
24# real data file on disk that will exist after the <target> builds.
25#
26# The 'ExternalData_Add_Test' function wraps around the CMake add_test()
27# command but supports DATA{} references in its arguments:
28#  ExternalData_Add_Test(
29#    <target>   # Name of data management target
30#    ...        # Arguments of add_test(), DATA{} allowed
31#    )
32# It passes its arguments through ExternalData_Expand_Arguments and then
33# invokes add_test() using the results.
34#
35# The 'ExternalData_Add_Target' function creates a custom target to manage
36# local instances of data files stored externally:
37#  ExternalData_Add_Target(
38#    <target>   # Name of data management target
39#    )
40# It creates custom commands in the target as necessary to make data files
41# available for each DATA{} reference previously evaluated by other functions
42# provided by this module.  A list of URL templates must be provided in the
43# variable ExternalData_URL_TEMPLATES using the placeholders "%(algo)" and
44# "%(hash)" in each template.  Data fetch rules try each URL template in order
45# by substituting the hash algorithm name for "%(algo)" and the hash value for
46# "%(hash)".
47#
48# The following hash algorithms are supported:
49#    %(algo)     <ext>     Description
50#    -------     -----     -----------
51#    MD5         .md5      Message-Digest Algorithm 5, RFC 1321
52# Note that the hashes are used only for unique data identification and
53# download verification.  This is not security software.
54#
55# Example usage:
56#   include(ExternalData)
57#   set(ExternalData_URL_TEMPLATES "file:///local/%(algo)/%(hash)"
58#                                  "http://data.org/%(algo)/%(hash)")
59#   ExternalData_Add_Test(MyData
60#     NAME MyTest
61#     COMMAND MyExe DATA{MyInput.png}
62#     )
63#   ExternalData_Add_Target(MyData)
64# When test "MyTest" runs the "DATA{MyInput.png}" argument will be replaced by
65# the full path to a real instance of the data file "MyInput.png" on disk.  If
66# the source tree contains a content link such as "MyInput.png.md5" then the
67# "MyData" target creates a real "MyInput.png" in the build tree.
68#
69# The DATA{} syntax can be told to fetch a file series using the form
70# "DATA{<name>,:}", where the ":" is literal.  If the source tree contains a
71# group of files or content links named like a series then a reference to one
72# member adds rules to fetch all of them.  Although all members of a series
73# are fetched, only the file originally named by the DATA{} argument is
74# substituted for it.  The default configuration recognizes file series names
75# ending with "#.ext", "_#.ext", ".#.ext", or "-#.ext" where "#" is a sequence
76# of decimal digits and ".ext" is any single extension.  Configure it with a
77# regex that parses <number> and <suffix> parts from the end of <name>:
78#  ExternalData_SERIES_PARSE = regex of the form (<number>)(<suffix>)$
79# For more complicated cases set:
80#  ExternalData_SERIES_PARSE = regex with at least two () groups
81#  ExternalData_SERIES_PARSE_PREFIX = <prefix> regex group number, if any
82#  ExternalData_SERIES_PARSE_NUMBER = <number> regex group number
83#  ExternalData_SERIES_PARSE_SUFFIX = <suffix> regex group number
84# Configure series number matching with a regex that matches the
85# <number> part of series members named <prefix><number><suffix>:
86#  ExternalData_SERIES_MATCH = regex matching <number> in all series members
87# Note that the <suffix> of a series does not include a hash-algorithm
88# extension.
89#
90# The DATA{} syntax can alternatively match files associated with the named
91# file and contained in the same directory.  Associated files may be specified
92# by options using the syntax DATA{<name>,<opt1>,<opt2>,...}.  Each option may
93# specify one file by name or specify a regular expression to match file names
94# using the syntax REGEX:<regex>.  For example, the arguments
95#   DATA{MyData/MyInput.mhd,MyInput.img}                   # File pair
96#   DATA{MyData/MyFrames00.png,REGEX:MyFrames[0-9]+\\.png} # Series
# will pass MyInput.mhd and MyFrames00.png on the command line but ensure
98# that the associated files are present next to them.
99#
100# The DATA{} syntax may reference a directory using a trailing slash and a
101# list of associated files.  The form DATA{<name>/,<opt1>,<opt2>,...} adds
102# rules to fetch any files in the directory that match one of the associated
103# file options.  For example, the argument DATA{MyDataDir/,REGEX:.*} will pass
104# the full path to a MyDataDir directory on the command line and ensure that
105# the directory contains files corresponding to every file or content link in
106# the MyDataDir source directory.
107#
108# The variable ExternalData_LINK_CONTENT may be set to the name of a supported
109# hash algorithm to enable automatic conversion of real data files referenced
110# by the DATA{} syntax into content links.  For each such <file> a content
111# link named "<file><ext>" is created.  The original file is renamed to the
112# form ".ExternalData_<algo>_<hash>" to stage it for future transmission to
113# one of the locations in the list of URL templates (by means outside the
114# scope of this module).  The data fetch rule created for the content link
115# will use the staged object if it cannot be found using any URL template.
116#
117# The variable ExternalData_OBJECT_STORES may be set to a list of local
118# directories that store objects using the layout <dir>/%(algo)/%(hash).
119# These directories will be searched first for a needed object.  If the object
120# is not available in any store then it will be fetched remotely using the URL
121# templates and added to the first local store listed.  If no stores are
122# specified the default is a location inside the build tree.
123#
124# The variable ExternalData_SOURCE_ROOT may be set to the highest source
125# directory containing any path named by a DATA{} reference.  The default is
126# CMAKE_SOURCE_DIR.  ExternalData_SOURCE_ROOT and CMAKE_SOURCE_DIR must refer
127# to directories within a single source distribution (e.g. they come together
128# in one tarball).
129#
130# The variable ExternalData_BINARY_ROOT may be set to the directory to hold
131# the real data files named by expanded DATA{} references.  The default is
132# CMAKE_BINARY_DIR.  The directory layout will mirror that of content links
133# under ExternalData_SOURCE_ROOT.
134#
135# Variables ExternalData_TIMEOUT_INACTIVITY and ExternalData_TIMEOUT_ABSOLUTE
136# set the download inactivity and absolute timeouts, in seconds.  The defaults
137# are 60 seconds and 300 seconds, respectively.  Set either timeout to 0
138# seconds to disable enforcement.
139
140#=============================================================================
141# Copyright 2010-2013 Kitware, Inc.
142# All rights reserved.
143#
144# Redistribution and use in source and binary forms, with or without
145# modification, are permitted provided that the following conditions
146# are met:
147#
148# * Redistributions of source code must retain the above copyright
149#   notice, this list of conditions and the following disclaimer.
150#
151# * Redistributions in binary form must reproduce the above copyright
152#   notice, this list of conditions and the following disclaimer in the
153#   documentation and/or other materials provided with the distribution.
154#
155# * Neither the names of Kitware, Inc., the Insight Software Consortium,
156#   nor the names of their contributors may be used to endorse or promote
157#   products derived from this software without specific prior written
158#   permission.
159#
160# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
161# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
162# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
163# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
164# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
165# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
166# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
167# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
168# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
169# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
170# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
171#=============================================================================
172
# ExternalData_add_test(<target> <add_test() arguments...>)
#
# Wrapper around add_test() that accepts DATA{} references.  The arguments
# are run through ExternalData_expand_arguments() for <target> and the
# result is forwarded to add_test().
function(ExternalData_add_test target)
  # Forward ARGN as one quoted string so that escaped semicolons inside
  # individual arguments are preserved.
  ExternalData_expand_arguments("${target}" expanded_args "${ARGN}")
  add_test(${expanded_args})
endfunction()
178
# ExternalData_add_target(<target>)
#
# Creates the custom target <target> (part of "all") together with one custom
# command per data file recorded for <target> in the global properties
# _ExternalData_<target>_LOCAL and _ExternalData_<target>_FETCH.  Each command
# re-runs this module in script mode with ExternalData_ACTION set to "local"
# or "fetch".  ExternalData_URL_TEMPLATES must be set by the caller.
function(ExternalData_add_target target)
  if(NOT ExternalData_URL_TEMPLATES)
    message(FATAL_ERROR "ExternalData_URL_TEMPLATES is not set!")
  endif()
  if(NOT ExternalData_OBJECT_STORES)
    # No store configured: default to one inside the build tree.
    set(ExternalData_OBJECT_STORES ${CMAKE_BINARY_DIR}/ExternalData/Objects)
  endif()

  # Write the per-target configuration file loaded by the script-mode runs.
  set(config ${CMAKE_CURRENT_BINARY_DIR}/${target}_config.cmake)
  configure_file(${_ExternalData_SELF_DIR}/ExternalData_config.cmake.in ${config} @ONLY)

  # Outputs the custom target will depend on.
  set(outputs "")

  # A marker variable "_ExternalData_FILE_<path>" is defined for every output
  # that already has a rule so that no output gets two rules.  Local entries
  # are handled first, which makes real files win over content links.

  # Rules that copy or link data already present in the source tree.
  get_property(local_entries GLOBAL PROPERTY _ExternalData_${target}_LOCAL)
  foreach(record IN LISTS local_entries)
    # Each record has the form "<output file>|<source name>".
    string(REPLACE "|" ";" fields "${record}")
    list(GET fields 0 out_file)
    list(GET fields 1 src_name)
    if(NOT DEFINED "_ExternalData_FILE_${out_file}")
      set("_ExternalData_FILE_${out_file}" 1)
      add_custom_command(
        OUTPUT "${out_file}"
        COMMENT "Generating ${out_file}"
        MAIN_DEPENDENCY "${src_name}"
        COMMAND ${CMAKE_COMMAND} -Drelative_top=${CMAKE_BINARY_DIR}
                                 -Dfile=${out_file} -Dname=${src_name}
                                 -DExternalData_ACTION=local
                                 -DExternalData_CONFIG=${config}
                                 -P ${_ExternalData_SELF}
        )
      list(APPEND outputs "${out_file}")
    endif()
  endforeach()

  # Rules that fetch data referenced through content links.
  get_property(fetch_entries GLOBAL PROPERTY _ExternalData_${target}_FETCH)
  foreach(record IN LISTS fetch_entries)
    # Each record has the form "<output file>|<source name>|<link extension>".
    string(REPLACE "|" ";" fields "${record}")
    list(GET fields 0 out_file)
    list(GET fields 1 src_name)
    list(GET fields 2 link_ext)
    if(NOT DEFINED "_ExternalData_FILE_${out_file}")
      set("_ExternalData_FILE_${out_file}" 1)
      # The hash/timestamp file is the primary output as far as the build is
      # concerned; the data file itself is listed second in case it is a
      # broken link.  This order lets CMake hide from the make tool that a
      # symlink target may not be newer than the input.  The comment names
      # the data file because that is what users care about.
      set(stamp_file "${out_file}${link_ext}-stamp")
      add_custom_command(
        OUTPUT "${stamp_file}" "${out_file}"
        COMMENT "Generating ${out_file}"
        # Re-run whenever the content link (and thus the hash) changes.
        MAIN_DEPENDENCY "${src_name}${link_ext}"
        COMMAND ${CMAKE_COMMAND} -Drelative_top=${CMAKE_BINARY_DIR}
                                 -Dfile=${out_file} -Dname=${src_name} -Dext=${link_ext}
                                 -DExternalData_ACTION=fetch
                                 -DExternalData_CONFIG=${config}
                                 -P ${_ExternalData_SELF}
        )
      list(APPEND outputs "${stamp_file}")
    endif()
  endforeach()

  # The target that drives every rule created above.
  add_custom_target(${target} ALL DEPENDS ${outputs})
endfunction()
250
# ExternalData_expand_arguments(<target> <outArgsVar> [args...])
#
# Stores in <outArgsVar> (in the caller's scope) a copy of the given arguments
# in which every "DATA{...}" reference has been replaced by the path returned
# from _ExternalData_arg() for <target>.  Arguments without a reference are
# passed through unchanged.
function(ExternalData_expand_arguments target outArgsVar)
  # A DATA{} reference, and any run of text that does not start one.
  set(data_regex "DATA{([^;{}\r\n]*)}")
  set(other_regex "([^D]|D[^A]|DA[^T]|DAT[^A]|DATA[^{])+|.")
  set(expanded "")
  # Iterating over the list un-escapes semicolons inside each element, so
  # they are escaped again wherever another list expansion follows.
  foreach(arg IN LISTS ARGN)
    if(NOT "x${arg}" MATCHES "${data_regex}")
      # Nothing to replace in this argument.
      set(result "${arg}")
    else()
      # Protect in-value semicolons from the inner foreach below.
      string(REPLACE ";" "\\;" escaped "${arg}")
      # Break the argument into DATA{} fragments and everything else.
      string(REGEX MATCHALL "${data_regex}|${other_regex}" fragments "${escaped}")
      # Reassemble the argument, substituting a path for each DATA{} fragment.
      set(result "")
      foreach(fragment IN LISTS fragments)
        if("x${fragment}" MATCHES "^x${data_regex}$")
          # The first capture group holds the text between the braces.
          _ExternalData_arg("${target}" "${fragment}" "${CMAKE_MATCH_1}" resolved)
          set(result "${result}${resolved}")
        else()
          set(result "${result}${fragment}")
        endif()
      endforeach()
    endif()
    # Escape in-value semicolons before storing into the output list.
    string(REPLACE ";" "\\;" result "${result}")
    list(APPEND expanded "${result}")
  endforeach()
  set("${outArgsVar}" "${expanded}" PARENT_SCOPE)
endfunction()
287
288#-----------------------------------------------------------------------------
289# Private helper interface
290
# Full path of this module and its directory.  The module file is also the
# script executed with "cmake -P" by the generated custom commands.
set(_ExternalData_SELF "${CMAKE_CURRENT_LIST_FILE}")
get_filename_component(_ExternalData_SELF_DIR "${_ExternalData_SELF}" PATH)
# Regex fragments matching the supported hash algorithm names and the
# corresponding content link extensions (without the leading dot).
set(_ExternalData_REGEX_ALGO "MD5")
set(_ExternalData_REGEX_EXT "md5")
295
# _ExternalData_compute_hash(<var_hash> <algo> <file>)
#
# Computes the <algo> hash of <file> and stores it in <var_hash> in the
# caller's scope.  Fails with FATAL_ERROR if <algo> is not supported or if
# the hash cannot be computed (e.g. the file is missing or unreadable).
function(_ExternalData_compute_hash var_hash algo file)
  if(NOT "${algo}" MATCHES "^${_ExternalData_REGEX_ALGO}$")
    message(FATAL_ERROR "Hash algorithm ${algo} unimplemented.")
  endif()
  # TODO: Require CMake 2.8.7 to support other hashes with file(${algo} ...)
  execute_process(COMMAND "${CMAKE_COMMAND}" -E md5sum "${file}"
    RESULT_VARIABLE result
    OUTPUT_VARIABLE output
    ERROR_VARIABLE error)
  # The hash is the first 32 characters of the tool output.  Refuse to
  # continue with an empty or truncated value when the tool failed, because
  # callers write the hash into content links and object names.
  string(LENGTH "${output}" output_len)
  if(result OR output_len LESS 32)
    message(FATAL_ERROR "Failed to compute ${algo} hash of\n"
      "  ${file}\n"
      "${error}")
  endif()
  string(SUBSTRING "${output}" 0 32 hash)
  set("${var_hash}" "${hash}" PARENT_SCOPE)
endfunction()
307
# _ExternalData_random(<var>)
#
# Stores a random 6-character string in <var> in the caller's scope.
function(_ExternalData_random var)
  string(RANDOM LENGTH 6 suffix)
  set("${var}" "${suffix}" PARENT_SCOPE)
endfunction()
312
# _ExternalData_exact_regex(<regex_var> <string>)
#
# Stores in <regex_var> (in the caller's scope) a regular expression that
# matches <string> literally.  Every character that is special in a CMake
# regular expression outside of brackets is escaped with a backslash,
# including "$", "?" and "|" so that names containing them still match
# themselves exactly.
function(_ExternalData_exact_regex regex_var string)
  string(REGEX REPLACE "([][+.*()^$?|])" "\\\\\\1" regex "${string}")
  set("${regex_var}" "${regex}" PARENT_SCOPE)
endfunction()
317
# _ExternalData_atomic_write(<file> <content>)
#
# Writes <content> to a randomly named temporary file next to <file> and then
# renames it to <file>, so that <file> is replaced in a single step.
function(_ExternalData_atomic_write file content)
  _ExternalData_random(suffix)
  set(scratch "${file}.tmp${suffix}")
  file(WRITE "${scratch}" "${content}")
  file(RENAME "${scratch}" "${file}")
endfunction()
324
# _ExternalData_link_content(<name> <var_ext>)
#
# Converts the real data file <name> into a content link using the hash
# algorithm named by ExternalData_LINK_CONTENT: writes "<name><ext>"
# containing the hash, renames the original file to the staged object
# ".ExternalData_<algo>_<hash>" in the same directory, and stores the link
# extension in <var_ext> in the caller's scope.
function(_ExternalData_link_content name var_ext)
  if(NOT "${ExternalData_LINK_CONTENT}" MATCHES "^(${_ExternalData_REGEX_ALGO})$")
    message(FATAL_ERROR
      "Unknown hash algorithm specified by ExternalData_LINK_CONTENT:\n"
      "  ${ExternalData_LINK_CONTENT}")
  endif()
  set(algo "${ExternalData_LINK_CONTENT}")
  # The content link extension is the lower-case algorithm name.
  string(TOLOWER ".${algo}" link_ext)

  _ExternalData_compute_hash(hash "${algo}" "${name}")
  get_filename_component(name_dir "${name}" PATH)

  # Write the content link first, then move the real file out of the way.
  _ExternalData_atomic_write("${name}${link_ext}" "${hash}\n")
  file(RENAME "${name}" "${name_dir}/.ExternalData_${algo}_${hash}")
  set("${var_ext}" "${link_ext}" PARENT_SCOPE)

  # Report the new link relative to the source root.
  file(RELATIVE_PATH link_rel "${ExternalData_SOURCE_ROOT}" "${name}${link_ext}")
  message(STATUS "Linked ${link_rel} to ExternalData ${algo}/${hash}")
endfunction()
344
# _ExternalData_arg(<target> <arg> <options> <var_file>)
#
# Evaluates one DATA{} reference.
#   <target>   - data management target the reference belongs to
#   <arg>      - the original "DATA{...}" text, used only in messages
#   <options>  - the text between the braces: "<name>[,<opt>...]"
#   <var_file> - variable in the caller's scope receiving the resulting path
#
# Files found for the reference are appended to the global properties
# _ExternalData_<target>_FETCH and _ExternalData_<target>_LOCAL.  If any file
# is available only through a content link the result is a path under
# ExternalData_BINARY_ROOT, otherwise it is the path in the source tree.
#
# The helper macros and _ExternalData_arg_find_files() read and update the
# local variables of this function by name (reldata, top_src, top_bin,
# top_same, data_is_directory, associated_files, associated_regex, external,
# internal, have_original, have_original_as_dir), so those names must not be
# changed independently.
function(_ExternalData_arg target arg options var_file)
  # Separate data path from the options.
  string(REPLACE "," ";" options "${options}")
  list(GET options 0 data)
  list(REMOVE_AT options 0)

  # Interpret trailing slashes as directories.
  set(data_is_directory 0)
  if("x${data}" MATCHES "^x(.*)([/\\])$")
    set(data_is_directory 1)
    set(data "${CMAKE_MATCH_1}")
  endif()

  # Convert to full path.
  if(IS_ABSOLUTE "${data}")
    set(absdata "${data}")
  else()
    set(absdata "${CMAKE_CURRENT_SOURCE_DIR}/${data}")
  endif()
  get_filename_component(absdata "${absdata}" ABSOLUTE)

  # Convert to relative path under the source tree.
  if(NOT ExternalData_SOURCE_ROOT)
    set(ExternalData_SOURCE_ROOT "${CMAKE_SOURCE_DIR}")
  endif()
  set(top_src "${ExternalData_SOURCE_ROOT}")
  file(RELATIVE_PATH reldata "${top_src}" "${absdata}")
  if(IS_ABSOLUTE "${reldata}" OR "${reldata}" MATCHES "^\\.\\./")
    message(FATAL_ERROR "Data file referenced by argument\n"
      "  ${arg}\n"
      "does not lie under the top-level source directory\n"
      "  ${top_src}\n")
  endif()
  if(data_is_directory AND NOT IS_DIRECTORY "${top_src}/${reldata}")
    message(FATAL_ERROR "Data directory referenced by argument\n"
      "  ${arg}\n"
      "corresponds to source tree path\n"
      "  ${reldata}\n"
      "that does not exist as a directory!")
  endif()
  if(NOT ExternalData_BINARY_ROOT)
    set(ExternalData_BINARY_ROOT "${CMAKE_BINARY_DIR}")
  endif()
  set(top_bin "${ExternalData_BINARY_ROOT}")

  # Handle in-source builds gracefully.  Initialize the flag explicitly so
  # that a variable of the same name in the calling scope cannot leak in.
  set(top_same 0)
  if("${top_src}" STREQUAL "${top_bin}")
    if(ExternalData_LINK_CONTENT)
      message(WARNING "ExternalData_LINK_CONTENT cannot be used in-source")
      set(ExternalData_LINK_CONTENT 0)
    endif()
    set(top_same 1)
  endif()

  set(external "") # Entries external to the source tree.
  set(internal "") # Entries internal to the source tree.
  # A directory reference has no single "original" file to look for.
  set(have_original ${data_is_directory})
  set(have_original_as_dir 0)

  # Process options.
  set(series_option "")
  set(associated_files "")
  set(associated_regex "")
  foreach(opt ${options})
    if("x${opt}" MATCHES "^xREGEX:[^:/]+$")
      # Regular expression to match associated files.
      string(REGEX REPLACE "^REGEX:" "" regex "${opt}")
      list(APPEND associated_regex "${regex}")
    elseif("x${opt}" MATCHES "^x:$")
      # Activate series matching.
      set(series_option "${opt}")
    elseif("x${opt}" MATCHES "^x[^][:/*?]+$")
      # Specific associated file.
      list(APPEND associated_files "${opt}")
    else()
      message(FATAL_ERROR "Unknown option \"${opt}\" in argument\n"
        "  ${arg}\n")
    endif()
  endforeach()

  if(series_option)
    if(data_is_directory)
      message(FATAL_ERROR "Series option \"${series_option}\" not allowed with directories.")
    endif()
    if(associated_files OR associated_regex)
      message(FATAL_ERROR "Series option \"${series_option}\" not allowed with associated files.")
    endif()
    # Load a whole file series.
    _ExternalData_arg_series()
  elseif(data_is_directory)
    if(associated_files OR associated_regex)
      # Load listed/matching associated files in the directory.
      _ExternalData_arg_associated()
    else()
      message(FATAL_ERROR "Data directory referenced by argument\n"
        "  ${arg}\n"
        "must list associated files.")
    endif()
  else()
    # Load the named data file.
    _ExternalData_arg_single()
    if(associated_files OR associated_regex)
      # Load listed/matching associated files.
      _ExternalData_arg_associated()
    endif()
  endif()

  if(NOT have_original)
    # A directory in place of the file is fatal; a missing file only warns.
    if(have_original_as_dir)
      set(msg_kind FATAL_ERROR)
      set(msg "that is a directory instead of a file!")
    else()
      set(msg_kind AUTHOR_WARNING)
      set(msg "that does not exist as a file (with or without an extension)!")
    endif()
    message(${msg_kind} "Data file referenced by argument\n"
      "  ${arg}\n"
      "corresponds to source tree path\n"
      "  ${reldata}\n"
      "${msg}")
  endif()

  if(external)
    # At least one file comes from a content link: register all files found
    # for this target and refer to the copy in the build tree.
    set_property(GLOBAL APPEND PROPERTY
      _ExternalData_${target}_FETCH "${external}")
    set_property(GLOBAL APPEND PROPERTY
      _ExternalData_${target}_LOCAL "${internal}")
    set("${var_file}" "${top_bin}/${reldata}" PARENT_SCOPE)
  else()
    # Everything is in the source tree already; refer to it directly.
    set("${var_file}" "${top_src}/${reldata}" PARENT_SCOPE)
  endif()
endfunction()
479
# _ExternalData_arg_associated()
#
# Helper macro for _ExternalData_arg(): looks up the files listed in
# associated_files and the files matching any expression in associated_regex.
# All of them are searched in the directory of the referenced data (or in the
# referenced directory itself when data_is_directory is set).  Operates on the
# variables of the calling function.
macro(_ExternalData_arg_associated)
  # Associated files lie in the same directory.
  if(data_is_directory)
    set(reldir "${reldata}")
  else()
    get_filename_component(reldir "${reldata}" PATH)
  endif()
  # Compare as a string: a plain if(reldir) would treat directory names such
  # as "0", "no" or "off" as false and drop the separator.
  if(NOT "x${reldir}" STREQUAL "x")
    set(reldir "${reldir}/")
  endif()
  _ExternalData_exact_regex(reldir_regex "${reldir}")

  # Find files named explicitly.
  foreach(file ${associated_files})
    _ExternalData_exact_regex(file_regex "${file}")
    _ExternalData_arg_find_files("${reldir}${file}" "${reldir_regex}${file_regex}")
  endforeach()

  # Find files matching the given regular expressions.  The alternatives are
  # grouped behind the directory prefix so that the prefix and the anchors
  # added by the file search apply to every alternative, including those
  # inside a single user-provided expression containing "|".
  set(all "")
  set(sep "")
  foreach(regex ${associated_regex})
    set(all "${all}${sep}${regex}")
    set(sep "|")
  endforeach()
  # Without any expression nothing can match, so skip the directory scan.
  if(NOT "x${all}" STREQUAL "x")
    _ExternalData_arg_find_files("${reldir}" "${reldir_regex}(${all})")
  endif()
endmacro()
507
# _ExternalData_arg_single()
#
# Helper macro for _ExternalData_arg(): looks up exactly the file named by
# reldata and nothing else.
macro(_ExternalData_arg_single)
  # Turn the relative path into a regex that matches only that path.
  _ExternalData_exact_regex(reldata_regex "${reldata}")
  _ExternalData_arg_find_files("${reldata}" "${reldata_regex}")
endmacro()
513
# _ExternalData_arg_series()
#
# Helper macro for _ExternalData_arg(): splits reldata into the <prefix>,
# <number> and <suffix> parts of a file series and looks up every file whose
# name has the same prefix and suffix with a matching number in between.
# Parsing and matching are controlled by the ExternalData_SERIES_* variables.
macro(_ExternalData_arg_series)
  # Unless told otherwise, regex groups 1 and 2 hold <number> and <suffix>
  # and there is no separate <prefix> group.
  set(series_parse_prefix "")
  set(series_parse_number "\\1")
  set(series_parse_suffix "\\2")
  if(NOT ExternalData_SERIES_PARSE)
    # Default: optional digits followed by a single extension.
    set(series_parse "([0-9]*)(\\.[^./]*)$")
  else()
    if(ExternalData_SERIES_PARSE_NUMBER AND ExternalData_SERIES_PARSE_SUFFIX)
      # The user named the groups of a custom expression explicitly.
      if(ExternalData_SERIES_PARSE_PREFIX)
        set(series_parse_prefix "\\${ExternalData_SERIES_PARSE_PREFIX}")
      endif()
      set(series_parse_number "\\${ExternalData_SERIES_PARSE_NUMBER}")
      set(series_parse_suffix "\\${ExternalData_SERIES_PARSE_SUFFIX}")
    elseif(NOT "x${ExternalData_SERIES_PARSE}" MATCHES "^x\\([^()]*\\)\\([^()]*\\)\\$$")
      # Without group numbers only the simple two-group form is accepted.
      message(FATAL_ERROR
        "ExternalData_SERIES_PARSE is set to\n"
        "  ${ExternalData_SERIES_PARSE}\n"
        "which is not of the form\n"
        "  (<number>)(<suffix>)$\n"
        "Fix the regular expression or set variables\n"
        "  ExternalData_SERIES_PARSE_PREFIX = <prefix> regex group number, if any\n"
        "  ExternalData_SERIES_PARSE_NUMBER = <number> regex group number\n"
        "  ExternalData_SERIES_PARSE_SUFFIX = <suffix> regex group number\n"
        )
    endif()
    set(series_parse "${ExternalData_SERIES_PARSE}")
  endif()
  if(NOT ExternalData_SERIES_MATCH)
    set(series_match "[_.-]?[0-9]*")
  else()
    set(series_match "${ExternalData_SERIES_MATCH}")
  endif()

  # Rewrite the parsed tail of the path as ";"-separated fields so the result
  # reads as a three element list: base, number, extension.
  set(series_fields "${series_parse_prefix};${series_parse_number};${series_parse_suffix}")
  string(REGEX REPLACE "${series_parse}" "${series_fields}" series_parts "${reldata}")
  list(LENGTH series_parts series_parts_len)
  if(NOT "${series_parts_len}" EQUAL 3)
    message(FATAL_ERROR "Data file referenced by argument\n"
      "  ${arg}\n"
      "corresponds to path\n"
      "  ${reldata}\n"
      "that does not match regular expression\n"
      "  ${series_parse}")
  endif()
  list(GET series_parts 0 relbase)
  list(GET series_parts 2 ext)

  # Glob candidates sharing base and extension, then keep those whose middle
  # part matches the series number expression.
  _ExternalData_exact_regex(series_base "${relbase}")
  _ExternalData_exact_regex(series_ext "${ext}")
  _ExternalData_arg_find_files("${relbase}*${ext}"
    "${series_base}${series_match}${series_ext}")
endmacro()
569
# _ExternalData_arg_find_files(<pattern> <regex>)
#
# Helper for _ExternalData_arg().  Globs "<top_src>/<pattern>*" and, for each
# entry whose name (with any content link extension removed) matches <regex>
# as a whole, records it in the caller's variables:
#   external             - "<file>|<name>|<ext>" for content links
#   internal             - "<file>|<name>" for real files (out-of-source only)
#   have_original        - set when the entry is the referenced data itself
#   have_original_as_dir - set when the referenced data exists as a directory
# Reads top_src, top_bin, top_same and reldata from the calling scope.
function(_ExternalData_arg_find_files pattern regex)
  file(GLOB globbed RELATIVE "${top_src}" "${top_src}/${pattern}*")
  foreach(entry IN LISTS globbed)
    # Separate a content link extension, if any, from the data name.
    if("x${entry}" MATCHES "^x(.*)(\\.(${_ExternalData_REGEX_EXT}))$")
      set(relname "${CMAKE_MATCH_1}")
      set(alg "${CMAKE_MATCH_2}")
    else()
      set(relname "${entry}")
      set(alg "")
    endif()
    # Group the expression so the anchors apply to all of it even when it
    # contains a top-level alternation.
    if("x${relname}" MATCHES "^x(${regex})$" # matches
        AND NOT "x${relname}" MATCHES "(^x|/)\\.ExternalData_" # not staged obj
        )
      # The "x" prefix in the comparisons below keeps names that happen to
      # equal a variable name from being dereferenced by if().
      if(IS_DIRECTORY "${top_src}/${entry}")
        if("x${relname}" STREQUAL "x${reldata}")
          set(have_original_as_dir 1)
        endif()
      else()
        set(name "${top_src}/${relname}")
        set(file "${top_bin}/${relname}")
        if(alg)
          # Content link: the data must be fetched.
          list(APPEND external "${file}|${name}|${alg}")
        elseif(ExternalData_LINK_CONTENT)
          # Real file: convert it to a content link now.
          _ExternalData_link_content("${name}" alg)
          list(APPEND external "${file}|${name}|${alg}")
        elseif(NOT top_same)
          # Real file in an out-of-source build: copy or link it.
          list(APPEND internal "${file}|${name}")
        endif()
        if("x${relname}" STREQUAL "x${reldata}")
          set(have_original 1)
        endif()
      endif()
    endif()
  endforeach()
  # Hand the accumulated state back to the caller.
  set(external "${external}" PARENT_SCOPE)
  set(internal "${internal}" PARENT_SCOPE)
  set(have_original "${have_original}" PARENT_SCOPE)
  set(have_original_as_dir "${have_original_as_dir}" PARENT_SCOPE)
endfunction()
609
610#-----------------------------------------------------------------------------
611# Private script mode interface
612
# Everything below runs only when this file is executed as a script with an
# ExternalData_ACTION; when a generator is active or no action was requested
# stop here.
if(NOT ExternalData_ACTION)
  return()
elseif(CMAKE_GENERATOR)
  return()
endif()

# Load the configuration file named on the command line, if any.
if(ExternalData_CONFIG)
  include(${ExternalData_CONFIG})
endif()
# The URL templates are required for every action.
if(NOT ExternalData_URL_TEMPLATES)
  message(FATAL_ERROR "No ExternalData_URL_TEMPLATES set!")
endif()
623
# _ExternalData_link_or_copy(<src> <dst>)
#
# Makes <dst> refer to the content of <src>: a symbolic link on UNIX, a copy
# elsewhere.  The link or copy is created under a temporary name and renamed
# to <dst> so the destination is replaced in one step.  On UNIX the link is
# relative when both <src> and <dst> lie under the directory named by the
# variable relative_top, and absolute otherwise.
function(_ExternalData_link_or_copy src dst)
  # Create a temporary file first.
  get_filename_component(dst_dir "${dst}" PATH)
  file(MAKE_DIRECTORY "${dst_dir}")
  _ExternalData_random(random)
  set(tmp "${dst}.tmp${random}")
  if(UNIX)
    # Create a symbolic link.
    set(tgt "${src}")
    if(relative_top)
      # Use relative path if files are close enough, i.e. neither path
      # leaves the relative_top directory.
      file(RELATIVE_PATH relsrc "${relative_top}" "${src}")
      file(RELATIVE_PATH reldst "${relative_top}" "${dst}")
      if(NOT IS_ABSOLUTE "${relsrc}" AND NOT "${relsrc}" MATCHES "^\\.\\./" AND
          NOT IS_ABSOLUTE "${reldst}" AND NOT "${reldst}" MATCHES "^\\.\\./")
        file(RELATIVE_PATH tgt "${dst_dir}" "${src}")
      endif()
    endif()
    execute_process(COMMAND "${CMAKE_COMMAND}" -E create_symlink "${tgt}" "${tmp}" RESULT_VARIABLE result)
  else()
    # Create a copy.
    execute_process(COMMAND "${CMAKE_COMMAND}" -E copy "${src}" "${tmp}" RESULT_VARIABLE result)
  endif()
  if(result)
    # Clean up whatever was left behind and report the actual source.
    file(REMOVE "${tmp}")
    message(FATAL_ERROR "Failed to create\n  ${tmp}\nfrom\n  ${src}")
  endif()

  # Atomically create/replace the real destination.
  file(RENAME "${tmp}" "${dst}")
endfunction()
655
# _ExternalData_download_file(<url> <file> <err_var> <msg_var>)
#
# Downloads <url> to <file>, making up to three attempts when a failure looks
# transient (message mentions "partial", "timeout" or "temporarily").  The
# status code and message of the last attempt are stored in <err_var> and
# <msg_var> in the caller's scope.
function(_ExternalData_download_file url file err_var msg_var)
  # Timeout arguments: a true value is used as given, an explicit 0 disables
  # the timeout, and anything else selects the default.
  if(ExternalData_TIMEOUT_INACTIVITY)
    set(inactivity_args INACTIVITY_TIMEOUT ${ExternalData_TIMEOUT_INACTIVITY})
  elseif("${ExternalData_TIMEOUT_INACTIVITY}" EQUAL 0)
    set(inactivity_args "")
  else()
    set(inactivity_args INACTIVITY_TIMEOUT 60)
  endif()
  if(ExternalData_TIMEOUT_ABSOLUTE)
    set(absolute_args TIMEOUT ${ExternalData_TIMEOUT_ABSOLUTE})
  elseif("${ExternalData_TIMEOUT_ABSOLUTE}" EQUAL 0)
    set(absolute_args "")
  else()
    set(absolute_args TIMEOUT 300)
  endif()

  set(attempts_left 3)
  while(attempts_left)
    math(EXPR attempts_left "${attempts_left} - 1")
    file(DOWNLOAD "${url}" "${file}" STATUS status LOG log ${inactivity_args} ${absolute_args} SHOW_PROGRESS)
    list(GET status 0 err)
    list(GET status 1 msg)
    # Report HTTP 503 as a temporary condition, whether or not the download
    # itself was flagged as failed.
    if(err)
      if("${msg}" MATCHES "HTTP response code said error" AND
          "${log}" MATCHES "error: 503")
        set(msg "temporarily unavailable")
      endif()
    elseif("${log}" MATCHES "\nHTTP[^\n]* 503")
      set(err TRUE)
      set(msg "temporarily unavailable")
    endif()
    # Only transient failures are worth another attempt.
    if(err AND "${msg}" MATCHES "partial|timeout|temporarily")
      if(attempts_left)
        message(STATUS "[download terminated: ${msg}, retries left: ${attempts_left}]")
      endif()
    else()
      break()
    endif()
  endwhile()
  set("${err_var}" "${err}" PARENT_SCOPE)
  set("${msg_var}" "${msg}" PARENT_SCOPE)
endfunction()
695
# _ExternalData_download_object(<name> <hash> <algo> <var_obj>)
#
# Locates the object <algo>/<hash> and stores its path in <var_obj> in the
# caller's scope.  The object stores are searched first; otherwise each URL
# template is tried in order and a download with the right hash is placed in
# the first store.  As a last resort the staged object next to <name> is
# used.  Fails with FATAL_ERROR when the object cannot be found at all.
function(_ExternalData_download_object name hash algo var_obj)
  # An object already present in any store wins.
  foreach(store_dir ${ExternalData_OBJECT_STORES})
    set(candidate "${store_dir}/${algo}/${hash}")
    if(EXISTS "${candidate}")
      message(STATUS "Found object: \"${candidate}\"")
      set("${var_obj}" "${candidate}" PARENT_SCOPE)
      return()
    endif()
  endforeach()

  # Downloads go to the first store, under a temporary name until verified.
  list(GET ExternalData_OBJECT_STORES 0 first_store)
  set(obj "${first_store}/${algo}/${hash}")
  _ExternalData_random(suffix)
  set(partial "${obj}.tmp${suffix}")

  set(found 0)
  set(tried "")
  foreach(url_template IN LISTS ExternalData_URL_TEMPLATES)
    # Fill in the placeholders of the template.
    string(REPLACE "%(hash)" "${hash}" url "${url_template}")
    string(REPLACE "%(algo)" "${algo}" url "${url}")
    message(STATUS "Fetching \"${url}\"")
    _ExternalData_download_file("${url}" "${partial}" err errMsg)
    set(tried "${tried}\n  ${url}")
    if(NOT err)
      # Accept the download only if its hash is the requested one.
      _ExternalData_compute_hash(dl_hash "${algo}" "${partial}")
      if("${dl_hash}" STREQUAL "${hash}")
        set(found 1)
        break()
      endif()
      set(tried "${tried} (wrong hash ${algo}=${dl_hash})")
      # When debugging downloads keep the bad object under its actual hash.
      if("$ENV{ExternalData_DEBUG_DOWNLOAD}" MATCHES ".")
        file(RENAME "${partial}" "${first_store}/${algo}/${dl_hash}")
      endif()
    else()
      set(tried "${tried} (${errMsg})")
    endif()
    file(REMOVE "${partial}")
  endforeach()

  # Location of a staged object next to the content link.
  get_filename_component(name_dir "${name}" PATH)
  set(staged "${name_dir}/.ExternalData_${algo}_${hash}")

  if(found)
    file(RENAME "${partial}" "${obj}")
    message(STATUS "Downloaded object: \"${obj}\"")
  elseif(EXISTS "${staged}")
    set(obj "${staged}")
    message(STATUS "Staged object: \"${obj}\"")
  else()
    message(FATAL_ERROR "Object ${algo}=${hash} not found at:${tried}")
  endif()

  set("${var_obj}" "${obj}" PARENT_SCOPE)
endfunction()
754
# Perform the action requested on the command line.
if("${ExternalData_ACTION}" STREQUAL "local")
  # Link or copy a real data file from the source tree.
  foreach(v file name)
    if(NOT DEFINED "${v}")
      message(FATAL_ERROR "No \"-D${v}=\" value provided!")
    endif()
  endforeach()
  _ExternalData_link_or_copy("${name}" "${file}")
elseif("${ExternalData_ACTION}" STREQUAL "fetch")
  # Obtain the object named by a content link and link or copy it into place.
  foreach(v ExternalData_OBJECT_STORES file name ext)
    if(NOT DEFINED "${v}")
      message(FATAL_ERROR "No \"-D${v}=\" value provided!")
    endif()
  endforeach()

  # The content link holds the hash of the object.
  file(READ "${name}${ext}" hash)
  string(STRIP "${hash}" hash)

  # The link extension names the hash algorithm.
  if("${ext}" MATCHES "^\\.(${_ExternalData_REGEX_EXT})$")
    string(TOUPPER "${CMAKE_MATCH_1}" algo)
  else()
    message(FATAL_ERROR "Unknown hash algorithm extension \"${ext}\"")
  endif()

  _ExternalData_download_object("${name}" "${hash}" "${algo}" obj)

  # The file needs to be (re)created unless it exists and its stamp file
  # already records the same hash.
  set(stamp_file "${file}${ext}-stamp")
  set(needs_link 1)
  if(EXISTS "${file}" AND EXISTS "${stamp_file}")
    file(READ "${stamp_file}" recorded_hash)
    string(STRIP "${recorded_hash}" recorded_hash)
    if("${recorded_hash}" STREQUAL "${hash}")
      set(needs_link 0)
    endif()
  endif()

  if(needs_link)
    _ExternalData_link_or_copy("${obj}" "${file}")
  else()
    # Touch the file to convince the build system it is up to date.
    execute_process(COMMAND "${CMAKE_COMMAND}" -E touch "${file}")
  endif()

  # Atomically update the hash/timestamp file to record the object referenced.
  _ExternalData_atomic_write("${stamp_file}" "${hash}\n")
else()
  message(FATAL_ERROR "Unknown ExternalData_ACTION=[${ExternalData_ACTION}]")
endif()
804