1""" 2Downloading NeuroImaging datasets: functional datasets (task + resting-state) 3""" 4import fnmatch 5import glob 6import warnings 7import os 8import re 9import json 10 11import nibabel as nib 12import numpy as np 13import numbers 14 15from io import BytesIO 16 17import nibabel 18import pandas as pd 19from scipy.io import loadmat 20from scipy.io.matlab.miobase import MatReadError 21from sklearn.utils import Bunch, deprecated 22 23from .utils import (_get_dataset_dir, _fetch_files, _get_dataset_descr, 24 _read_md5_sum_file, _tree, _filter_columns, _fetch_file, _uncompress_file) 25from .._utils import check_niimg, fill_doc 26from .._utils.numpy_conversions import csv_to_array 27from nilearn.image import get_data 28 29 30@fill_doc 31def fetch_haxby(data_dir=None, subjects=(2,), 32 fetch_stimuli=False, url=None, resume=True, verbose=1): 33 """Download and loads complete haxby dataset. 34 35 See :footcite:`Haxby2425`. 36 37 Parameters 38 ---------- 39 %(data_dir)s 40 subjects : list or int, optional 41 Either a list of subjects or the number of subjects to load, from 1 to 42 6. By default, 2nd subject will be loaded. Empty list returns no subject 43 data. Default=(2,). 44 45 fetch_stimuli : boolean, optional 46 Indicate if stimuli images must be downloaded. They will be presented 47 as a dictionary of categories. Default=False. 48 %(url)s 49 %(resume)s 50 %(verbose)s 51 52 Returns 53 ------- 54 data : sklearn.datasets.base.Bunch 55 Dictionary-like object, the interest attributes are : 56 57 - 'anat': string list. Paths to anatomic images. 58 - 'func': string list. Paths to nifti file with bold data. 59 - 'session_target': string list. Paths to text file containing session and target data. 60 - 'mask': string. Path to fullbrain mask file. 61 - 'mask_vt': string list. Paths to nifti ventral temporal mask file. 62 - 'mask_face': string list. Paths to nifti ventral temporal mask file. 63 - 'mask_house': string list. Paths to nifti ventral temporal mask file. 
64 - 'mask_face_little': string list. Paths to nifti ventral temporal mask file. 65 - 'mask_house_little': string list. Paths to nifti ventral temporal mask file. 66 67 References 68 ---------- 69 .. footbibliography:: 70 71 Notes 72 ----- 73 PyMVPA provides a tutorial making use of this dataset: 74 http://www.pymvpa.org/tutorial.html 75 76 More information about its structure: 77 http://dev.pymvpa.org/datadb/haxby2001.html 78 79 See `additional information 80 <http://www.sciencemag.org/content/293/5539/2425>` 81 82 Run 8 in subject 5 does not contain any task labels. 83 The anatomical image for subject 6 is unavailable. 84 85 """ 86 if isinstance(subjects, numbers.Number) and subjects > 6: 87 subjects = 6 88 89 if subjects is not None and (isinstance(subjects, list) or 90 isinstance(subjects, tuple)): 91 for sub_id in subjects: 92 if sub_id not in [1, 2, 3, 4, 5, 6]: 93 raise ValueError("You provided invalid subject id {0} in a " 94 "list. Subjects must be selected in " 95 "[1, 2, 3, 4, 5, 6]".format(sub_id)) 96 97 dataset_name = 'haxby2001' 98 data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, 99 verbose=verbose) 100 101 # Get the mask 102 url_mask = 'https://www.nitrc.org/frs/download.php/7868/mask.nii.gz' 103 mask = _fetch_files(data_dir, [('mask.nii.gz', url_mask, {})], 104 verbose=verbose)[0] 105 106 # Dataset files 107 if url is None: 108 url = 'http://data.pymvpa.org/datasets/haxby2001/' 109 md5sums = _fetch_files(data_dir, [('MD5SUMS', url + 'MD5SUMS', {})], 110 verbose=verbose)[0] 111 md5sums = _read_md5_sum_file(md5sums) 112 113 # definition of dataset files 114 sub_files = ['bold.nii.gz', 'labels.txt', 115 'mask4_vt.nii.gz', 'mask8b_face_vt.nii.gz', 116 'mask8b_house_vt.nii.gz', 'mask8_face_vt.nii.gz', 117 'mask8_house_vt.nii.gz', 'anat.nii.gz'] 118 n_files = len(sub_files) 119 120 if subjects is None: 121 subjects = [] 122 123 if isinstance(subjects, numbers.Number): 124 subject_mask = np.arange(1, subjects + 1) 125 else: 126 subject_mask = 
np.array(subjects) 127 128 files = [ 129 (os.path.join('subj%d' % i, sub_file), 130 url + 'subj%d-2010.01.14.tar.gz' % i, 131 {'uncompress': True, 132 'md5sum': md5sums.get('subj%d-2010.01.14.tar.gz' % i, None)}) 133 for i in subject_mask 134 for sub_file in sub_files 135 if not (sub_file == 'anat.nii.gz' and i == 6) # no anat for sub. 6 136 ] 137 138 files = _fetch_files(data_dir, files, resume=resume, verbose=verbose) 139 140 if ((isinstance(subjects, numbers.Number) and subjects == 6) or 141 np.any(subject_mask == 6)): 142 files.append(None) # None value because subject 6 has no anat 143 144 kwargs = {} 145 if fetch_stimuli: 146 stimuli_files = [(os.path.join('stimuli', 'README'), 147 url + 'stimuli-2010.01.14.tar.gz', 148 {'uncompress': True})] 149 readme = _fetch_files(data_dir, stimuli_files, resume=resume, 150 verbose=verbose)[0] 151 kwargs['stimuli'] = _tree(os.path.dirname(readme), pattern='*.jpg', 152 dictionary=True) 153 154 fdescr = _get_dataset_descr(dataset_name) 155 156 # return the data 157 return Bunch( 158 anat=files[7::n_files], 159 func=files[0::n_files], 160 session_target=files[1::n_files], 161 mask_vt=files[2::n_files], 162 mask_face=files[3::n_files], 163 mask_house=files[4::n_files], 164 mask_face_little=files[5::n_files], 165 mask_house_little=files[6::n_files], 166 mask=mask, 167 description=fdescr, 168 **kwargs) 169 170 171@fill_doc 172def fetch_adhd(n_subjects=30, data_dir=None, url=None, resume=True, 173 verbose=1): 174 """Download and load the ADHD resting-state dataset. 175 176 See :footcite:`ADHDdataset`. 177 178 Parameters 179 ---------- 180 n_subjects : int, optional 181 The number of subjects to load from maximum of 40 subjects. 182 By default, 30 subjects will be loaded. If None is given, 183 all 40 subjects will be loaded. Default=30. 
184 %(data_dir)s 185 %(url)s 186 %(resume)s 187 %(verbose)s 188 189 Returns 190 ------- 191 data : sklearn.datasets.base.Bunch 192 Dictionary-like object, the interest attributes are : 193 194 - 'func': Paths to functional resting-state images 195 - 'phenotypic': Explanations of preprocessing steps 196 - 'confounds': CSV files containing the nuisance variables 197 198 References 199 ---------- 200 .. footbibliography:: 201 202 """ 203 if url is None: 204 url = 'https://www.nitrc.org/frs/download.php/' 205 206 # Preliminary checks and declarations 207 dataset_name = 'adhd' 208 data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, 209 verbose=verbose) 210 ids = ['0010042', '0010064', '0010128', '0021019', '0023008', '0023012', 211 '0027011', '0027018', '0027034', '0027037', '1019436', '1206380', 212 '1418396', '1517058', '1552181', '1562298', '1679142', '2014113', 213 '2497695', '2950754', '3007585', '3154996', '3205761', '3520880', 214 '3624598', '3699991', '3884955', '3902469', '3994098', '4016887', 215 '4046678', '4134561', '4164316', '4275075', '6115230', '7774305', 216 '8409791', '8697774', '9744150', '9750701'] 217 nitrc_ids = range(7782, 7822) 218 max_subjects = len(ids) 219 if n_subjects is None: 220 n_subjects = max_subjects 221 if n_subjects > max_subjects: 222 warnings.warn('Warning: there are only %d subjects' % max_subjects) 223 n_subjects = max_subjects 224 ids = ids[:n_subjects] 225 nitrc_ids = nitrc_ids[:n_subjects] 226 227 opts = dict(uncompress=True) 228 229 # Dataset description 230 fdescr = _get_dataset_descr(dataset_name) 231 232 # First, get the metadata 233 phenotypic = ('ADHD200_40subs_motion_parameters_and_phenotypics.csv', 234 url + '7781/adhd40_metadata.tgz', opts) 235 236 phenotypic = _fetch_files(data_dir, [phenotypic], resume=resume, 237 verbose=verbose)[0] 238 239 # Load the csv file 240 phenotypic = np.genfromtxt(phenotypic, names=True, delimiter=',', 241 dtype=None) 242 243 # Keep phenotypic information for selected subjects 
244 int_ids = np.asarray(ids, dtype=int) 245 phenotypic = phenotypic[[np.where(phenotypic['Subject'] == i)[0][0] 246 for i in int_ids]] 247 248 # Download dataset files 249 250 archives = [url + '%i/adhd40_%s.tgz' % (ni, ii) 251 for ni, ii in zip(nitrc_ids, ids)] 252 functionals = ['data/%s/%s_rest_tshift_RPI_voreg_mni.nii.gz' % (i, i) 253 for i in ids] 254 confounds = ['data/%s/%s_regressors.csv' % (i, i) for i in ids] 255 256 functionals = _fetch_files( 257 data_dir, zip(functionals, archives, (opts,) * n_subjects), 258 resume=resume, verbose=verbose) 259 260 confounds = _fetch_files( 261 data_dir, zip(confounds, archives, (opts,) * n_subjects), 262 resume=resume, verbose=verbose) 263 264 return Bunch(func=functionals, confounds=confounds, 265 phenotypic=phenotypic, description=fdescr) 266 267 268@fill_doc 269def fetch_miyawaki2008(data_dir=None, url=None, resume=True, verbose=1): 270 """Download and loads Miyawaki et al. 2008 dataset (153MB). 271 272 See :footcite:`MIYAWAKI2008915`. 273 274 Parameters 275 ---------- 276 %(data_dir)s 277 %(url)s 278 %(resume)s 279 %(verbose)s 280 281 Returns 282 ------- 283 data : Bunch 284 Dictionary-like object, the interest attributes are : 285 286 - 'func': string list 287 Paths to nifti file with bold data 288 - 'label': string list 289 Paths to text file containing session and target data 290 - 'mask': string 291 Path to nifti mask file to define target volume in visual 292 cortex 293 - 'background': string 294 Path to nifti file containing a background image usable as a 295 background image for miyawaki images. 296 297 References 298 ---------- 299 .. 
footbibliography:: 300 301 Notes 302 ----- 303 This dataset is available on the `brainliner website 304 <http://brainliner.jp/data/brainliner-admin/Reconstruct>`_ 305 306 See `additional information 307 <http://www.cns.atr.jp/dni/en/downloads/ 308 fmri-data-set-for-visual-image-reconstruction/>`_ 309 310 """ 311 url = 'https://www.nitrc.org/frs/download.php' \ 312 '/8486/miyawaki2008.tgz?i_agree=1&download_now=1' 313 opts = {'uncompress': True} 314 315 # Dataset files 316 317 # Functional MRI: 318 # * 20 random scans (usually used for training) 319 # * 12 figure scans (usually used for testing) 320 321 func_figure = [(os.path.join('func', 'data_figure_run%02d.nii.gz' % i), 322 url, opts) for i in range(1, 13)] 323 324 func_random = [(os.path.join('func', 'data_random_run%02d.nii.gz' % i), 325 url, opts) for i in range(1, 21)] 326 327 # Labels, 10x10 patches, stimuli shown to the subject: 328 # * 20 random labels 329 # * 12 figure labels (letters and shapes) 330 331 label_filename = 'data_%s_run%02d_label.csv' 332 label_figure = [(os.path.join('label', label_filename % ('figure', i)), 333 url, opts) for i in range(1, 13)] 334 335 label_random = [(os.path.join('label', label_filename % ('random', i)), 336 url, opts) for i in range(1, 21)] 337 338 # Masks 339 340 file_mask = [ 341 'mask.nii.gz', 342 'LHlag0to1.nii.gz', 343 'LHlag10to11.nii.gz', 344 'LHlag1to2.nii.gz', 345 'LHlag2to3.nii.gz', 346 'LHlag3to4.nii.gz', 347 'LHlag4to5.nii.gz', 348 'LHlag5to6.nii.gz', 349 'LHlag6to7.nii.gz', 350 'LHlag7to8.nii.gz', 351 'LHlag8to9.nii.gz', 352 'LHlag9to10.nii.gz', 353 'LHV1d.nii.gz', 354 'LHV1v.nii.gz', 355 'LHV2d.nii.gz', 356 'LHV2v.nii.gz', 357 'LHV3A.nii.gz', 358 'LHV3.nii.gz', 359 'LHV4v.nii.gz', 360 'LHVP.nii.gz', 361 'RHlag0to1.nii.gz', 362 'RHlag10to11.nii.gz', 363 'RHlag1to2.nii.gz', 364 'RHlag2to3.nii.gz', 365 'RHlag3to4.nii.gz', 366 'RHlag4to5.nii.gz', 367 'RHlag5to6.nii.gz', 368 'RHlag6to7.nii.gz', 369 'RHlag7to8.nii.gz', 370 'RHlag8to9.nii.gz', 371 
'RHlag9to10.nii.gz', 372 'RHV1d.nii.gz', 373 'RHV1v.nii.gz', 374 'RHV2d.nii.gz', 375 'RHV2v.nii.gz', 376 'RHV3A.nii.gz', 377 'RHV3.nii.gz', 378 'RHV4v.nii.gz', 379 'RHVP.nii.gz' 380 ] 381 382 file_mask = [(os.path.join('mask', m), url, opts) for m in file_mask] 383 384 file_names = func_figure + func_random + \ 385 label_figure + label_random + \ 386 file_mask 387 388 dataset_name = 'miyawaki2008' 389 data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, 390 verbose=verbose) 391 files = _fetch_files(data_dir, file_names, resume=resume, verbose=verbose) 392 393 # Fetch the background image 394 bg_img = _fetch_files(data_dir, [('bg.nii.gz', url, opts)], resume=resume, 395 verbose=verbose)[0] 396 397 fdescr = _get_dataset_descr(dataset_name) 398 399 # Return the data 400 return Bunch( 401 func=files[:32], 402 label=files[32:64], 403 mask=files[64], 404 mask_roi=files[65:], 405 background=bg_img, 406 description=fdescr) 407 408 409@fill_doc 410def fetch_localizer_contrasts(contrasts, n_subjects=None, get_tmaps=False, 411 get_masks=False, get_anats=False, 412 data_dir=None, url=None, resume=True, verbose=1): 413 """Download and load Brainomics/Localizer dataset (94 subjects). 414 415 "The Functional Localizer is a simple and fast acquisition 416 procedure based on a 5-minute functional magnetic resonance 417 imaging (fMRI) sequence that can be run as easily and as 418 systematically as an anatomical scan. This protocol captures the 419 cerebral bases of auditory and visual perception, motor actions, 420 reading, language comprehension and mental calculation at an 421 individual level. Individual functional maps are reliable and 422 quite precise. The procedure is described in more detail on the 423 Functional Localizer page." 424 (see https://osf.io/vhtf6/) 425 426 You may cite :footcite:`PAPADOPOULOSORFANOS2017309` 427 when using this dataset. 428 429 Scientific results obtained using this dataset are described 430 in :footcite:`Pinel2007fast`. 
431 432 Parameters 433 ---------- 434 contrasts : list of str 435 The contrasts to be fetched (for all 94 subjects available). 436 Allowed values are:: 437 438 - "checkerboard" 439 - "horizontal checkerboard" 440 - "vertical checkerboard" 441 - "horizontal vs vertical checkerboard" 442 - "vertical vs horizontal checkerboard" 443 - "sentence listening" 444 - "sentence reading" 445 - "sentence listening and reading" 446 - "sentence reading vs checkerboard" 447 - "calculation (auditory cue)" 448 - "calculation (visual cue)" 449 - "calculation (auditory and visual cue)" 450 - "calculation (auditory cue) vs sentence listening" 451 - "calculation (visual cue) vs sentence reading" 452 - "calculation vs sentences" 453 - "calculation (auditory cue) and sentence listening" 454 - "calculation (visual cue) and sentence reading" 455 - "calculation and sentence listening/reading" 456 - "calculation (auditory cue) and sentence listening vs " 457 - "calculation (visual cue) and sentence reading" 458 - "calculation (visual cue) and sentence reading vs checkerboard" 459 - "calculation and sentence listening/reading vs button press" 460 - "left button press (auditory cue)" 461 - "left button press (visual cue)" 462 - "left button press" 463 - "left vs right button press" 464 - "right button press (auditory cue)" 465 - "right button press (visual cue)" 466 - "right button press" 467 - "right vs left button press" 468 - "button press (auditory cue) vs sentence listening" 469 - "button press (visual cue) vs sentence reading" 470 - "button press vs calculation and sentence listening/reading" 471 472 or equivalently on can use the original names:: 473 474 - "checkerboard" 475 - "horizontal checkerboard" 476 - "vertical checkerboard" 477 - "horizontal vs vertical checkerboard" 478 - "vertical vs horizontal checkerboard" 479 - "auditory sentences" 480 - "visual sentences" 481 - "auditory&visual sentences" 482 - "visual sentences vs checkerboard" 483 - "auditory calculation" 484 - "visual 
calculation" 485 - "auditory&visual calculation" 486 - "auditory calculation vs auditory sentences" 487 - "visual calculation vs sentences" 488 - "auditory&visual calculation vs sentences" 489 - "auditory processing" 490 - "visual processing" 491 - "visual processing vs auditory processing" 492 - "auditory processing vs visual processing" 493 - "visual processing vs checkerboard" 494 - "cognitive processing vs motor" 495 - "left auditory click" 496 - "left visual click" 497 - "left auditory&visual click" 498 - "left auditory & visual click vs right auditory&visual click" 499 - "right auditory click" 500 - "right visual click" 501 - "right auditory&visual click" 502 - "right auditory & visual click vs left auditory&visual click" 503 - "auditory click vs auditory sentences" 504 - "visual click vs visual sentences" 505 - "auditory&visual motor vs cognitive processing" 506 507 n_subjects : int or list, optional 508 The number or list of subjects to load. If None is given, 509 all 94 subjects are used. 510 511 get_tmaps : boolean, optional 512 Whether t maps should be fetched or not. Default=False. 513 514 get_masks : boolean, optional 515 Whether individual masks should be fetched or not. 516 Default=False. 517 518 get_anats : boolean, optional 519 Whether individual structural images should be fetched or not. 520 Default=False. 521 %(data_dir)s 522 %(url)s 523 %(resume)s 524 %(verbose)s 525 526 Returns 527 ------- 528 data : Bunch 529 Dictionary-like object, the interest attributes are : 530 531 - 'cmaps': string list 532 Paths to nifti contrast maps 533 - 'tmaps' string list (if 'get_tmaps' set to True) 534 Paths to nifti t maps 535 - 'masks': string list 536 Paths to nifti files corresponding to the subjects individual masks 537 - 'anats': string 538 Path to nifti files corresponding to the subjects structural images 539 540 References 541 ---------- 542 .. 
footbibliography:: 543 544 See Also 545 --------- 546 nilearn.datasets.fetch_localizer_calculation_task 547 nilearn.datasets.fetch_localizer_button_task 548 549 """ 550 if isinstance(contrasts, str): 551 raise ValueError('Contrasts should be a list of strings, but ' 552 'a single string was given: "%s"' % contrasts) 553 if n_subjects is None: 554 n_subjects = 94 # 94 subjects available 555 if (isinstance(n_subjects, numbers.Number) and 556 ((n_subjects > 94) or (n_subjects < 1))): 557 warnings.warn("Wrong value for \'n_subjects\' (%d). The maximum " 558 "value will be used instead (\'n_subjects=94\')") 559 n_subjects = 94 # 94 subjects available 560 561 # we allow the user to use alternatives to Brainomics contrast names 562 contrast_name_wrapper = { 563 # Checkerboard 564 "checkerboard": "checkerboard", 565 "horizontal checkerboard": "horizontal checkerboard", 566 "vertical checkerboard": "vertical checkerboard", 567 "horizontal vs vertical checkerboard": 568 "horizontal vs vertical checkerboard", 569 "vertical vs horizontal checkerboard": 570 "vertical vs horizontal checkerboard", 571 # Sentences 572 "sentence listening": "auditory sentences", 573 "sentence reading": "visual sentences", 574 "sentence listening and reading": "auditory&visual sentences", 575 "sentence reading vs checkerboard": "visual sentences vs checkerboard", 576 # Calculation 577 "calculation (auditory cue)": "auditory calculation", 578 "calculation (visual cue)": "visual calculation", 579 "calculation (auditory and visual cue)": "auditory&visual calculation", 580 "calculation (auditory cue) vs sentence listening": 581 "auditory calculation vs auditory sentences", 582 "calculation (visual cue) vs sentence reading": 583 "visual calculation vs sentences", 584 "calculation vs sentences": "auditory&visual calculation vs sentences", 585 # Calculation + Sentences 586 "calculation (auditory cue) and sentence listening": 587 "auditory processing", 588 "calculation (visual cue) and sentence reading": 
589 "visual processing", 590 "calculation (visual cue) and sentence reading vs " 591 "calculation (auditory cue) and sentence listening": 592 "visual processing vs auditory processing", 593 "calculation (auditory cue) and sentence listening vs " 594 "calculation (visual cue) and sentence reading": 595 "auditory processing vs visual processing", 596 "calculation (visual cue) and sentence reading vs checkerboard": 597 "visual processing vs checkerboard", 598 "calculation and sentence listening/reading vs button press": 599 "cognitive processing vs motor", 600 # Button press 601 "left button press (auditory cue)": "left auditory click", 602 "left button press (visual cue)": "left visual click", 603 "left button press": "left auditory&visual click", 604 "left vs right button press": "left auditory & visual click vs " 605 + "right auditory&visual click", 606 "right button press (auditory cue)": "right auditory click", 607 "right button press (visual cue)": "right visual click", 608 "right button press": "right auditory & visual click", 609 "right vs left button press": "right auditory & visual click " 610 + "vs left auditory&visual click", 611 "button press (auditory cue) vs sentence listening": 612 "auditory click vs auditory sentences", 613 "button press (visual cue) vs sentence reading": 614 "visual click vs visual sentences", 615 "button press vs calculation and sentence listening/reading": 616 "auditory&visual motor vs cognitive processing"} 617 allowed_contrasts = list(contrast_name_wrapper.values()) 618 619 # convert contrast names 620 contrasts_wrapped = [] 621 # get a unique ID for each contrast. It is used to give a unique name to 622 # each download file and avoid name collisions. 
623 contrasts_indices = [] 624 for contrast in contrasts: 625 if contrast in allowed_contrasts: 626 contrasts_wrapped.append(contrast.title().replace(" ", "")) 627 contrasts_indices.append(allowed_contrasts.index(contrast)) 628 elif contrast in contrast_name_wrapper: 629 name = contrast_name_wrapper[contrast] 630 contrasts_wrapped.append(name.title().replace(" ", "")) 631 contrasts_indices.append(allowed_contrasts.index(name)) 632 else: 633 raise ValueError("Contrast \'%s\' is not available" % contrast) 634 635 # Get the dataset OSF index 636 dataset_name = "brainomics_localizer" 637 index_url = "https://osf.io/hwbm2/download" 638 data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, 639 verbose=verbose) 640 index_file = _fetch_file(index_url, data_dir, verbose=verbose) 641 with open(index_file, "rt") as of: 642 index = json.load(of) 643 644 # Build data URLs that will be fetched 645 files = {} 646 # Download from the relevant OSF project, using hashes generated 647 # from the OSF API. Note the trailing slash. 
For more info, see: 648 # https://gist.github.com/emdupre/3cb4d564511d495ea6bf89c6a577da74 649 root_url = "https://osf.io/download/{0}/" 650 if isinstance(n_subjects, numbers.Number): 651 subject_mask = np.arange(1, n_subjects + 1) 652 else: 653 subject_mask = np.array(n_subjects) 654 subject_ids = ["S%02d" % s for s in subject_mask] 655 data_types = ["cmaps"] 656 if get_tmaps: 657 data_types.append("tmaps") 658 filenames = [] 659 660 def _is_valid_path(path, index, verbose): 661 if path not in index: 662 if verbose > 0: 663 print("Skipping path '{0}'...".format(path)) 664 return False 665 return True 666 667 for subject_id in subject_ids: 668 for data_type in data_types: 669 for contrast_id, contrast in enumerate(contrasts_wrapped): 670 name_aux = str.replace( 671 str.join('_', [data_type, contrast]), ' ', '_') 672 file_path = os.path.join( 673 "brainomics_data", subject_id, "%s.nii.gz" % name_aux) 674 path = "/".join([ 675 "/localizer", "derivatives", "spm_1st_level", 676 "sub-%s" % subject_id, 677 "sub-%s_task-localizer_acq-%s_%s.nii.gz" % ( 678 subject_id, contrast, data_type)]) 679 if _is_valid_path(path, index, verbose=verbose): 680 file_url = root_url.format(index[path][1:]) 681 opts = {"move": file_path} 682 filenames.append((file_path, file_url, opts)) 683 files.setdefault(data_type, []).append(file_path) 684 685 # Fetch masks if asked by user 686 if get_masks: 687 for subject_id in subject_ids: 688 file_path = os.path.join( 689 "brainomics_data", subject_id, "boolean_mask_mask.nii.gz") 690 path = "/".join([ 691 "/localizer", "derivatives", "spm_1st_level", 692 "sub-%s" % subject_id, "sub-%s_mask.nii.gz" % subject_id]) 693 if _is_valid_path(path, index, verbose=verbose): 694 file_url = root_url.format(index[path][1:]) 695 opts = {"move": file_path} 696 filenames.append((file_path, file_url, opts)) 697 files.setdefault("masks", []).append(file_path) 698 699 # Fetch anats if asked by user 700 if get_anats: 701 for subject_id in subject_ids: 702 file_path = 
os.path.join( 703 "brainomics_data", subject_id, 704 "normalized_T1_anat_defaced.nii.gz") 705 path = "/".join([ 706 "/localizer", "derivatives", "spm_preprocessing", 707 "sub-%s" % subject_id, "sub-%s_T1w.nii.gz" % subject_id]) 708 if _is_valid_path(path, index, verbose=verbose): 709 file_url = root_url.format(index[path][1:]) 710 opts = {"move": file_path} 711 filenames.append((file_path, file_url, opts)) 712 files.setdefault("anats", []).append(file_path) 713 714 # Fetch subject characteristics 715 participants_file = os.path.join("brainomics_data", "participants.tsv") 716 path = "/localizer/participants.tsv" 717 if _is_valid_path(path, index, verbose=verbose): 718 file_url = root_url.format(index[path][1:]) 719 opts = {"move": participants_file} 720 filenames.append((participants_file, file_url, opts)) 721 722 # Fetch behavioural 723 behavioural_file = os.path.join( 724 "brainomics_data", "phenotype", "behavioural.tsv") 725 path = "/localizer/phenotype/behavioural.tsv" 726 if _is_valid_path(path, index, verbose=verbose): 727 file_url = root_url.format(index[path][1:]) 728 opts = {"move": behavioural_file} 729 filenames.append((behavioural_file, file_url, opts)) 730 731 # Actual data fetching 732 fdescr = _get_dataset_descr(dataset_name) 733 _fetch_files(data_dir, filenames, verbose=verbose) 734 for key, value in files.items(): 735 files[key] = [os.path.join(data_dir, val) for val in value] 736 737 # Load covariates file 738 from numpy.lib.recfunctions import join_by 739 participants_file = os.path.join(data_dir, participants_file) 740 csv_data = np.recfromcsv(participants_file, delimiter='\t') 741 behavioural_file = os.path.join(data_dir, behavioural_file) 742 csv_data2 = np.recfromcsv(behavioural_file, delimiter='\t') 743 csv_data = join_by( 744 "participant_id", csv_data, csv_data2, usemask=False, asrecarray=True) 745 subject_names = csv_data["participant_id"].tolist() 746 subjects_indices = [] 747 for name in subject_ids: 748 name = name.encode("utf8") 749 if 
name not in subject_names: 750 continue 751 subjects_indices.append(subject_names.index(name)) 752 csv_data = csv_data[subjects_indices] 753 754 return Bunch(ext_vars=csv_data, description=fdescr, **files) 755 756 757@fill_doc 758def fetch_localizer_calculation_task(n_subjects=1, data_dir=None, url=None, 759 verbose=1): 760 """Fetch calculation task contrast maps from the localizer. 761 762 Parameters 763 ---------- 764 n_subjects : int, optional 765 The number of subjects to load. If None is given, 766 all 94 subjects are used. Default=1. 767 %(data_dir)s 768 %(url)s 769 %(verbose)s 770 771 Returns 772 ------- 773 data : Bunch 774 Dictionary-like object, the interest attributes are : 775 'cmaps': string list, giving paths to nifti contrast maps 776 777 Notes 778 ------ 779 This function is only a caller for the fetch_localizer_contrasts in order 780 to simplify examples reading and understanding. 781 The 'calculation (auditory and visual cue)' contrast is used. 782 783 See Also 784 --------- 785 nilearn.datasets.fetch_localizer_button_task 786 nilearn.datasets.fetch_localizer_contrasts 787 788 """ 789 data = fetch_localizer_contrasts(["calculation (auditory and visual cue)"], 790 n_subjects=n_subjects, 791 get_tmaps=False, get_masks=False, 792 get_anats=False, data_dir=data_dir, 793 url=url, resume=True, verbose=verbose) 794 return data 795 796 797@fill_doc 798def fetch_localizer_button_task(data_dir=None, url=None, 799 verbose=1): 800 """Fetch left vs right button press contrast maps from the localizer. 
801 802 Parameters 803 ---------- 804 %(data_dir)s 805 %(url)s 806 %(verbose)s 807 808 Returns 809 ------- 810 data : Bunch 811 Dictionary-like object, the interest attributes are : 812 813 - 'cmaps': string list, giving paths to nifti contrast maps 814 - 'tmap': string, giving paths to nifti contrast maps 815 - 'anat': string, giving paths to normalized anatomical image 816 817 Notes 818 ------ 819 This function is only a caller for the fetch_localizer_contrasts in order 820 to simplify examples reading and understanding. 821 The 'left vs right button press' contrast is used. 822 823 See Also 824 --------- 825 nilearn.datasets.fetch_localizer_calculation_task 826 nilearn.datasets.fetch_localizer_contrasts 827 828 """ 829 data = fetch_localizer_contrasts(["left vs right button press"], 830 n_subjects=[2], 831 get_tmaps=True, get_masks=False, 832 get_anats=True, data_dir=data_dir, 833 url=url, resume=True, verbose=verbose) 834 # Additional keys for backward compatibility 835 data['tmap'] = data['tmaps'][0] 836 data['anat'] = data['anats'][0] 837 return data 838 839 840@fill_doc 841def fetch_abide_pcp(data_dir=None, n_subjects=None, pipeline='cpac', 842 band_pass_filtering=False, global_signal_regression=False, 843 derivatives=['func_preproc'], 844 quality_checked=True, url=None, verbose=1, **kwargs): 845 """Fetch ABIDE dataset. 846 847 Fetch the Autism Brain Imaging Data Exchange (ABIDE) dataset wrt criteria 848 that can be passed as parameter. Note that this is the preprocessed 849 version of ABIDE provided by the preprocess connectome projects (PCP). 850 See :footcite:`Nielsen2013Multisite`. 851 852 Parameters 853 ---------- 854 %(data_dir)s 855 n_subjects : int, optional 856 The number of subjects to load. If None is given, 857 all available subjects are used (this number depends on the 858 preprocessing pipeline used). 859 860 pipeline : string {'cpac', 'css', 'dparsf', 'niak'}, optional 861 Possible pipelines are "ccs", "cpac", "dparsf" and "niak". 
862 Default='cpac'. 863 864 band_pass_filtering : boolean, optional 865 Due to controversies in the literature, band pass filtering is 866 optional. If true, signal is band filtered between 0.01Hz and 0.1Hz. 867 Default=False. 868 869 global_signal_regression : boolean optional 870 Indicates if global signal regression should be applied on the 871 signals. Default=False. 872 873 derivatives : string list, optional 874 Types of downloaded files. Possible values are: alff, degree_binarize, 875 degree_weighted, dual_regression, eigenvector_binarize, 876 eigenvector_weighted, falff, func_mask, func_mean, func_preproc, lfcd, 877 reho, rois_aal, rois_cc200, rois_cc400, rois_dosenbach160, rois_ez, 878 rois_ho, rois_tt, and vmhc. Please refer to the PCP site for more 879 details. Default=['func_preproc']. 880 881 quality_checked : boolean, optional 882 If true (default), restrict the list of the subjects to the one that 883 passed quality assessment for all raters. Default=True. 884 %(url)s 885 %(verbose)s 886 kwargs : parameter list, optional 887 Any extra keyword argument will be used to filter downloaded subjects 888 according to the CSV phenotypic file. Some examples of filters are 889 indicated below. 890 891 SUB_ID : list of integers in [50001, 50607], optional 892 Ids of the subjects to be loaded. 893 894 DX_GROUP : integer in {1, 2}, optional 895 1 is autism, 2 is control. 896 897 DSM_IV_TR : integer in [0, 4], optional 898 O is control, 1 is autism, 2 is Asperger, 3 is PPD-NOS, 899 4 is Asperger or PPD-NOS. 900 901 AGE_AT_SCAN : float in [6.47, 64], optional 902 Age of the subject. 903 904 SEX : integer in {1, 2}, optional 905 1 is male, 2 is female. 906 907 HANDEDNESS_CATEGORY : string in {'R', 'L', 'Mixed', 'Ambi'}, optional 908 R = Right, L = Left, Ambi = Ambidextrous. 909 910 HANDEDNESS_SCORE : integer in [-100, 100], optional 911 Positive = Right, Negative = Left, 0 = Ambidextrous. 
912 913 Notes 914 ----- 915 Code and description of preprocessing pipelines are provided on the 916 `PCP website <http://preprocessed-connectomes-project.github.io/>`. 917 918 References 919 ---------- 920 .. footbibliography:: 921 922 """ 923 # People keep getting it wrong and submiting a string instead of a 924 # list of strings. We'll make their life easy 925 if isinstance(derivatives, str): 926 derivatives = [derivatives, ] 927 928 # Parameter check 929 for derivative in derivatives: 930 if derivative not in [ 931 'alff', 'degree_binarize', 'degree_weighted', 932 'dual_regression', 'eigenvector_binarize', 933 'eigenvector_weighted', 'falff', 'func_mask', 'func_mean', 934 'func_preproc', 'lfcd', 'reho', 'rois_aal', 'rois_cc200', 935 'rois_cc400', 'rois_dosenbach160', 'rois_ez', 'rois_ho', 936 'rois_tt', 'vmhc']: 937 raise KeyError('%s is not a valid derivative' % derivative) 938 939 strategy = '' 940 if not band_pass_filtering: 941 strategy += 'no' 942 strategy += 'filt_' 943 if not global_signal_regression: 944 strategy += 'no' 945 strategy += 'global' 946 947 # General file: phenotypic information 948 dataset_name = 'ABIDE_pcp' 949 data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, 950 verbose=verbose) 951 if url is None: 952 url = ('https://s3.amazonaws.com/fcp-indi/data/Projects/' 953 'ABIDE_Initiative') 954 955 if quality_checked: 956 kwargs['qc_rater_1'] = b'OK' 957 kwargs['qc_anat_rater_2'] = [b'OK', b'maybe'] 958 kwargs['qc_func_rater_2'] = [b'OK', b'maybe'] 959 kwargs['qc_anat_rater_3'] = b'OK' 960 kwargs['qc_func_rater_3'] = b'OK' 961 962 # Fetch the phenotypic file and load it 963 csv = 'Phenotypic_V1_0b_preprocessed1.csv' 964 path_csv = _fetch_files(data_dir, [(csv, url + '/' + csv, {})], 965 verbose=verbose)[0] 966 967 # Note: the phenotypic file contains string that contains comma which mess 968 # up numpy array csv loading. This is why I do a pass to remove the last 969 # field. 
This can be 970 # done simply with pandas but we don't want such dependency ATM 971 # pheno = pandas.read_csv(path_csv).to_records() 972 with open(path_csv, 'r') as pheno_f: 973 pheno = ['i' + pheno_f.readline()] 974 975 # This regexp replaces commas between double quotes 976 for line in pheno_f: 977 pheno.append(re.sub(r',(?=[^"]*"(?:[^"]*"[^"]*")*[^"]*$)', ";", line)) 978 979 # bytes (encode()) needed for python 2/3 compat with numpy 980 pheno = '\n'.join(pheno).encode() 981 pheno = BytesIO(pheno) 982 pheno = np.recfromcsv(pheno, comments='$', case_sensitive=True) 983 984 # First, filter subjects with no filename 985 pheno = pheno[pheno['FILE_ID'] != b'no_filename'] 986 # Apply user defined filters 987 user_filter = _filter_columns(pheno, kwargs) 988 pheno = pheno[user_filter] 989 990 # Go into specific data folder and url 991 data_dir = os.path.join(data_dir, pipeline, strategy) 992 url = '/'.join([url, 'Outputs', pipeline, strategy]) 993 994 # Get the files 995 results = {} 996 file_ids = [file_id.decode() for file_id in pheno['FILE_ID']] 997 if n_subjects is not None: 998 file_ids = file_ids[:n_subjects] 999 pheno = pheno[:n_subjects] 1000 1001 results['description'] = _get_dataset_descr(dataset_name) 1002 results['phenotypic'] = pheno 1003 for derivative in derivatives: 1004 ext = '.1D' if derivative.startswith('rois') else '.nii.gz' 1005 files = [] 1006 for file_id in file_ids: 1007 file_ = [( 1008 file_id + '_' + derivative + ext, 1009 '/'.join([url, derivative, file_id + '_' + derivative + ext]), 1010 {} 1011 )] 1012 files.append(_fetch_files(data_dir, file_, verbose=verbose)[0]) 1013 # Load derivatives if needed 1014 if ext == '.1D': 1015 files = [np.loadtxt(f) for f in files] 1016 results[derivative] = files 1017 return Bunch(**results) 1018 1019 1020def _load_mixed_gambles(zmap_imgs): 1021 """Ravel zmaps (one per subject) along time axis, resulting, 1022 in a n_subjects * n_trials 3D niimgs and, and then make 1023 gain vector y of same length. 
1024 """ 1025 X = [] 1026 y = [] 1027 mask = [] 1028 for zmap_img in zmap_imgs: 1029 # load subject data 1030 this_X = get_data(zmap_img) 1031 affine = zmap_img.affine 1032 finite_mask = np.all(np.isfinite(this_X), axis=-1) 1033 this_mask = np.logical_and(np.all(this_X != 0, axis=-1), 1034 finite_mask) 1035 this_y = np.array([np.arange(1, 9)] * 6).ravel() 1036 1037 # gain levels 1038 if len(this_y) != this_X.shape[-1]: 1039 raise RuntimeError("%s: Expecting %i volumes, got %i!" % ( 1040 zmap_img, len(this_y), this_X.shape[-1])) 1041 1042 # standardize subject data 1043 this_X -= this_X.mean(axis=-1)[..., np.newaxis] 1044 std = this_X.std(axis=-1) 1045 std[std == 0] = 1 1046 this_X /= std[..., np.newaxis] 1047 1048 # commit subject data 1049 X.append(this_X) 1050 y.extend(this_y) 1051 mask.append(this_mask) 1052 y = np.array(y) 1053 X = np.concatenate(X, axis=-1) 1054 mask = np.sum(mask, axis=0) > .5 * len(mask) 1055 mask = np.logical_and(mask, np.all(np.isfinite(X), axis=-1)) 1056 X = X[mask, :].T 1057 tmp = np.zeros(list(mask.shape) + [len(X)]) 1058 tmp[mask, :] = X.T 1059 mask_img = nibabel.Nifti1Image(mask.astype(int), affine) 1060 X = nibabel.four_to_three(nibabel.Nifti1Image(tmp, affine)) 1061 return X, y, mask_img 1062 1063 1064@fill_doc 1065def fetch_mixed_gambles(n_subjects=1, data_dir=None, url=None, resume=True, 1066 return_raw_data=False, verbose=1): 1067 """Fetch Jimura "mixed gambles" dataset. 1068 1069 See :footcite:`JIMURA2012544`. 1070 1071 Parameters 1072 ---------- 1073 n_subjects : int, optional 1074 The number of subjects to load. If None is given, all the 1075 subjects are used. Default=1. 1076 %(data_dir)s 1077 %(url)s 1078 %(resume)s 1079 %(verbose)s 1080 return_raw_data : bool, optional 1081 If false, then the data will transformed into and (X, y) pair, suitable 1082 for machine learning routines. X is a list of n_subjects * 48 1083 Nifti1Image objects (where 48 is the number of trials), 1084 and y is an array of shape (n_subjects * 48,). 
1085 Default=False. 1086 1087 Returns 1088 ------- 1089 data : Bunch 1090 Dictionary-like object, the interest attributes are : 1091 'zmaps': string list 1092 Paths to realigned gain betamaps (one nifti per subject). 1093 'gain': .. 1094 If make_Xy is true, this is a list of n_subjects * 48 1095 Nifti1Image objects, else it is None. 1096 'y': array of shape (n_subjects * 48,) or None 1097 If make_Xy is true, then this is an array of shape 1098 (n_subjects * 48,), else it is None. 1099 1100 References 1101 ---------- 1102 .. footbibliography:: 1103 1104 """ 1105 if n_subjects > 16: 1106 warnings.warn('Warning: there are only 16 subjects!') 1107 n_subjects = 16 1108 if url is None: 1109 url = ("https://www.nitrc.org/frs/download.php/7229/" 1110 "jimura_poldrack_2012_zmaps.zip") 1111 opts = dict(uncompress=True) 1112 files = [("zmaps%ssub%03i_zmaps.nii.gz" % (os.sep, (j + 1)), url, opts) 1113 for j in range(n_subjects)] 1114 data_dir = _get_dataset_dir('jimura_poldrack_2012_zmaps', 1115 data_dir=data_dir) 1116 zmap_fnames = _fetch_files(data_dir, files, resume=resume, verbose=verbose) 1117 subject_id = np.repeat(np.arange(n_subjects), 6 * 8) 1118 data = Bunch(zmaps=zmap_fnames, 1119 subject_id=subject_id) 1120 if not return_raw_data: 1121 X, y, mask_img = _load_mixed_gambles(check_niimg(data.zmaps, 1122 return_iterator=True)) 1123 data.zmaps, data.gain, data.mask_img = X, y, mask_img 1124 return data 1125 1126 1127@fill_doc 1128def fetch_megatrawls_netmats(dimensionality=100, timeseries='eigen_regression', 1129 matrices='partial_correlation', data_dir=None, 1130 resume=True, verbose=1): 1131 """Downloads and returns Network Matrices data from MegaTrawls release in HCP. 1132 1133 This data can be used to predict relationships between imaging data and 1134 non-imaging behavioural measures such as age, sex, education, etc. 1135 The network matrices are estimated from functional connectivity 1136 datasets of 461 subjects. Full technical details in references. 

    More information available in :footcite:`smithhcp2015`,
    :footcite:`smith2015positive`, :footcite:`Filippini7209`,
    :footcite:`smith2014methods`, and :footcite:`reilly2009cerebellum`.

    Parameters
    ----------
    dimensionality : int, optional
        Valid inputs are 25, 50, 100, 200, 300. By default, network matrices
        estimated using Group ICA brain parcellations of 100 components/dimensions
        will be returned. Default=100.

    timeseries : str, optional
        Valid inputs are 'multiple_spatial_regression' or 'eigen_regression'. By
        default 'eigen_regression', matrices estimated using first principal
        eigen component timeseries signals extracted from each subject data
        parcellations will be returned. Otherwise, 'multiple_spatial_regression'
        matrices estimated using spatial regressor based timeseries signals
        extracted from each subject data parcellations will be returned.
        Default='eigen_regression'.

    matrices : str, optional
        Valid inputs are 'full_correlation' or 'partial_correlation'. By default,
        partial correlation matrices will be returned otherwise if selected
        full correlation matrices will be returned.
        Default='partial_correlation'.
    %(data_dir)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : Bunch
        Dictionary-like object, the attributes are :

        - 'dimensions': int, consists of given input in dimensions.

        - 'timeseries': str, consists of given input in timeseries method.

        - 'matrices': str, consists of given type of specific matrices.

        - 'correlation_matrices': ndarray, consists of correlation matrices
          based on given type of matrices. Array size will depend on given
          dimensions (n, n).

        - 'description': data description

    References
    ----------
    .. footbibliography::

    Notes
    -----
    See description for terms & conditions on data usage.

    """
    url = "http://www.nitrc.org/frs/download.php/8037/Megatrawls.tgz"
    opts = {'uncompress': True}

    error_message = "Invalid {0} input is provided: {1}, choose one of them {2}"
    # standard dataset terms: reject anything outside the released grids
    dimensionalities = [25, 50, 100, 200, 300]
    if dimensionality not in dimensionalities:
        raise ValueError(error_message.format('dimensionality', dimensionality,
                                              dimensionalities))
    timeseries_methods = ['multiple_spatial_regression', 'eigen_regression']
    if timeseries not in timeseries_methods:
        raise ValueError(error_message.format('timeseries', timeseries,
                                              timeseries_methods))
    output_matrices_names = ['full_correlation', 'partial_correlation']
    if matrices not in output_matrices_names:
        raise ValueError(error_message.format('matrices', matrices,
                                              output_matrices_names))

    dataset_name = 'Megatrawls'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose)
    description = _get_dataset_descr(dataset_name)

    # Map the user-facing option names onto the archive's file names:
    # 'ts2' <- multiple_spatial_regression, 'ts3' <- eigen_regression,
    # Znet1.txt <- full correlation, Znet2.txt <- partial correlation.
    timeseries_map = dict(multiple_spatial_regression='ts2', eigen_regression='ts3')
    matrices_map = dict(full_correlation='Znet1.txt', partial_correlation='Znet2.txt')
    filepath = [(os.path.join(
        '3T_Q1-Q6related468_MSMsulc_d%d_%s' % (dimensionality, timeseries_map[timeseries]),
        matrices_map[matrices]), url, opts)]

    # Fetch all the files
    files = _fetch_files(data_dir, filepath, resume=resume, verbose=verbose)

    # Load the files into arrays
    correlation_matrices = csv_to_array(files[0])

    return Bunch(
        dimensions=dimensionality,
        timeseries=timeseries,
        matrices=matrices,
        correlation_matrices=correlation_matrices,
        description=description)


@fill_doc
@deprecated("'fetch_cobre' has been deprecated and will be removed "
            "in release 0.9 . "
            "Please consider using a different datasets or downloading it "
            "with a different tool than nilearn.")
def fetch_cobre(n_subjects=10, data_dir=None, url=None, verbose=1):
    """Fetch COBRE datasets preprocessed using NIAK 0.17 under CentOS
    version 6.3 with Octave version 4.0.2 and the Minc toolkit version 0.3.18.

    Downloads and returns COBRE preprocessed resting state fMRI datasets,
    covariates and phenotypic information such as demographic, clinical
    variables, measure of frame displacement FD (an average FD for all the time
    frames left after censoring).

    Each subject `fmri_XXXXXXX.nii.gz` is a 3D+t nifti volume (150 volumes).
    WARNING: no confounds were actually regressed from the data, so it can be
    done interactively by the user who will be able to explore different
    analytical paths easily.

    For each subject, there is `fmri_XXXXXXX.tsv` files which contains the
    covariates such as motion parameters, mean CSF signal that should be
    regressed out of the functional data.

    `keys_confounds.json`: a json file, that describes each variable mentioned
    in the files `fmri_XXXXXXX.tsv.gz`. It also contains a list of time frames
    that have been removed from the time series by censoring for high motion.

    `phenotypic_data.tsv` contains the data of clinical variables that are
    explained in `keys_phenotypic_data.json`

    .. versionadded:: 0.3

    Warnings
    --------
    'fetch_cobre' has been deprecated and will be removed in release 0.9.

    Parameters
    ----------
    n_subjects : int, optional
        The number of subjects to load from maximum of 146 subjects.
        By default, 10 subjects will be loaded. If n_subjects=None,
        all subjects will be loaded. Default=10.
1277 %(data_dir)s 1278 %(url)s 1279 %(verbose)s 1280 1281 Returns 1282 ------- 1283 data : Bunch 1284 Dictionary-like object, the attributes are: 1285 1286 - 'func': string list 1287 Paths to Nifti images. 1288 - 'confounds': string list 1289 Paths to .tsv files of each subject, confounds. 1290 - 'phenotypic': numpy.recarray 1291 Contains data of clinical variables, sex, age, FD. 1292 - 'description': data description of the release and references. 1293 - 'desc_con': str 1294 description of the confounds variables 1295 - 'desc_phenotypic': str 1296 description of the phenotypic variables. 1297 1298 Notes 1299 ----- 1300 See `more information about datasets structure 1301 <https://figshare.com/articles/COBRE_preprocessed_with_NIAK_0_17_-_lightweight_release/4197885>`_ 1302 1303 """ 1304 if url is None: 1305 # Here we use the file that provides URL for all others 1306 url = 'https://api.figshare.com/v2/articles/4197885' 1307 dataset_name = 'cobre' 1308 data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, 1309 verbose=verbose) 1310 fdescr = _get_dataset_descr(dataset_name) 1311 1312 # First, fetch the file that references all individual URLs 1313 files = _fetch_files(data_dir, [("4197885", url, {})], 1314 verbose=verbose)[0] 1315 1316 files = json.load(open(files, 'r')) 1317 files = files['files'] 1318 # Index files by name 1319 files_ = {} 1320 for f in files: 1321 files_[f['name']] = f 1322 files = files_ 1323 1324 # Fetch the phenotypic file and load it 1325 csv_name_gz = 'phenotypic_data.tsv.gz' 1326 csv_name = os.path.splitext(csv_name_gz)[0] 1327 csv_file_phen = _fetch_files( 1328 data_dir, [(csv_name, files[csv_name_gz]['download_url'], 1329 {'md5': files[csv_name_gz].get('md5', None), 1330 'move': csv_name_gz, 1331 'uncompress': True})], 1332 verbose=verbose)[0] 1333 1334 # Load file in filename to numpy arrays 1335 names = ['ID', 'Current Age', 'Gender', 'Handedness', 'Subject Type', 1336 'Diagnosis', 'Frames OK', 'FD', 'FD Scrubbed'] 1337 1338 
csv_array_phen = np.recfromcsv(csv_file_phen, names=names, 1339 skip_header=True, delimiter='\t') 1340 1341 # Check number of subjects 1342 max_subjects = len(csv_array_phen) 1343 if n_subjects is None: 1344 n_subjects = max_subjects 1345 1346 if n_subjects > max_subjects: 1347 warnings.warn('Warning: there are only %d subjects' % max_subjects) 1348 n_subjects = max_subjects 1349 1350 sz_count = list(csv_array_phen['subject_type']).count(b'Patient') 1351 ct_count = list(csv_array_phen['subject_type']).count(b'Control') 1352 1353 n_sz = np.round(float(n_subjects) / max_subjects * sz_count).astype(int) 1354 n_ct = np.round(float(n_subjects) / max_subjects * ct_count).astype(int) 1355 1356 # First, restrict the csv files to the adequate number of subjects 1357 sz_ids = csv_array_phen[csv_array_phen['subject_type'] == 1358 b'Patient']['id'][:n_sz] 1359 ct_ids = csv_array_phen[csv_array_phen['subject_type'] == 1360 b'Control']['id'][:n_ct] 1361 ids = np.hstack([sz_ids, ct_ids]) 1362 csv_array_phen = csv_array_phen[np.in1d(csv_array_phen['id'], ids)] 1363 1364 # Call fetch_files once per subject. 
1365 1366 func = [] 1367 con = [] 1368 for i in ids: 1369 f = 'fmri_00' + str(i) + '.nii.gz' 1370 c_gz = 'fmri_00' + str(i) + '.tsv.gz' 1371 c = os.path.splitext(c_gz)[0] 1372 1373 f, c = _fetch_files( 1374 data_dir, 1375 [(f, files[f]['download_url'], {'md5': files[f].get('md5', None), 1376 'move': f}), 1377 (c, files[c_gz]['download_url'], 1378 {'md5': files[c_gz].get('md5', None), 1379 'move': c_gz, 'uncompress': True}) 1380 ], 1381 verbose=verbose) 1382 func.append(f) 1383 con.append(c) 1384 1385 # Fetch the the complementary files 1386 keys_con = "keys_confounds.json" 1387 keys_phen = "keys_phenotypic_data.json" 1388 1389 csv_keys_con, csv_keys_phen = _fetch_files( 1390 data_dir, 1391 [(keys_con, files[keys_con]['download_url'], 1392 {'md5': files[keys_con].get('md5', None), 'move': keys_con}), 1393 (keys_phen, files[keys_phen]['download_url'], 1394 {'md5': files[keys_phen].get('md5', None), 'move': keys_phen}) 1395 ], 1396 verbose=verbose) 1397 1398 files_keys_con = open(csv_keys_con, 'r').read() 1399 files_keys_phen = open(csv_keys_phen, 'r').read() 1400 1401 return Bunch(func=func, confounds=con, phenotypic=csv_array_phen, 1402 description=fdescr, desc_con=files_keys_con, 1403 desc_phenotypic=files_keys_phen) 1404 1405 1406@fill_doc 1407def fetch_surf_nki_enhanced(n_subjects=10, data_dir=None, 1408 url=None, resume=True, verbose=1): 1409 """Download and load the NKI enhanced resting-state dataset, 1410 preprocessed and projected to the fsaverage5 space surface. 1411 1412 See :footcite:`Nooner2012NKI`. 1413 1414 Direct download link :footcite:`NKIdataset`. 1415 1416 .. versionadded:: 0.3 1417 1418 Parameters 1419 ---------- 1420 n_subjects : int, optional 1421 The number of subjects to load from maximum of 102 subjects. 1422 By default, 10 subjects will be loaded. If None is given, 1423 all 102 subjects will be loaded. Default=10. 
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    data : sklearn.datasets.base.Bunch
        Dictionary-like object, the interest attributes are :

        - 'func_left': Paths to Gifti files containing resting state
          time series left hemisphere
        - 'func_right': Paths to Gifti files containing resting state
          time series right hemisphere
        - 'phenotypic': array containing tuple with subject ID, age,
          dominant hand and sex for each subject.
        - 'description': data description of the release and references.

    References
    ----------
    .. footbibliography::

    """
    if url is None:
        url = 'https://www.nitrc.org/frs/download.php/'

    # Preliminary checks and declarations
    dataset_name = 'nki_enhanced_surface'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    ids = ['A00028185', 'A00033747', 'A00035072', 'A00035827', 'A00035840',
           'A00037112', 'A00037511', 'A00038998', 'A00039391', 'A00039431',
           'A00039488', 'A00040524', 'A00040623', 'A00040944', 'A00043299',
           'A00043520', 'A00043677', 'A00043722', 'A00045589', 'A00050998',
           'A00051063', 'A00051064', 'A00051456', 'A00051457', 'A00051477',
           'A00051513', 'A00051514', 'A00051517', 'A00051528', 'A00051529',
           'A00051539', 'A00051604', 'A00051638', 'A00051658', 'A00051676',
           'A00051678', 'A00051679', 'A00051726', 'A00051774', 'A00051796',
           'A00051835', 'A00051882', 'A00051925', 'A00051927', 'A00052070',
           'A00052117', 'A00052118', 'A00052126', 'A00052180', 'A00052197',
           'A00052214', 'A00052234', 'A00052307', 'A00052319', 'A00052499',
           'A00052502', 'A00052577', 'A00052612', 'A00052639', 'A00053202',
           'A00053369', 'A00053456', 'A00053474', 'A00053546', 'A00053576',
           'A00053577', 'A00053578', 'A00053625', 'A00053626', 'A00053627',
           'A00053874', 'A00053901', 'A00053927', 'A00053949', 'A00054038',
           'A00054153', 'A00054173', 'A00054358', 'A00054482', 'A00054532',
           'A00054533', 'A00054534', 'A00054621', 'A00054895', 'A00054897',
           'A00054913', 'A00054929', 'A00055061', 'A00055215', 'A00055352',
           'A00055353', 'A00055542', 'A00055738', 'A00055763', 'A00055806',
           'A00056097', 'A00056098', 'A00056164', 'A00056372', 'A00056452',
           'A00056489', 'A00056949']

    # NITRC download ids: two consecutive ids per subject; the download
    # loop below uses the even offset (2*i) for 'lh' and the odd offset
    # (2*i+1) for 'rh' files.
    nitrc_ids = range(8260, 8464)
    max_subjects = len(ids)
    if n_subjects is None:
        n_subjects = max_subjects
    if n_subjects > max_subjects:
        warnings.warn('Warning: there are only %d subjects' % max_subjects)
        n_subjects = max_subjects
    ids = ids[:n_subjects]

    # Dataset description
    fdescr = _get_dataset_descr(dataset_name)

    # First, get the metadata
    phenotypic_file = 'NKI_enhanced_surface_phenotypics.csv'
    phenotypic = (phenotypic_file, url + '8470/pheno_nki_nilearn.csv',
                  {'move': phenotypic_file})

    phenotypic = _fetch_files(data_dir, [phenotypic], resume=resume,
                              verbose=verbose)[0]

    # Load the csv file
    phenotypic = np.genfromtxt(phenotypic, skip_header=True,
                               names=['Subject', 'Age',
                                      'Dominant Hand', 'Sex'],
                               delimiter=',', dtype=['U9', '<f8',
                                                     'U1', 'U1'])

    # Keep phenotypic information for selected subjects
    int_ids = np.asarray(ids)
    phenotypic = phenotypic[[np.where(phenotypic['Subject'] == i)[0][0]
                             for i in int_ids]]

    # Download subjects' datasets
    func_right = []
    func_left = []
    for i in range(len(ids)):

        archive = url + '%i/%s_%s_preprocessed_fsaverage5_fwhm6.gii'
        func = os.path.join('%s', '%s_%s_preprocessed_fwhm6.gii')
        rh = _fetch_files(data_dir,
                          [(func % (ids[i], ids[i], 'right'),
                            archive % (nitrc_ids[2*i+1], ids[i], 'rh'),
                            {'move': func % (ids[i], ids[i], 'right')}
                            )],
                          resume=resume, verbose=verbose)
        lh = _fetch_files(data_dir,
                          [(func % (ids[i], ids[i], 'left'),
                            archive % (nitrc_ids[2*i], ids[i], 'lh'),
                            {'move': func % (ids[i], ids[i], 'left')}
                            )],
                          resume=resume, verbose=verbose)

        func_right.append(rh[0])
        func_left.append(lh[0])

    return Bunch(func_left=func_left, func_right=func_right,
                 phenotypic=phenotypic,
                 description=fdescr)


@fill_doc
def _fetch_development_fmri_participants(data_dir, url, verbose):
    """Helper function to fetch_development_fmri.

    This function helps in downloading and loading participants data from .tsv
    uploaded on Open Science Framework (OSF).

    The original .tsv file contains many columns but this function picks only
    those columns that are relevant.

    Parameters
    ----------
    %(data_dir)s
    %(url)s
    %(verbose)s

    Returns
    -------
    participants : numpy.ndarray
        Contains data of each subject age, age group, child or adult,
        gender, handedness.

    """
    dataset_name = 'development_fmri'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    if url is None:
        url = 'https://osf.io/yr3av/download'

    files = [('participants.tsv', url, {'move': 'participants.tsv'})]
    path_to_participants = _fetch_files(data_dir, files, verbose=verbose)[0]

    # Load path to participants
    dtype = [('participant_id', 'U12'), ('Age', '<f8'), ('AgeGroup', 'U6'),
             ('Child_Adult', 'U5'), ('Gender', 'U4'), ('Handedness', 'U4')]
    names = ['participant_id', 'Age', 'AgeGroup', 'Child_Adult', 'Gender',
             'Handedness']
    participants = csv_to_array(path_to_participants, skip_header=True,
                                dtype=dtype, names=names)
    return participants


@fill_doc
def _fetch_development_fmri_functional(participants, data_dir, url, resume,
                                       verbose):
    """Helper function to fetch_development_fmri.

    This function helps in downloading functional MRI data in Nifti
    and its confound corresponding to each subject.

    The files are downloaded from Open Science Framework (OSF).

    Parameters
    ----------
    participants : numpy.ndarray
        Should contain column participant_id which represents subjects id. The
        number of files are fetched based on ids in this column.
    %(data_dir)s
    %(url)s
    %(resume)s
    %(verbose)s

    Returns
    -------
    func : list of str (Nifti files)
        Paths to functional MRI data (4D) for each subject.

    regressors : list of str (tsv files)
        Paths to regressors related to each subject.

    """
    dataset_name = 'development_fmri'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)

    if url is None:
        # Download from the relevant OSF project, using hashes generated
        # from the OSF API. Note the trailing slash. For more info, see:
        # https://gist.github.com/emdupre/3cb4d564511d495ea6bf89c6a577da74
        url = 'https://osf.io/download/{}/'

    confounds = '{}_task-pixar_desc-confounds_regressors.tsv'
    func = '{0}_task-pixar_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz'

    # The gzip contains unique download keys per Nifti file and confound
    # pre-extracted from OSF. Required for downloading files.
1623 package_directory = os.path.dirname(os.path.abspath(__file__)) 1624 dtype = [('participant_id', 'U12'), ('key_regressor', 'U24'), 1625 ('key_bold', 'U24')] 1626 names = ['participant_id', 'key_r', 'key_b'] 1627 # csv file contains download information related to OpenScience(osf) 1628 osf_data = csv_to_array(os.path.join(package_directory, "data", 1629 "development_fmri.csv"), 1630 skip_header=True, dtype=dtype, names=names) 1631 1632 funcs = [] 1633 regressors = [] 1634 1635 for participant_id in participants['participant_id']: 1636 this_osf_id = osf_data[osf_data['participant_id'] == participant_id] 1637 # Download regressors 1638 confound_url = url.format(this_osf_id['key_r'][0]) 1639 regressor_file = [(confounds.format(participant_id), 1640 confound_url, 1641 {'move': confounds.format(participant_id)})] 1642 path_to_regressor = _fetch_files(data_dir, regressor_file, 1643 verbose=verbose)[0] 1644 regressors.append(path_to_regressor) 1645 # Download bold images 1646 func_url = url.format(this_osf_id['key_b'][0]) 1647 func_file = [(func.format(participant_id, participant_id), func_url, 1648 {'move': func.format(participant_id)})] 1649 path_to_func = _fetch_files(data_dir, func_file, resume=resume, 1650 verbose=verbose)[0] 1651 funcs.append(path_to_func) 1652 return funcs, regressors 1653 1654 1655@fill_doc 1656def fetch_development_fmri(n_subjects=None, reduce_confounds=True, 1657 data_dir=None, resume=True, verbose=1, 1658 age_group='both'): 1659 """Fetch movie watching based brain development dataset (fMRI) 1660 1661 The data is downsampled to 4mm resolution for convenience with a repetition time (TR) 1662 of 2 secs. The origin of the data is coming from OpenNeuro. See Notes below. 1663 1664 Please cite :footcite:`richardson2018development` 1665 if you are using this dataset. 1666 1667 .. versionadded:: 0.5.2 1668 1669 Parameters 1670 ---------- 1671 n_subjects : int, optional 1672 The number of subjects to load. If None, all the subjects are 1673 loaded. 
Total 155 subjects. 1674 1675 reduce_confounds : bool, optional 1676 If True, the returned confounds only include 6 motion parameters, 1677 mean framewise displacement, signal from white matter, csf, and 1678 6 anatomical compcor parameters. This selection only serves the 1679 purpose of having realistic examples. Depending on your research 1680 question, other confounds might be more appropriate. 1681 If False, returns all fmriprep confounds. 1682 Default=True. 1683 %(data_dir)s 1684 %(resume)s 1685 %(verbose)s 1686 age_group : str, optional 1687 Default='both'. Which age group to fetch 1688 1689 - 'adults' = fetch adults only (n=33, ages 18-39) 1690 - 'child' = fetch children only (n=122, ages 3-12) 1691 - 'both' = fetch full sample (n=155) 1692 1693 Returns 1694 ------- 1695 data : Bunch 1696 Dictionary-like object, the interest attributes are : 1697 1698 - 'func': list of str (Nifti files) 1699 Paths to downsampled functional MRI data (4D) for each subject. 1700 1701 - 'confounds': list of str (tsv files) 1702 Paths to confounds related to each subject. 1703 1704 - 'phenotypic': numpy.ndarray 1705 Contains each subject age, age group, child or adult, gender, 1706 handedness. 1707 1708 Notes 1709 ----- 1710 The original data is downloaded from OpenNeuro 1711 https://openneuro.org/datasets/ds000228/versions/1.0.0 1712 1713 This fetcher downloads downsampled data that are available on Open 1714 Science Framework (OSF). Located here: https://osf.io/5hju4/files/ 1715 1716 Preprocessing details: https://osf.io/wjtyq/ 1717 1718 Note that if n_subjects > 2, and age_group is 'both', 1719 fetcher will return a ratio of children and adults representative 1720 of the total sample. 1721 1722 References 1723 ---------- 1724 .. 
footbibliography:: 1725 1726 """ 1727 dataset_name = 'development_fmri' 1728 data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, 1729 verbose=1) 1730 keep_confounds = ['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y', 1731 'rot_z', 'framewise_displacement', 'a_comp_cor_00', 1732 'a_comp_cor_01', 'a_comp_cor_02', 'a_comp_cor_03', 1733 'a_comp_cor_04', 'a_comp_cor_05', 'csf', 1734 'white_matter'] 1735 1736 # Dataset description 1737 fdescr = _get_dataset_descr(dataset_name) 1738 1739 # Participants data: ids, demographics, etc 1740 participants = _fetch_development_fmri_participants(data_dir=data_dir, 1741 url=None, 1742 verbose=verbose) 1743 1744 adult_count, child_count = _filter_func_regressors_by_participants( 1745 participants, age_group) # noqa: E126 1746 max_subjects = adult_count + child_count 1747 1748 n_subjects = _set_invalid_n_subjects_to_max(n_subjects, 1749 max_subjects, 1750 age_group) 1751 1752 # To keep the proportion of children versus adults 1753 percent_total = float(n_subjects) / max_subjects 1754 n_child = np.round(percent_total * child_count).astype(int) 1755 n_adult = np.round(percent_total * adult_count).astype(int) 1756 1757 # We want to return adults by default (i.e., `age_group=both`) or 1758 # if explicitly requested. 
    if (age_group != 'child') and (n_subjects == 1):
        n_adult, n_child = 1, 0

    if (age_group == 'both') and (n_subjects == 2):
        n_adult, n_child = 1, 1

    participants = _filter_csv_by_n_subjects(participants, n_adult, n_child)

    funcs, regressors = _fetch_development_fmri_functional(participants,
                                                           data_dir=data_dir,
                                                           url=None,
                                                           resume=resume,
                                                           verbose=verbose)

    if reduce_confounds:
        regressors = _reduce_confounds(regressors, keep_confounds)
    return Bunch(func=funcs, confounds=regressors, phenotypic=participants,
                 description=fdescr)


def _filter_func_regressors_by_participants(participants, age_group):
    """ Filter functional and regressors based on participants

    Counts how many adult and child participants are available for the
    requested ``age_group``; the count of the excluded group is 0.

    Raises
    ------
    ValueError
        If ``age_group`` is not one of 'both', 'child', 'adult'.
    """
    valid_age_groups = ('both', 'child', 'adult')
    if age_group not in valid_age_groups:
        raise ValueError("Wrong value for age_group={0}. "
                         "Valid arguments are: {1}".format(age_group,
                                                           valid_age_groups)
                         )

    child_adult = participants['Child_Adult'].tolist()

    # children are counted unless only adults were requested
    if age_group != 'adult':
        child_count = child_adult.count('child')
    else:
        child_count = 0

    # adults are counted unless only children were requested
    if age_group != 'child':
        adult_count = child_adult.count('adult')
    else:
        adult_count = 0
    return adult_count, child_count


def _filter_csv_by_n_subjects(participants, n_adult, n_child):
    """Restrict the csv files to the adequate number of subjects

    Keeps the first ``n_adult`` adults and ``n_child`` children of the
    participants array, sorted on 'Child_Adult' ('adult' sorts before
    'child').
    """
    child_ids = participants[participants['Child_Adult'] ==
                             'child']['participant_id'][:n_child]
    adult_ids = participants[participants['Child_Adult'] ==
                             'adult']['participant_id'][:n_adult]
    ids = np.hstack([adult_ids, child_ids])
    participants = participants[np.in1d(participants['participant_id'], ids)]
    participants = participants[np.argsort(participants, order='Child_Adult')]
    return participants


def _set_invalid_n_subjects_to_max(n_subjects, max_subjects, age_group):
    """ If n_subjects is invalid, sets it to max.

    None, values larger than ``max_subjects`` and values below 1 all
    fall back to ``max_subjects`` (with a warning for numeric
    out-of-range values).
    """
    if n_subjects is None:
        n_subjects = max_subjects

    if (isinstance(n_subjects, numbers.Number) and
            ((n_subjects > max_subjects) or (n_subjects < 1))):
        warnings.warn("Wrong value for n_subjects={0}. The maximum "
                      "value (for age_group={1}) will be used instead: "
                      "n_subjects={2}"
                      .format(n_subjects, age_group, max_subjects))
        n_subjects = max_subjects
    return n_subjects


def _reduce_confounds(regressors, keep_confounds):
    # Write a 'desc-reducedConfounds' copy of each confounds tsv that
    # keeps only the columns listed in `keep_confounds`; an existing
    # reduced file is reused. Returns the paths to the reduced files.
    reduced_regressors = []
    for in_file in regressors:
        out_file = in_file.replace('desc-confounds',
                                   'desc-reducedConfounds')
        if not os.path.isfile(out_file):
            confounds = np.recfromcsv(in_file, delimiter='\t')
            selected_confounds = confounds[keep_confounds]
            header = '\t'.join(selected_confounds.dtype.names)
            np.savetxt(out_file, np.array(selected_confounds.tolist()),
                       header=header, delimiter='\t', comments='')
        reduced_regressors.append(out_file)
    return reduced_regressors


# datasets originally belonging to nistats follow


@fill_doc
def fetch_language_localizer_demo_dataset(data_dir=None, verbose=1):
    """Download language localizer demo dataset.

    Parameters
    ----------
    %(data_dir)s
    %(verbose)s

    Returns
    -------
    data_dir : string
        Path to downloaded dataset.

    downloaded_files : list of string
        Absolute paths of downloaded files on disk.

    """
    url = 'https://osf.io/3dj2a/download'
    # When it starts working again change back to:
    # url = 'https://osf.io/nh987/download'
    main_folder = 'fMRI-language-localizer-demo-dataset'

    data_dir = _get_dataset_dir(main_folder, data_dir=data_dir,
                                verbose=verbose)
    # The files_spec needed for _fetch_files
    files_spec = [(main_folder + '.zip', url, {'move': main_folder + '.zip'})]
    # Only download if directory is empty
    # Directory will have been created by the call to _get_dataset_dir above
    if not os.listdir(data_dir):
        downloaded_files = _fetch_files(data_dir, files_spec, resume=True,
                                        verbose=verbose)
        # Extract the archive in place, next to where it was downloaded.
        _uncompress_file(downloaded_files[0])

    # Collect every extracted file recursively; sorted for determinism.
    file_list = [os.path.join(path, f) for
                 path, dirs, files in os.walk(data_dir) for f in files]
    return data_dir, sorted(file_list)


@fill_doc
def fetch_bids_langloc_dataset(data_dir=None, verbose=1):
    """Download language localizer example :term:`bids<BIDS>` dataset.

    Parameters
    ----------
    %(data_dir)s
    %(verbose)s

    Returns
    -------
    data_dir : string
        Path to downloaded dataset.

    downloaded_files : list of string
        Absolute paths of downloaded files on disk.

    """
    url = 'https://files.osf.io/v1/resources/9q7dv/providers/osfstorage/5888d9a76c613b01fc6acc4e'  # noqa: E501
    dataset_name = 'bids_langloc_example'
    main_folder = 'bids_langloc_dataset'
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir,
                                verbose=verbose)
    # The files_spec needed for _fetch_files
    files_spec = [(main_folder + '.zip', url, {'move': main_folder + '.zip'})]
    # Only download when the extracted folder is not already present.
    if not os.path.exists(os.path.join(data_dir, main_folder)):
        downloaded_files = _fetch_files(data_dir, files_spec, resume=True,
                                        verbose=verbose)
        _uncompress_file(downloaded_files[0])
    main_path = os.path.join(data_dir, main_folder)
    # Collect every extracted file recursively; sorted for determinism.
    file_list = [os.path.join(path, f) for
                 path, dirs, files in os.walk(main_path) for f in files]
    return os.path.join(data_dir, main_folder), sorted(file_list)


@fill_doc
def fetch_openneuro_dataset_index(data_dir=None,
                                  dataset_version='ds000030_R1.0.4',
                                  verbose=1):
    """Download a file with OpenNeuro :term:`BIDS` dataset index.

    Downloading the index allows to explore the dataset directories
    to select specific files to download. The index is a sorted list of urls.

    Parameters
    ----------
    %(data_dir)s
    dataset_version : string, optional
        Dataset version name. Assumes it is of the form [name]_[version].
        Default='ds000030_R1.0.4'.
    %(verbose)s

    Returns
    -------
    urls_path : string
        Path to downloaded dataset index.

    urls : list of string
        Sorted list of dataset directories.
1948 1949 """ 1950 data_prefix = '{}/{}/uncompressed'.format(dataset_version.split('_')[0], 1951 dataset_version, 1952 ) 1953 data_dir = _get_dataset_dir(data_prefix, data_dir=data_dir, 1954 verbose=verbose) 1955 1956 file_url = 'https://osf.io/86xj7/download' 1957 final_download_path = os.path.join(data_dir, 'urls.json') 1958 downloaded_file_path = _fetch_files(data_dir=data_dir, 1959 files=[(final_download_path, 1960 file_url, 1961 {'move': final_download_path} 1962 )], 1963 resume=True 1964 ) 1965 urls_path = downloaded_file_path[0] 1966 with open(urls_path, 'r') as json_file: 1967 urls = json.load(json_file) 1968 return urls_path, urls 1969 1970 1971def select_from_index(urls, inclusion_filters=None, exclusion_filters=None, 1972 n_subjects=None): 1973 """Select subset of urls with given filters. 1974 1975 Parameters 1976 ---------- 1977 urls : list of str 1978 List of dataset urls obtained from index download. 1979 1980 inclusion_filters : list of str, optional 1981 List of unix shell-style wildcard strings 1982 that will be used to filter the url list. 1983 If a filter matches the url it is retained for download. 1984 Multiple filters work on top of each other. 1985 Like an "and" logical operator, creating a more restrictive query. 1986 Inclusion and exclusion filters apply together. 1987 For example the filter '*task-rest*'' would keep only urls 1988 that contain the 'task-rest' string. 1989 1990 exclusion_filters : list of str, optional 1991 List of unix shell-style wildcard strings 1992 that will be used to filter the url list. 1993 If a filter matches the url it is discarded for download. 1994 Multiple filters work on top of each other. 1995 Like an "and" logical operator, creating a more restrictive query. 1996 Inclusion and exclusion filters apply together. 1997 For example the filter '*task-rest*' would discard all urls 1998 that contain the 'task-rest' string. 1999 2000 n_subjects : int, optional 2001 Number of subjects to download from the dataset. 
All by default. 2002 2003 Returns 2004 ------- 2005 urls : list of string 2006 Sorted list of filtered dataset directories. 2007 2008 """ 2009 inclusion_filters = inclusion_filters if inclusion_filters else [] 2010 exclusion_filters = exclusion_filters if exclusion_filters else [] 2011 # We apply filters to the urls 2012 for exclusion in exclusion_filters: 2013 urls = [url for url in urls if not fnmatch.fnmatch(url, exclusion)] 2014 for inclusion in inclusion_filters: 2015 urls = [url for url in urls if fnmatch.fnmatch(url, inclusion)] 2016 2017 # subject selection filter 2018 # from the url list we infer all available subjects like 'sub-xxx/' 2019 subject_regex = 'sub-[a-z|A-Z|0-9]*[_./]' 2020 2021 def infer_subjects(urls): 2022 subjects = set() 2023 for url in urls: 2024 if 'sub-' in url: 2025 subjects.add(re.search(subject_regex, url).group(0)[:-1]) 2026 return sorted(subjects) 2027 2028 # We get a list of subjects (for the moment the first n subjects) 2029 selected_subjects = set(infer_subjects(urls)[:n_subjects]) 2030 # We exclude urls of subjects not selected 2031 urls = [ 2032 url for url in urls 2033 if 'sub-' not in url or re.search(subject_regex, url).group(0)[:-1] 2034 in selected_subjects 2035 ] 2036 return urls 2037 2038 2039def patch_openneuro_dataset(file_list): 2040 """Add symlinks for files not named according to latest :term:`BIDS` conventions. 
    """
    # Mapping from legacy fMRIPrep-style filename suffixes to their
    # current BIDS-derivatives counterparts.
    rep = {'_T1w_brainmask': '_desc-brain_mask',
           '_T1w_preproc': '_desc-preproc_T1w',
           '_T1w_space-MNI152NLin2009cAsym_brainmask':
               '_space-MNI152NLin2009cAsym_desc-brain_mask',
           '_T1w_space-MNI152NLin2009cAsym_class-':
               '_space-MNI152NLin2009cAsym_label-',
           '_T1w_space-MNI152NLin2009cAsym_preproc':
               '_space-MNI152NLin2009cAsym_desc-preproc_T1w',
           '_bold_confounds': '_desc-confounds_regressors',
           '_bold_space-MNI152NLin2009cAsym_brainmask':
               '_space-MNI152NLin2009cAsym_desc-brain_mask',
           '_bold_space-MNI152NLin2009cAsym_preproc':
               '_space-MNI152NLin2009cAsym_desc-preproc_bold'
           }
    # Create a symlink if a file with the modified filename does not exist
    # NOTE(review): os.symlink may require elevated privileges on Windows
    # -- presumably only exercised on posix systems; confirm if relevant.
    for old in rep:
        for name in file_list:
            if old in name:
                if not os.path.exists(name.replace(old, rep[old])):
                    os.symlink(name, name.replace(old, rep[old]))


@fill_doc
def fetch_openneuro_dataset(
        urls=None, data_dir=None, dataset_version='ds000030_R1.0.4',
        verbose=1):
    """Download OpenNeuro :term:`BIDS` dataset.

    Parameters
    ----------
    urls : list of string, optional
        Openneuro url list of dataset files to download. If not specified
        all files of the specified dataset will be downloaded.
    %(data_dir)s
    dataset_version : string, optional
        Dataset version name. Assumes it is of the form [name]_[version].
        Default is `ds000030_R1.0.4`.
    %(verbose)s

    Returns
    -------
    data_dir : string
        Path to downloaded dataset.

    downloaded_files : list of string
        Absolute paths of downloaded files on disk.
2088 2089 """ 2090 data_prefix = '{}/{}/uncompressed'.format( 2091 dataset_version.split('_')[0], dataset_version) 2092 data_dir = _get_dataset_dir(data_prefix, data_dir=data_dir, 2093 verbose=verbose) 2094 2095 # if urls are not specified we download the complete dataset index 2096 if urls is None: 2097 _, urls = fetch_openneuro_dataset_index( 2098 data_dir=data_dir, dataset_version=dataset_version, 2099 verbose=verbose) 2100 2101 # The files_spec needed for _fetch_files 2102 files_spec = [] 2103 files_dir = [] 2104 for url in urls: 2105 url_path = url.split(data_prefix + '/')[1] 2106 file_dir = os.path.join(data_dir, url_path) 2107 files_spec.append((os.path.basename(file_dir), url, {})) 2108 files_dir.append(os.path.dirname(file_dir)) 2109 2110 # download the files 2111 downloaded = [] 2112 for file_spec, file_dir in zip(files_spec, files_dir): 2113 # Timeout errors are common in the s3 connection so we try to avoid 2114 # failure of the dataset download for a transient instability 2115 success = False 2116 download_attempts = 4 2117 while download_attempts > 0 and not success: 2118 try: 2119 downloaded_files = _fetch_files( 2120 file_dir, [file_spec], resume=True, verbose=verbose) 2121 downloaded += downloaded_files 2122 success = True 2123 except Exception: 2124 download_attempts -= 1 2125 if not success: 2126 raise Exception('multiple failures downloading %s' % file_spec[1]) 2127 patch_openneuro_dataset(downloaded) 2128 2129 return data_dir, sorted(downloaded) 2130 2131 2132@fill_doc 2133def fetch_localizer_first_level(data_dir=None, verbose=1): 2134 """Download a first-level localizer fMRI dataset 2135 2136 Parameters 2137 ---------- 2138 %(data_dir)s 2139 %(verbose)s 2140 2141 Returns 2142 ------- 2143 data : sklearn.datasets.base.Bunch 2144 Dictionary-like object, with the keys: 2145 epi_img: the input 4D image 2146 events: a csv file describing the paardigm 2147 2148 """ 2149 url = 'https://osf.io/2bqxn/download' 2150 epi_img = 
'sub-12069_task-localizer_space-MNI305.nii.gz' 2151 events = 'sub-12069_task-localizer_events.tsv' 2152 opts = {'uncompress': True} 2153 options = ('epi_img', 'events') 2154 dir_ = 'localizer_first_level' 2155 filenames = [(os.path.join(dir_, name), url, opts) 2156 for name in [epi_img, events]] 2157 2158 dataset_name = 'localizer_first_level' 2159 data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, 2160 verbose=verbose) 2161 files = _fetch_files(data_dir, filenames, verbose=verbose) 2162 2163 params = dict(list(zip(options, files))) 2164 return Bunch(**params) 2165 2166 2167def _download_spm_auditory_data(data_dir, subject_dir, subject_id): 2168 print('Data absent, downloading...') 2169 url = ('http://www.fil.ion.ucl.ac.uk/spm/download/data/MoAEpilot/' 2170 'MoAEpilot.zip') 2171 archive_path = os.path.join(subject_dir, os.path.basename(url)) 2172 _fetch_file(url, subject_dir) 2173 try: 2174 _uncompress_file(archive_path) 2175 except: # noqa: E722 2176 print('Archive corrupted, trying to download it again.') 2177 return fetch_spm_auditory(data_dir=data_dir, data_name='', 2178 subject_id=subject_id) 2179 2180 2181def _prepare_downloaded_spm_auditory_data(subject_dir): 2182 """ Uncompresses downloaded spm_auditory dataset and organizes 2183 the data into appropriate directories. 2184 2185 Parameters 2186 ---------- 2187 subject_dir : string 2188 Path to subject's data directory. 2189 2190 Returns 2191 ------- 2192 _subject_data : skl.Bunch object 2193 Scikit-Learn Bunch object containing data of a single subject 2194 from the SPM Auditory dataset. 

    """
    subject_data = {}
    spm_auditory_data_files = ["fM00223/fM00223_%03i.img" % index
                               for index in range(4, 100)]
    spm_auditory_data_files.append("sM00223/sM00223_002.img")

    # Index every expected file by its relative name; bail out (returning
    # None) as soon as one is missing.
    for file_name in spm_auditory_data_files:
        file_path = os.path.join(subject_dir, file_name)
        if os.path.exists(file_path):
            subject_data[file_name] = file_path
        else:
            print('%s missing from filelist!' % file_name)
            return None

    _subject_data = {}
    _subject_data['func'] = sorted(
        [subject_data[x] for x in subject_data.keys()
         if re.match(r'^fM00223_0\d\d\.img$',
                     os.path.basename(x))])

    # volumes for this dataset of shape (64, 64, 64, 1); let's fix this
    for x in _subject_data['func']:
        vol = nib.load(x)
        if len(vol.shape) == 4:
            # Squeeze the trailing singleton time axis and rewrite in place.
            vol = nib.Nifti1Image(get_data(vol)[:, :, :, 0],
                                  vol.affine)
            nib.save(vol, x)

    _subject_data['anat'] = [subject_data[x] for x in subject_data.keys()
                             if re.match(r'^sM00223_002\.img$',
                                         os.path.basename(x))][0]

    # ... same thing for anat
    vol = nib.load(_subject_data['anat'])
    if len(vol.shape) == 4:
        vol = nib.Nifti1Image(get_data(vol)[:, :, :, 0],
                              vol.affine)
        nib.save(vol, _subject_data['anat'])

    return Bunch(**_subject_data)


def _make_path_events_file_spm_auditory_data(spm_auditory_data):
    """Accepts data for spm_auditory dataset as Bunch
    and constructs the filepath for its events descriptor file.

    Parameters
    ----------
    spm_auditory_data : Bunch

    Returns
    -------
    events_filepath : string
        Full path to the events.tsv file for spm_auditory dataset.
2250 2251 """ 2252 events_file_location = os.path.dirname(spm_auditory_data['func'][0]) 2253 events_filename = os.path.basename(events_file_location) + '_events.tsv' 2254 events_filepath = os.path.join(events_file_location, events_filename) 2255 return events_filepath 2256 2257 2258def _make_events_file_spm_auditory_data(events_filepath): 2259 """Accepts destination filepath including filename and 2260 creates the events.tsv file for the spm_auditory dataset. 2261 2262 Parameters 2263 ---------- 2264 events_filepath : string 2265 The path where the events file will be created. 2266 2267 Returns 2268 ------- 2269 None 2270 2271 """ 2272 tr = 7. 2273 epoch_duration = 6 * tr # duration in seconds 2274 conditions = ['rest', 'active'] * 8 2275 n_blocks = len(conditions) 2276 duration = epoch_duration * np.ones(n_blocks) 2277 onset = np.linspace(0, (n_blocks - 1) * epoch_duration, n_blocks) 2278 events = pd.DataFrame( 2279 {'onset': onset, 'duration': duration, 'trial_type': conditions}) 2280 events.to_csv(events_filepath, sep='\t', index=False, 2281 columns=['onset', 'duration', 'trial_type']) 2282 2283 2284@fill_doc 2285def fetch_spm_auditory(data_dir=None, data_name='spm_auditory', 2286 subject_id='sub001', verbose=1): 2287 """Function to fetch SPM auditory single-subject data. 2288 2289 See :footcite:`spm_auditory`. 2290 2291 Parameters 2292 ---------- 2293 %(data_dir)s 2294 data_name : string, optional 2295 Name of the dataset. Default='spm_auditory'. 2296 2297 subject_id : string, optional 2298 Indicates which subject to retrieve. 2299 Default='sub001'. 2300 %(verbose)s 2301 2302 Returns 2303 ------- 2304 data : sklearn.datasets.base.Bunch 2305 Dictionary-like object, the interest attributes are: 2306 - 'func': string list. Paths to functional images 2307 - 'anat': string list. Path to anat image 2308 2309 References 2310 ---------- 2311 .. 
footbibliography:: 2312 2313 """ 2314 data_dir = _get_dataset_dir(data_name, data_dir=data_dir, 2315 verbose=verbose) 2316 subject_dir = os.path.join(data_dir, subject_id) 2317 if not os.path.exists(subject_dir): 2318 _download_spm_auditory_data(data_dir, subject_dir, subject_id) 2319 spm_auditory_data = _prepare_downloaded_spm_auditory_data(subject_dir) 2320 try: 2321 spm_auditory_data['events'] 2322 except KeyError: 2323 events_filepath = _make_path_events_file_spm_auditory_data( 2324 spm_auditory_data) 2325 if not os.path.isfile(events_filepath): 2326 _make_events_file_spm_auditory_data(events_filepath) 2327 spm_auditory_data['events'] = events_filepath 2328 return spm_auditory_data 2329 2330 2331def _get_func_data_spm_multimodal(subject_dir, session, _subject_data): 2332 session_func = sorted(glob.glob( 2333 os.path.join( 2334 subject_dir, 2335 ('fMRI/Session%i/fMETHODS-000%i-*-01.img' % ( 2336 session, session + 4) 2337 ) 2338 ) 2339 )) 2340 if len(session_func) < 390: 2341 print('Missing %i functional scans for session %i.' 
% ( 2342 390 - len(session_func), session)) 2343 return None 2344 2345 _subject_data['func%i' % (session)] = session_func 2346 return _subject_data 2347 2348 2349def _get_session_trials_spm_multimodal(subject_dir, session, _subject_data): 2350 sess_trials = os.path.join( 2351 subject_dir, 2352 'fMRI/trials_ses%i.mat' % (session)) 2353 if not os.path.isfile(sess_trials): 2354 print('Missing session file: %s' % sess_trials) 2355 return None 2356 2357 _subject_data['trials_ses%i' % (session)] = sess_trials 2358 return _subject_data 2359 2360 2361def _get_anatomical_data_spm_multimodal(subject_dir, _subject_data): 2362 anat = os.path.join(subject_dir, 'sMRI/smri.img') 2363 if not os.path.isfile(anat): 2364 print('Missing structural image.') 2365 return None 2366 2367 _subject_data['anat'] = anat 2368 return _subject_data 2369 2370 2371def _glob_spm_multimodal_fmri_data(subject_dir): 2372 """glob data from subject_dir.""" 2373 _subject_data = {'slice_order': 'descending'} 2374 2375 for session in range(1, 3): 2376 # glob func data for session 2377 _subject_data = _get_func_data_spm_multimodal(subject_dir, 2378 session, 2379 _subject_data) 2380 if not _subject_data: 2381 return None 2382 # glob trials .mat file 2383 _subject_data = _get_session_trials_spm_multimodal(subject_dir, 2384 session, 2385 _subject_data) 2386 if not _subject_data: 2387 return None 2388 try: 2389 events = _make_events_file_spm_multimodal_fmri(_subject_data, 2390 session) 2391 except MatReadError as mat_err: 2392 warnings.warn( 2393 '{}. 
An events.tsv file ' 2394 'cannot be generated'.format(str(mat_err))) 2395 else: 2396 events_filepath = _make_events_filepath_spm_multimodal_fmri( 2397 _subject_data, session) 2398 events.to_csv(events_filepath, sep='\t', index=False) 2399 _subject_data['events{}'.format(session)] = events_filepath 2400 2401 # glob for anat data 2402 _subject_data = _get_anatomical_data_spm_multimodal(subject_dir, 2403 _subject_data) 2404 if not _subject_data: 2405 return None 2406 2407 return Bunch(**_subject_data) 2408 2409 2410def _download_data_spm_multimodal(data_dir, subject_dir, subject_id): 2411 print('Data absent, downloading...') 2412 urls = [ 2413 # fmri 2414 ('http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/' 2415 'multimodal_fmri.zip'), 2416 # structural 2417 ('http://www.fil.ion.ucl.ac.uk/spm/download/data/mmfaces/' 2418 'multimodal_smri.zip') 2419 ] 2420 2421 for url in urls: 2422 archive_path = os.path.join(subject_dir, os.path.basename(url)) 2423 _fetch_file(url, subject_dir) 2424 try: 2425 _uncompress_file(archive_path) 2426 except: # noqa: E722 2427 print('Archive corrupted, trying to download it again.') 2428 return fetch_spm_multimodal_fmri(data_dir=data_dir, 2429 data_name='', 2430 subject_id=subject_id) 2431 2432 return _glob_spm_multimodal_fmri_data(subject_dir) 2433 2434 2435def _make_events_filepath_spm_multimodal_fmri(_subject_data, session): 2436 key = 'trials_ses{}'.format(session) 2437 events_file_location = os.path.dirname(_subject_data[key]) 2438 events_filename = 'session{}_events.tsv'.format(session) 2439 events_filepath = os.path.join(events_file_location, events_filename) 2440 return events_filepath 2441 2442 2443def _make_events_file_spm_multimodal_fmri(_subject_data, session): 2444 tr = 2. 
2445 timing = loadmat(_subject_data['trials_ses%i' % (session)], 2446 squeeze_me=True, struct_as_record=False) 2447 faces_onsets = timing['onsets'][0].ravel() 2448 scrambled_onsets = timing['onsets'][1].ravel() 2449 onsets = np.hstack((faces_onsets, scrambled_onsets)) 2450 onsets *= tr # because onsets were reporting in 'scans' units 2451 conditions = ( 2452 ['faces'] * len(faces_onsets) + ['scrambled'] * len(scrambled_onsets) 2453 ) 2454 duration = np.ones_like(onsets) 2455 events = pd.DataFrame({'trial_type': conditions, 'onset': onsets, 2456 'duration': duration}) 2457 return events 2458 2459 2460@fill_doc 2461def fetch_spm_multimodal_fmri(data_dir=None, data_name='spm_multimodal_fmri', 2462 subject_id='sub001', verbose=1): 2463 """Fetcher for Multi-modal Face Dataset. 2464 2465 See :footcite:`spm_multiface`. 2466 2467 Parameters 2468 ---------- 2469 %(data_dir)s 2470 data_name : string, optional 2471 Name of the dataset. Default='spm_multimodal_fmri'. 2472 2473 subject_id : string, optional 2474 Indicates which subject to retrieve. Default='sub001'. 2475 %(verbose)s 2476 2477 Returns 2478 ------- 2479 data : sklearn.datasets.base.Bunch 2480 Dictionary-like object, the interest attributes are: 2481 - 'func1': string list. Paths to functional images for session 1 2482 - 'func2': string list. Paths to functional images for session 2 2483 - 'trials_ses1': string list. Path to onsets file for session 1 2484 - 'trials_ses2': string list. Path to onsets file for session 2 2485 - 'anat': string. Path to anat file 2486 2487 References 2488 ---------- 2489 .. footbibliography:: 2490 2491 """ 2492 data_dir = _get_dataset_dir(data_name, data_dir=data_dir, verbose=verbose) 2493 subject_dir = os.path.join(data_dir, subject_id) 2494 2495 # maybe data_dir already contains the data ? 2496 data = _glob_spm_multimodal_fmri_data(subject_dir) 2497 if data is not None: 2498 return data 2499 2500 # No. 
Download the data 2501 return _download_data_spm_multimodal(data_dir, subject_dir, subject_id) 2502 2503 2504@fill_doc 2505def fetch_fiac_first_level(data_dir=None, verbose=1): 2506 """Download a first-level fiac fMRI dataset (2 sessions) 2507 2508 Parameters 2509 ---------- 2510 %(data_dir)s 2511 %(verbose)s 2512 2513 """ 2514 data_dir = _get_dataset_dir('fiac_nilearn.glm', data_dir=data_dir, 2515 verbose=verbose) 2516 2517 def _glob_fiac_data(): 2518 """glob data from subject_dir.""" 2519 _subject_data = {} 2520 subject_dir = os.path.join(data_dir, 'nipy-data-0.2/data/fiac/fiac0') 2521 for session in [1, 2]: 2522 # glob func data for session 2523 session_func = os.path.join(subject_dir, 'run%i.nii.gz' % session) 2524 if not os.path.isfile(session_func): 2525 print('Missing functional scan for session %i.' % session) 2526 return None 2527 2528 _subject_data['func%i' % session] = session_func 2529 2530 # glob design matrix .npz file 2531 sess_dmtx = os.path.join(subject_dir, 2532 'run%i_design.npz' % session) 2533 if not os.path.isfile(sess_dmtx): 2534 print('Missing session file: %s' % sess_dmtx) 2535 return None 2536 2537 _subject_data['design_matrix%i' % session] = sess_dmtx 2538 2539 # glob for mask data 2540 mask = os.path.join(subject_dir, 'mask.nii.gz') 2541 if not os.path.isfile(mask): 2542 print('Missing mask image.') 2543 return None 2544 2545 _subject_data['mask'] = mask 2546 return Bunch(**_subject_data) 2547 2548 # maybe data_dir already contains the data ? 2549 data = _glob_fiac_data() 2550 if data is not None: 2551 return data 2552 2553 # No. 
Download the data 2554 print('Data absent, downloading...') 2555 url = 'http://nipy.sourceforge.net/data-packages/nipy-data-0.2.tar.gz' 2556 2557 archive_path = os.path.join(data_dir, os.path.basename(url)) 2558 _fetch_file(url, data_dir) 2559 try: 2560 _uncompress_file(archive_path) 2561 except: # noqa: E722 2562 print('Archive corrupted, trying to download it again.') 2563 return fetch_fiac_first_level(data_dir=data_dir) 2564 2565 return _glob_fiac_data() 2566