"""
Test the datasets module
"""
# Author: Alexandre Abraham
# License: simplified BSD

import os
import uuid
from pathlib import Path
import re
import gzip
from collections import OrderedDict

import numpy as np
import json
import nibabel

import pandas as pd
import pytest
from nibabel.tmpdirs import TemporaryDirectory
from sklearn.utils import check_random_state

from nilearn.datasets import func
from nilearn.datasets._testing import list_to_archive, dict_to_archive
from nilearn.datasets.utils import _get_dataset_dir
from nilearn._utils.testing import check_deprecation


def _load_localizer_index():
    """Build a fake localizer index and load the bundled TSV fixtures.

    Returns
    -------
    localizer_index : dict
        Maps every templated path of ``localizer_index.json`` (expanded for
        subjects ``S01`` to ``S94``) and the two TSV paths to a random hex
        identifier.
    tsv_files : dict
        Maps the two TSV paths to the DataFrames read from the ``data``
        directory located next to this file.
    """
    data_dir = Path(__file__).parent / "data"
    with (data_dir / "localizer_index.json").open() as of:
        localizer_template = json.load(of)
    localizer_index = {}
    for idx in range(1, 95):
        sid = 'S{:02}'.format(idx)
        localizer_index.update(
            {key.format(sid): uuid.uuid4().hex for key in localizer_template})
    localizer_index['/localizer/phenotype/behavioural.tsv'] = uuid.uuid4().hex
    localizer_index['/localizer/participants.tsv'] = uuid.uuid4().hex
    tsv_files = {
        '/localizer/phenotype/behavioural.tsv': pd.read_csv(
            str(data_dir / 'localizer_behavioural.tsv'), sep='\t'),
        '/localizer/participants.tsv': pd.read_csv(
            str(data_dir / 'localizer_participants.tsv'), sep='\t'),
    }
    return localizer_index, tsv_files


@pytest.fixture()
def localizer_mocker(request_mocker):
    """ Mocks the index for localizer dataset.
    """
    index, tsv_files = _load_localizer_index()
    request_mocker.url_mapping["https://osf.io/hwbm2/download"] = json.dumps(
        index)
    # Serve each TSV fixture for the URL pattern built from its identifier.
    for k, v in tsv_files.items():
        request_mocker.url_mapping[
            "*{}?".format(index[k][1:])] = v.to_csv(index=False, sep="\t")


def _make_haxby_subject_data(match, response):
    """Return an archive listing the files of one Haxby subject.

    ``match`` is the regex match of the requested URL; its first group is
    used as the subject directory name. ``response`` is unused.
    """
    sub_files = ['bold.nii.gz', 'labels.txt',
                 'mask4_vt.nii.gz', 'mask8b_face_vt.nii.gz',
                 'mask8b_house_vt.nii.gz', 'mask8_face_vt.nii.gz',
                 'mask8_house_vt.nii.gz', 'anat.nii.gz']
    return list_to_archive(Path(match.group(1), f) for f in sub_files)


def test_fetch_haxby(tmp_path, request_mocker):
    """Fetch Haxby subjects one by one, as a list, and with invalid ids."""
    request_mocker.url_mapping[
        re.compile(r".*(subj\d).*\.tar\.gz")] = _make_haxby_subject_data
    for i in range(1, 6):
        haxby = func.fetch_haxby(data_dir=tmp_path, subjects=[i],
                                 verbose=0)
        # subject_data + (md5 + mask if first subj)
        assert request_mocker.url_count == i + 2
        assert len(haxby.func) == 1
        assert len(haxby.anat) == 1
        assert len(haxby.session_target) == 1
        assert haxby.mask is not None
        assert len(haxby.mask_vt) == 1
        assert len(haxby.mask_face) == 1
        assert len(haxby.mask_house) == 1
        assert len(haxby.mask_face_little) == 1
        assert len(haxby.mask_house_little) == 1
        assert haxby.description != ''

    # subjects with list
    subjects = [1, 2, 6]
    haxby = func.fetch_haxby(data_dir=tmp_path, subjects=subjects,
                             verbose=0)
    assert len(haxby.func) == len(subjects)
    assert len(haxby.mask_house_little) == len(subjects)
    assert len(haxby.anat) == len(subjects)
    assert haxby.anat[2] is None
    assert isinstance(haxby.mask, str)
    assert len(haxby.mask_face) == len(subjects)
    assert len(haxby.session_target) == len(subjects)
    assert len(haxby.mask_vt) == len(subjects)
    assert len(haxby.mask_face_little) == len(subjects)

    # invalid subject ids must be rejected with an explicit message
    message = "You provided invalid subject id {0} in a list"
    for sub_id in ['a', 8]:
        with pytest.raises(ValueError, match=message.format(sub_id)):
            func.fetch_haxby(data_dir=tmp_path, subjects=[sub_id])


def _adhd_example_subject(match, request):
    """Return an archive listing the two files of one ADHD subject.

    The first group of ``match`` (the subject id captured from the URL) names
    both the directory and the file prefix. ``request`` is unused.
    """
    contents = [
        Path("data", match.group(1), match.expand(r"\1_regressors.csv")),
        Path("data", match.group(1),
             match.expand(r"\1_rest_tshift_RPI_voreg_mni.nii.gz"))
    ]
    return list_to_archive(contents)


def _adhd_metadata():
    """Return an archive holding a phenotypic CSV with 40 subject ids."""
    sub1 = [3902469, 7774305, 3699991]
    sub2 = [2014113, 4275075, 1019436,
            3154996, 3884955, 27034,
            4134561, 27018, 6115230,
            27037, 8409791, 27011]
    sub3 = [3007585, 8697774, 9750701,
            10064, 21019, 10042,
            10128, 2497695, 4164316,
            1552181, 4046678, 23012]
    sub4 = [1679142, 1206380, 23008,
            4016887, 1418396, 2950754,
            3994098, 3520880, 1517058,
            9744150, 1562298, 3205761, 3624598]
    subs = pd.DataFrame({"Subject": sub1 + sub2 + sub3 + sub4})
    return dict_to_archive(
        {"ADHD200_40subs_motion_parameters_and_phenotypics.csv":
         subs.to_csv(index=False)})


def test_fetch_adhd(tmp_path, request_mocker):
    """Fetch 12 ADHD subjects and check counts and number of requests."""
    request_mocker.url_mapping["*metadata.tgz"] = _adhd_metadata()
    request_mocker.url_mapping[
        re.compile(r".*adhd40_([0-9]+)\.tgz")] = _adhd_example_subject
    adhd = func.fetch_adhd(data_dir=tmp_path, n_subjects=12, verbose=0)
    assert len(adhd.func) == 12
    assert len(adhd.confounds) == 12
    assert request_mocker.url_count == 13  # Subjects + phenotypic
    assert adhd.description != ''


def test_miyawaki2008(tmp_path, request_mocker):
    """Check the content of the bunch returned by fetch_miyawaki2008."""
    dataset = func.fetch_miyawaki2008(data_dir=tmp_path, verbose=0)
    assert len(dataset.func) == 32
    assert len(dataset.label) == 32
    assert isinstance(dataset.mask, str)
    assert len(dataset.mask_roi) == 38
    assert isinstance(dataset.background, str)
    assert request_mocker.url_count == 1
    assert dataset.description != ''


def test_fetch_localizer_contrasts(tmp_path, request_mocker, localizer_mocker):
    """Fetch localizer contrasts with several parameter combinations."""
    # 2 subjects
    dataset = func.fetch_localizer_contrasts(
        ['checkerboard'],
        n_subjects=2,
        data_dir=tmp_path,
        verbose=1)
    assert not hasattr(dataset, 'anats')
    assert not hasattr(dataset, 'tmaps')
    assert not hasattr(dataset, 'masks')
    assert isinstance(dataset.cmaps[0], str)
    assert isinstance(dataset.ext_vars, np.recarray)
    assert len(dataset.cmaps) == 2
    assert dataset.ext_vars.size == 2

    # Multiple contrasts
    dataset = func.fetch_localizer_contrasts(
        ['checkerboard', 'horizontal checkerboard'],
        n_subjects=2,
        data_dir=tmp_path,
        verbose=1)
    assert isinstance(dataset.ext_vars, np.recarray)
    assert isinstance(dataset.cmaps[0], str)
    assert len(dataset.cmaps) == 2 * 2  # two contrasts are fetched
    assert dataset.ext_vars.size == 2

    # all get_*=True
    dataset = func.fetch_localizer_contrasts(
        ['checkerboard'],
        n_subjects=1,
        data_dir=tmp_path,
        get_anats=True,
        get_masks=True,
        get_tmaps=True,
        verbose=1)
    assert isinstance(dataset.ext_vars, np.recarray)
    assert isinstance(dataset.anats[0], str)
    assert isinstance(dataset.cmaps[0], str)
    assert isinstance(dataset.masks[0], str)
    assert isinstance(dataset.tmaps[0], str)
    assert dataset.ext_vars.size == 1
    assert len(dataset.anats) == 1
    assert len(dataset.cmaps) == 1
    assert len(dataset.masks) == 1
    assert len(dataset.tmaps) == 1
    assert dataset.description != ''

    # grab a given list of subjects
    dataset2 = func.fetch_localizer_contrasts(
        ['checkerboard'],
        n_subjects=[2, 3, 5],
        data_dir=tmp_path,
        verbose=1)
    assert dataset2.ext_vars.size == 3
    assert len(dataset2.cmaps) == 3
    assert ([row[0] for row in dataset2.ext_vars] ==
            [b'S02', b'S03', b'S05'])


def test_fetch_localizer_calculation_task(tmp_path, request_mocker,
                                          localizer_mocker):
    """Fetch the localizer calculation task for 2 subjects."""
    dataset = func.fetch_localizer_calculation_task(
        n_subjects=2,
        data_dir=tmp_path,
        verbose=1)
    assert isinstance(dataset.ext_vars, np.recarray)
    assert isinstance(dataset.cmaps[0], str)
    assert dataset.ext_vars.size == 2
    assert len(dataset.cmaps) == 2
    assert dataset.description != ''


def test_fetch_localizer_button_task(tmp_path, request_mocker,
                                     localizer_mocker):
    """Fetch the localizer button task with the default parameters."""
    # Only one subject
    dataset = func.fetch_localizer_button_task(data_dir=tmp_path,
                                               verbose=1)

    assert isinstance(dataset.tmaps, list)
    assert isinstance(dataset.anats, list)

    assert len(dataset.tmaps) == 1
    assert len(dataset.anats) == 1

    assert isinstance(dataset.tmap, str)
    assert isinstance(dataset.anat, str)

    assert dataset.description != ''


def test_fetch_abide_pcp(tmp_path, request_mocker):
    """Fetch ABIDE PCP with a mocked phenotypic file of 800 rows."""
    ids = list(range(800))
    # Every other subject has no associated file.
    filenames = ['no_filename'] * 800
    filenames[::2] = ['filename'] * 400
    pheno = pd.DataFrame({"subject_id": ids, "FILE_ID": filenames},
                         columns=["subject_id", "FILE_ID"])
    request_mocker.url_mapping["*rocessed1.csv"] = pheno.to_csv(index=False)

    # All subjects
    dataset = func.fetch_abide_pcp(data_dir=tmp_path,
                                   quality_checked=False, verbose=0)
    assert len(dataset.func_preproc) == 400
    assert dataset.description != ''

    # Smoke test using only a string, rather than a list of strings
    dataset = func.fetch_abide_pcp(data_dir=tmp_path,
                                   quality_checked=False, verbose=0,
                                   derivatives='func_preproc')


def test__load_mixed_gambles(request_mocker):
    """Check that _load_mixed_gambles yields one map and gain per trial."""
    rng = check_random_state(42)
    n_trials = 48
    affine = np.eye(4)
    for n_subjects in [1, 5, 16]:
        zmaps = [
            nibabel.Nifti1Image(rng.randn(3, 4, 5, n_trials), affine)
            for _ in range(n_subjects)
        ]
        zmaps, gain, _ = func._load_mixed_gambles(zmaps)
        assert len(zmaps) == n_subjects * n_trials
        assert len(zmaps) == len(gain)


def test_fetch_mixed_gambles(tmp_path, request_mocker):
    """Check paths and number of z-maps returned by fetch_mixed_gambles."""
    datasetdir = tmp_path / "jimura_poldrack_2012_zmaps"
    for n_subjects in [1, 5, 16]:
        mgambles = func.fetch_mixed_gambles(n_subjects=n_subjects,
                                            data_dir=tmp_path,
                                            verbose=0, return_raw_data=True)
        assert mgambles["zmaps"][0] == str(
            datasetdir / "zmaps" / "sub001_zmaps.nii.gz")
        assert len(mgambles["zmaps"]) == n_subjects


def test_check_parameters_megatrawls_datasets(request_mocker):
    """Invalid megatrawls parameters must raise a consistent ValueError."""
    # testing whether the function raises the same error message
    # if invalid input parameters are provided
    message = "Invalid {0} input is provided: {1}."

    for invalid_input_dim in [1, 5, 30]:
        with pytest.raises(
                ValueError,
                match=message.format('dimensionality', invalid_input_dim)):
            func.fetch_megatrawls_netmats(dimensionality=invalid_input_dim)

    for invalid_input_timeserie in ['asdf', 'time', 'st2']:
        with pytest.raises(
                ValueError,
                match=message.format('timeseries', invalid_input_timeserie)):
            func.fetch_megatrawls_netmats(timeseries=invalid_input_timeserie)

    for invalid_output_name in ['net1', 'net2']:
        with pytest.raises(
                ValueError,
                match=message.format('matrices', invalid_output_name)):
            func.fetch_megatrawls_netmats(matrices=invalid_output_name)


def test_fetch_megatrawls_netmats(tmp_path, request_mocker):
    """Smoke test: megatrawls files are found and read properly."""
    # Create the files on disk beforehand, since the fetcher loads the data
    # present in them.
    megatrawls_dir = tmp_path / 'Megatrawls'
    for subdir, filename in [
            ('3T_Q1-Q6related468_MSMsulc_d100_ts3', 'Znet2.txt'),
            ('3T_Q1-Q6related468_MSMsulc_d300_ts2', 'Znet1.txt')]:
        files_dir = megatrawls_dir / subdir
        files_dir.mkdir(parents=True)
        (files_dir / filename).write_text("1")

    megatrawl_netmats_data = func.fetch_megatrawls_netmats(
        data_dir=tmp_path)

    # expected number of returns in output name should be equal
    assert len(megatrawl_netmats_data) == 5
    # check that the fields of the returned bunch are not empty
    assert megatrawl_netmats_data.dimensions != ''
    assert megatrawl_netmats_data.timeseries != ''
    assert megatrawl_netmats_data.matrices != ''
    assert megatrawl_netmats_data.correlation_matrices != ''
    assert megatrawl_netmats_data.description != ''

    # check that dimensions, timeseries and matrices match the user settings
    netmats_data = func.fetch_megatrawls_netmats(
        data_dir=tmp_path,
        dimensionality=300,
        timeseries='multiple_spatial_regression',
        matrices='full_correlation')
    assert netmats_data.dimensions == 300
    assert netmats_data.timeseries == 'multiple_spatial_regression'
    assert netmats_data.matrices == 'full_correlation'


def _cobre_metadata():
    """Build the mocked COBRE file listing.

    Returns
    -------
    metadata : str
        JSON document with a ``files`` list holding, for each subject, one
        functional and one confounds entry, followed by the phenotypic TSV
        and the two JSON key files.
    ids : numpy.ndarray
        The 146 subject ids as strings.
    """
    # Ids run from 40000 to 40147; 40070 and 40083 are not part of the list.
    ids_n = [i for i in range(40000, 40148) if i not in (40070, 40083)]
    dummy_data = []

    for i in ids_n:
        # Func file and its confounds
        f = 'fmri_00' + str(i) + '.nii.gz'
        m = 'fmri_00' + str(i) + '.tsv.gz'
        dummy_data.append(
            {'download_url': 'https://cobre/{}'.format(f), 'name': f})
        dummy_data.append(
            {'download_url': 'https://cobre/{}'.format(m), 'name': m})

    # Add the TSV file
    dummy_data.append({'download_url': 'https://cobre/phenotypic_data.tsv.gz',
                       'name': 'phenotypic_data.tsv.gz'})
    # Add JSON files
    dummy_data.append({'download_url': 'https://cobre/keys_confounds.json',
                       'name': 'keys_confounds.json'})
    dummy_data.append({
        'download_url': 'https://cobre/keys_phenotypic_data.json',
        'name': 'keys_phenotypic_data.json'})
    dummy_data = {'files': dummy_data}
    return json.dumps(dummy_data), np.asarray(ids_n, dtype='|U17')


def _cobre_data(ids):
    """Return a gzipped, tab-separated phenotypic table for ``ids``.

    All numeric columns are filled with ones. The first 74 subjects (or
    fewer if ``ids`` is shorter) are labelled "Control", the others
    "Patient".
    """
    n_subjects = len(ids)
    n_controls = min(74, n_subjects)
    subject_type = (["Control"] * n_controls
                    + ["Patient"] * (n_subjects - n_controls))

    def ones():
        return np.ones(n_subjects, dtype='<f8')

    csv = pd.DataFrame(
        OrderedDict(
            [
                ("ID", ids),
                ("Current Age", ones()),
                ("Gender", ones()),
                ("Handedness", ones()),
                ("Subject Type", subject_type),
                ("Diagnosis", ones()),
                ("Frames OK", ones()),
                ("FD", ones()),
                ("FD Scrubbed", ones()),
            ]
        )
    )
    return gzip.compress(csv.to_csv(index=False, sep="\t").encode("utf-8"))


def test_fetch_cobre(tmp_path, request_mocker):
    """Fetch COBRE with all, 30 and more than the maximum of subjects."""
    metadata, ids = _cobre_metadata()
    request_mocker.url_mapping["*phenotypic_data.tsv.gz"] = _cobre_data(ids)
    request_mocker.url_mapping["*articles/4197885"] = metadata

    # All subjects
    cobre_data = check_deprecation(
        func.fetch_cobre, "'fetch_cobre' has been deprecated")(
        n_subjects=None, data_dir=tmp_path)

    phenotypic_names = ['func', 'confounds', 'phenotypic', 'description',
                        'desc_con', 'desc_phenotypic']

    # test length of functional filenames to max 146
    assert len(cobre_data.func) == 146
    # test length of corresponding confounds files of same length to max 146
    assert len(cobre_data.confounds) == 146
    # test return type variables
    assert sorted(cobre_data) == sorted(phenotypic_names)
    # test functional filenames in a list
    assert isinstance(cobre_data.func, list)
    # test confounds files in a list
    assert isinstance(cobre_data.confounds, list)
    assert isinstance(cobre_data.func[0], str)
    # returned phenotypic data will be an array
    assert isinstance(cobre_data.phenotypic, np.recarray)

    # Fetch only 30 subjects
    data_30_subjects = func.fetch_cobre(n_subjects=30,
                                        data_dir=tmp_path)
    assert len(data_30_subjects.func) == 30
    assert len(data_30_subjects.confounds) == 30

    # Test more than maximum subjects
    test_150_subjects = func.fetch_cobre(n_subjects=150,
                                         data_dir=tmp_path)
    assert len(test_150_subjects.func) == 146


def test_fetch_surf_nki_enhanced(tmp_path, request_mocker, verbose=0):
    """Fetch the NKI enhanced surface data with a mocked phenotypic file."""
    ids = np.asarray(['A00028185', 'A00035827', 'A00037511', 'A00039431',
                      'A00033747', 'A00035840', 'A00038998', 'A00035072',
                      'A00037112', 'A00039391'], dtype='U9')
    age = np.ones(len(ids), dtype='<f8')
    hand = np.asarray(len(ids) * ['x'], dtype='U1')
    sex = np.asarray(len(ids) * ['x'], dtype='U1')
    pheno_data = pd.DataFrame(
        OrderedDict([("id", ids), ("age", age), ("hand", hand), ("sex", sex)])
    )
    request_mocker.url_mapping[
        "*pheno_nki_nilearn.csv"] = pheno_data.to_csv(index=False)
    nki_data = func.fetch_surf_nki_enhanced(data_dir=tmp_path)

    assert nki_data.description != ''
    assert len(nki_data.func_left) == 10
    assert len(nki_data.func_right) == 10
    assert isinstance(nki_data.phenotypic, np.ndarray)
    assert nki_data.phenotypic.shape == (10,)


def _mock_participants_data(n_ids=5):
    """Return a mocked participants table as a DataFrame.

    Maximum 8 ids are allowed to mock: larger values of ``n_ids`` yield the
    8 available participants. "Child_Adult" alternates between "child" and
    "adult", starting with "child".
    """
    ids = ['sub-pixar052', 'sub-pixar073', 'sub-pixar074', 'sub-pixar110',
           'sub-pixar042', 'sub-pixar109', 'sub-pixar068', 'sub-pixar007'
           ][:n_ids]
    n_rows = len(ids)
    age = np.ones(n_rows)
    age_group = n_rows * ['2yo']
    # Sized by the number of ids actually kept so that all columns agree.
    child_adult = [["child", "adult"][i % 2] for i in range(n_rows)]
    gender = n_rows * ['m']
    handedness = n_rows * ['r']
    participants = pd.DataFrame(OrderedDict([
        ("participant_id", ids), ("Age", age), ("AgeGroup", age_group),
        ("Child_Adult", child_adult), ("Gender", gender),
        ("Handedness", handedness)
    ]))
    return participants


def _mock_development_confounds():
    """Return a 10-row DataFrame of ones with 28 confound columns.

    The first 15 columns are named confounds; the remaining 13 all share the
    name "some_confound".
    """
    keep_confounds = ['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y',
                      'rot_z', 'framewise_displacement', 'a_comp_cor_00',
                      'a_comp_cor_01', 'a_comp_cor_02', 'a_comp_cor_03',
                      'a_comp_cor_04', 'a_comp_cor_05', 'csf',
                      'white_matter']
    other_confounds = ["some_confound"] * 13
    confounds = keep_confounds + other_confounds
    return pd.DataFrame(np.ones((10, len(confounds))), columns=confounds)


def test_fetch_development_fmri_participants(tmp_path, request_mocker):
    """Check type and shape of the fetched participants array."""
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping[
        "https://osf.io/yr3av/download"] = mock_participants.to_csv(
        index=False, sep="\t")
    participants = func._fetch_development_fmri_participants(
        data_dir=tmp_path, url=None, verbose=1)
    assert isinstance(participants, np.ndarray)
    assert participants.shape == (5,)


def test_fetch_development_fmri_functional(tmp_path, request_mocker):
    """Check that one func and one confounds file is returned per id."""
    mock_participants = _mock_participants_data(n_ids=8)
    funcs, confounds = func._fetch_development_fmri_functional(
        mock_participants, data_dir=tmp_path,
        url=None, resume=True, verbose=1)
    assert len(funcs) == 8
    assert len(confounds) == 8


def test_fetch_development_fmri(tmp_path, request_mocker):
    """Fetch the development fMRI dataset with several parameter sets."""
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["*"] = _mock_development_confounds().to_csv(
        index=False, sep="\t")
    request_mocker.url_mapping[
        "https://osf.io/yr3av/download"] = mock_participants.to_csv(
        index=False, sep="\t")

    data = func.fetch_development_fmri(n_subjects=2,
                                       data_dir=tmp_path, verbose=1)
    assert len(data.func) == 2
    assert len(data.confounds) == 2
    assert isinstance(data.phenotypic, np.ndarray)
    assert data.phenotypic.shape == (2,)
    assert data.description != ''

    # check reduced confounds
    # (pandas is used to count columns: np.recfromcsv is deprecated)
    confounds = pd.read_csv(data.confounds[0], sep='\t')
    assert confounds.shape[1] == 15

    # check full confounds
    data = func.fetch_development_fmri(n_subjects=2, reduce_confounds=False,
                                       data_dir=tmp_path, verbose=1)
    confounds = pd.read_csv(data.confounds[0], sep='\t')
    assert confounds.shape[1] == 28

    # check first subject is an adult
    data = func.fetch_development_fmri(n_subjects=1, reduce_confounds=False,
                                       data_dir=tmp_path, verbose=1)
    age_group = data.phenotypic['Child_Adult'][0]
    assert age_group == 'adult'

    # check first subject is a child if requested with age_group
    data = func.fetch_development_fmri(n_subjects=1, reduce_confounds=False,
                                       data_dir=tmp_path, verbose=1,
                                       age_group='child')
    age_group = data.phenotypic['Child_Adult'][0]
    assert age_group == 'child'

    # check one of each age group returned if n_subject == 2
    # and age_group == 'both'
    data = func.fetch_development_fmri(n_subjects=2, reduce_confounds=False,
                                       data_dir=tmp_path, verbose=1,
                                       age_group='both')
    age_group = data.phenotypic['Child_Adult']
    assert all(age_group == ['adult', 'child'])

    # check age_group
    data = func.fetch_development_fmri(n_subjects=2, reduce_confounds=False,
                                       data_dir=tmp_path, verbose=1,
                                       age_group='child')
    assert all(x == 'child' for x in data.phenotypic['Child_Adult'])


def test_fetch_development_fmri_invalid_n_subjects(request_mocker):
    """None maps to the maximum; a negative n_subjects emits a warning."""
    max_subjects = 155
    n_subjects = func._set_invalid_n_subjects_to_max(
        n_subjects=None,
        max_subjects=max_subjects,
        age_group='adult')
    assert n_subjects == max_subjects
    with pytest.warns(UserWarning, match='Wrong value for n_subjects='):
        func._set_invalid_n_subjects_to_max(n_subjects=-1,
                                            max_subjects=max_subjects,
                                            age_group='adult')


def test_fetch_development_fmri_exception(request_mocker):
    """An unknown age_group must raise a ValueError."""
    with pytest.raises(ValueError, match='Wrong value for age_group'):
        func._filter_func_regressors_by_participants(
            participants='junk', age_group='junk for test')


# datasets tests originally belonging to nistats follow

currdir = os.path.dirname(os.path.abspath(__file__))
datadir = os.path.join(currdir, 'data')


def test_fetch_bids_langloc_dataset(request_mocker, tmp_path):
    """Check the return types of fetch_bids_langloc_dataset."""
    data_dir = str(tmp_path / 'bids_langloc_example')
    os.mkdir(data_dir)
    main_folder = os.path.join(data_dir, 'bids_langloc_dataset')
    os.mkdir(main_folder)

    datadir, dl_files = func.fetch_bids_langloc_dataset(tmp_path)

    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)


def test_select_from_index(request_mocker):
    """Check subject selection and inclusion/exclusion filters on urls."""
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    # Prepare url files for subject and filter tests
    urls = [data_prefix + '/stuff.html',
            data_prefix + '/sub-xxx.html',
            data_prefix + '/sub-yyy.html',
            data_prefix + '/sub-xxx/ses-01_task-rest.txt',
            data_prefix + '/sub-xxx/ses-01_task-other.txt',
            data_prefix + '/sub-xxx/ses-02_task-rest.txt',
            data_prefix + '/sub-xxx/ses-02_task-other.txt',
            data_prefix + '/sub-yyy/ses-01.txt',
            data_prefix + '/sub-yyy/ses-02.txt']

    # Only 1 subject and not subject specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=1)
    assert len(new_urls) == 6
    assert data_prefix + '/sub-yyy.html' not in new_urls

    # 2 subjects and not subject specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=2)
    assert len(new_urls) == 9
    assert data_prefix + '/sub-yyy.html' in new_urls
    # ALL subjects and not subject specific files get downloaded
    new_urls = func.select_from_index(urls, n_subjects=None)
    assert len(new_urls) == 9

    # test inclusive filters. Only files with task-rest
    new_urls = func.select_from_index(
        urls, inclusion_filters=['*task-rest*'])
    assert len(new_urls) == 2
    assert data_prefix + '/stuff.html' not in new_urls

    # test exclusive filters. only files without ses-01
    new_urls = func.select_from_index(
        urls, exclusion_filters=['*ses-01*'])
    assert len(new_urls) == 6
    assert data_prefix + '/stuff.html' in new_urls

    # test filter combination. only files with task-rest and without ses-01
    new_urls = func.select_from_index(
        urls, inclusion_filters=['*task-rest*'],
        exclusion_filters=['*ses-01*'])
    assert len(new_urls) == 1
    assert data_prefix + '/sub-xxx/ses-02_task-rest.txt' in new_urls


def test_fetch_openneuro_dataset_index(request_mocker):
    """An index file already on disk is located and read back unchanged."""
    with TemporaryDirectory() as tmpdir:
        dataset_version = 'ds000030_R1.0.4'
        subdir_names = ['ds000030', 'ds000030_R1.0.4', 'uncompressed']
        subdirpath = os.path.join(tmpdir, *subdir_names)
        os.makedirs(subdirpath)

        filepath = os.path.join(subdirpath, 'urls.json')
        mock_json_content = ['junk1', 'junk2']
        with open(filepath, 'w') as f:
            json.dump(mock_json_content, f)
        urls_path, urls = func.fetch_openneuro_dataset_index(
            data_dir=tmpdir,
            dataset_version=dataset_version,
            verbose=1,
        )
        urls_path = urls_path.replace('/', os.sep)
        assert urls_path == filepath
        assert urls == mock_json_content


def test_fetch_openneuro_dataset(request_mocker, tmp_path):
    """Fetch a mocked OpenNeuro dataset from an explicit list of urls."""
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=tmp_path,
                                verbose=1)
    url_file = os.path.join(data_dir, 'urls.json')
    # Prepare url files for subject and filter tests
    urls = [
        "https://example.com/{}/stuff.html" + data_prefix + '',
        "https://example.com/{}/sub-xxx.html",
        "https://example.com/{}/sub-yyy.html",
        "https://example.com/{}/sub-xxx/ses-01_task-rest.txt",
        "https://example.com/{}/sub-xxx/ses-01_task-other.txt",
        "https://example.com/{}/sub-xxx/ses-02_task-rest.txt",
        "https://example.com/{}/sub-xxx/ses-02_task-other.txt",
        "https://example.com/{}/sub-yyy/ses-01.txt",
        "https://example.com/{}/sub-yyy/ses-02.txt"]
    urls = [url.format(data_prefix) for url in urls]
    # Use a context manager so that the index file is flushed and closed
    # before the fetcher runs.
    with open(url_file, 'w') as f:
        json.dump(urls, f)

    # All the listed urls get downloaded
    datadir, dl_files = func.fetch_openneuro_dataset(
        urls, tmp_path, dataset_version)
    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)
    assert len(dl_files) == 9


def test_fetch_localizer(request_mocker, tmp_path):
    """Check the return types of fetch_localizer_first_level."""
    dataset = func.fetch_localizer_first_level(data_dir=tmp_path)
    assert isinstance(dataset['events'], str)
    assert isinstance(dataset.epi_img, str)


def _mock_original_spm_auditory_events_file():
    """Return the expected SPM auditory events table as a TSV string.

    16 blocks of 42.0 each, alternating between "rest" and "active".
    """
    expected_events_data = {
        'onset': [factor * 42.0 for factor in range(0, 16)],
        'duration': [42.0] * 16,
        'trial_type': ['rest', 'active'] * 8,
    }
    expected_events_data = pd.DataFrame(expected_events_data)
    expected_events_data_string = expected_events_data.to_csv(
        sep='\t',
        index=False,
        columns=['onset', 'duration', 'trial_type'],
    )
    return expected_events_data_string


def _mock_bids_compliant_spm_auditory_events_file(events_filepath=None):
    """Write the SPM auditory events file and return its content and path.

    Parameters
    ----------
    events_filepath : str, optional
        Where to write the events file. Defaults to ``tests_events.tsv`` in
        the current working directory.

    Returns
    -------
    actual_events_data_string : str
        Content of the written file.
    events_filepath : str
        Path of the written file.
    """
    if events_filepath is None:
        events_filepath = os.path.join(os.getcwd(), 'tests_events.tsv')
    func._make_events_file_spm_auditory_data(
        events_filepath=events_filepath)
    with open(events_filepath, 'r') as actual_events_file_obj:
        actual_events_data_string = actual_events_file_obj.read()
    return actual_events_data_string, events_filepath


def test_fetch_language_localizer_demo_dataset(request_mocker, tmp_path):
    """Check the directory and file list of the language localizer demo."""
    data_dir = tmp_path
    expected_data_dir = tmp_path / 'fMRI-language-localizer-demo-dataset'
    contents_dir = Path(
        __file__).parent / "data" / "archive_contents"
    contents_list_file = contents_dir / "language_localizer.txt"
    with contents_list_file.open() as f:
        # The first line of the listing is skipped.
        expected_files = [str(expected_data_dir / file_path.strip()) for
                          file_path in f.readlines()[1:]]
    actual_dir, actual_subdirs = func.fetch_language_localizer_demo_dataset(
        data_dir)
    assert actual_dir == str(expected_data_dir)
    assert actual_subdirs == sorted(expected_files)


def test_make_spm_auditory_events_file(request_mocker, tmp_path):
    """The generated events file must match the expected events table."""
    # Write under tmp_path: nothing is left in the working directory and no
    # manual cleanup is needed, even when the helper fails.
    actual_events_data_string, _ = (
        _mock_bids_compliant_spm_auditory_events_file(
            events_filepath=str(tmp_path / 'tests_events.tsv')))
    expected_events_data_string = _mock_original_spm_auditory_events_file()

    # Normalize Windows line endings before comparing.
    actual_events_data_string = actual_events_data_string.replace(
        '\r\n', '\n')
    expected_events_data_string = expected_events_data_string.replace(
        '\r\n', '\n')

    assert actual_events_data_string == expected_events_data_string


def test_fetch_spm_auditory(request_mocker, tmp_path):
    """Fetch SPM auditory data from files already present on disk."""
    import shutil
    saf = ["fM00223/fM00223_%03i.img" % index for index in range(4, 100)]
    saf_ = ["fM00223/fM00223_%03i.hdr" % index for index in range(4, 100)]

    data_dir = str(tmp_path / 'spm_auditory')
    os.mkdir(data_dir)
    subject_dir = os.path.join(data_dir, 'sub001')
    os.mkdir(subject_dir)
    os.mkdir(os.path.join(subject_dir, 'fM00223'))
    os.mkdir(os.path.join(subject_dir, 'sM00223'))

    # Save one template image; its .img and .hdr files are then copied to
    # every expected anatomical and functional location.
    path_img = str(tmp_path / 'tmp.img')
    path_hdr = str(tmp_path / 'tmp.hdr')
    nibabel.save(nibabel.Nifti1Image(np.zeros((2, 3, 4)), np.eye(4)),
                 path_img)
    shutil.copy(path_img, os.path.join(subject_dir,
                                       "sM00223/sM00223_002.img"))
    shutil.copy(path_hdr, os.path.join(subject_dir,
                                       "sM00223/sM00223_002.hdr"))
    for file_ in saf:
        shutil.copy(path_img, os.path.join(subject_dir, file_))
    for file_ in saf_:
        shutil.copy(path_hdr, os.path.join(subject_dir, file_))

    dataset = func.fetch_spm_auditory(data_dir=tmp_path)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.func[0], str)
    assert len(dataset.func) == 96


def test_fetch_spm_multimodal(request_mocker, tmp_path):
    """Fetch SPM multimodal data from empty files already on disk."""
    data_dir = str(tmp_path / 'spm_multimodal_fmri')
    os.mkdir(data_dir)
    subject_dir = os.path.join(data_dir, 'sub001')
    os.mkdir(subject_dir)
    os.mkdir(os.path.join(subject_dir, 'fMRI'))
    os.mkdir(os.path.join(subject_dir, 'sMRI'))
    open(os.path.join(subject_dir, 'sMRI', 'smri.img'), 'a').close()
    for session in [0, 1]:
        open(os.path.join(subject_dir, 'fMRI',
                          'trials_ses%i.mat' % (session + 1)), 'a').close()
        dir_ = os.path.join(subject_dir, 'fMRI', 'Session%d' % (session + 1))
        os.mkdir(dir_)
        for i in range(390):
            open(os.path.join(dir_, 'fMETHODS-000%i-%i-01.img' %
                              (session + 5, i)), 'a').close()

    dataset = func.fetch_spm_multimodal_fmri(data_dir=tmp_path)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.func1[0], str)
    assert len(dataset.func1) == 390
    assert isinstance(dataset.func2[0], str)
    assert len(dataset.func2) == 390
    assert dataset.slice_order == 'descending'
    assert isinstance(dataset.trials_ses1, str)
    assert isinstance(dataset.trials_ses2, str)


def test_fiac(request_mocker, tmp_path):
    """Fetch the FIAC first level data from empty files already on disk."""
    # Create dummy 'files'
    fiac_dir = str(tmp_path / 'fiac_nilearn.glm' / 'nipy-data-0.2' /
                   'data' / 'fiac')
    fiac0_dir = os.path.join(fiac_dir, 'fiac0')
    os.makedirs(fiac0_dir)
    for session in [1, 2]:
        # func data and design matrix of the current session
        session_func = os.path.join(fiac0_dir, 'run%i.nii.gz' % session)
        open(session_func, 'a').close()
        sess_dmtx = os.path.join(fiac0_dir, 'run%i_design.npz' % session)
        open(sess_dmtx, 'a').close()
    mask = os.path.join(fiac0_dir, 'mask.nii.gz')
    open(mask, 'a').close()

    dataset = func.fetch_fiac_first_level(data_dir=tmp_path)
    assert isinstance(dataset.func1, str)
    assert isinstance(dataset.func2, str)
    assert isinstance(dataset.design_matrix1, str)
    assert isinstance(dataset.design_matrix2, str)
    assert isinstance(dataset.mask, str)