"""
Test the nilearn.datasets.func module.
"""
# Author: Alexandre Abraham
# License: simplified BSD

import os
import uuid
from pathlib import Path
import re
import gzip
from collections import OrderedDict

import numpy as np
import json
import nibabel

import pandas as pd
import pytest
from nibabel.tmpdirs import TemporaryDirectory
from sklearn.utils import check_random_state

from nilearn.datasets import func
from nilearn.datasets._testing import list_to_archive, dict_to_archive
from nilearn.datasets.utils import _get_dataset_dir
from nilearn._utils.testing import check_deprecation


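# Note on the mocking infrastructure (assumed interface): the tests below use
# the `request_mocker` fixture provided by nilearn.datasets._testing (pulled
# in through conftest). Roughly, a test maps a URL pattern -- a glob-like
# string or a compiled regular expression -- to the payload (or a callable
# building the payload) that a mocked download should return, e.g.
#
#     request_mocker.url_mapping["*metadata.tgz"] = _adhd_metadata()
#     request_mocker.url_mapping[re.compile(r".*subj\d.*")] = some_builder
#
# and `request_mocker.url_count` counts how many downloads were attempted.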
def _load_localizer_index():
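    """Build a fake localizer download index (file path -> dummy id) together
    with the phenotype TSV tables shipped in the test data directory."""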
    data_dir = Path(__file__).parent / "data"
    with (data_dir / "localizer_index.json").open() as of:
        localizer_template = json.load(of)
    localizer_index = {}
    for idx in range(1, 95):
        sid = 'S{:02}'.format(idx)
        localizer_index.update(dict(
            (key.format(sid), uuid.uuid4().hex)
            for key in localizer_template))
    localizer_index['/localizer/phenotype/behavioural.tsv'] = uuid.uuid4().hex
    localizer_index['/localizer/participants.tsv'] = uuid.uuid4().hex
    tsv_files = {}
    tsv_files['/localizer/phenotype/behavioural.tsv'] = pd.read_csv(
        str(data_dir / 'localizer_behavioural.tsv'), sep='\t')
    tsv_files['/localizer/participants.tsv'] = pd.read_csv(
        str(data_dir / 'localizer_participants.tsv'), sep='\t')
    return localizer_index, tsv_files


@pytest.fixture()
def localizer_mocker(request_mocker):
    """Mock the index for the localizer dataset."""
    index, tsv_files = _load_localizer_index()
    request_mocker.url_mapping["https://osf.io/hwbm2/download"] = json.dumps(
        index)
    for k, v in tsv_files.items():
        request_mocker.url_mapping[
            "*{}?".format(index[k][1:])] = v.to_csv(index=False, sep="\t")


def _make_haxby_subject_data(match, response):
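    """Return a dummy archive with the files expected for one Haxby subject.

    `match` is the regexp match object captured from the mocked download URL.
    """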
    sub_files = ['bold.nii.gz', 'labels.txt',
                 'mask4_vt.nii.gz', 'mask8b_face_vt.nii.gz',
                 'mask8b_house_vt.nii.gz', 'mask8_face_vt.nii.gz',
                 'mask8_house_vt.nii.gz', 'anat.nii.gz']
    return list_to_archive(Path(match.group(1), f) for f in sub_files)


def test_fetch_haxby(tmp_path, request_mocker):

    request_mocker.url_mapping[
        re.compile(r".*(subj\d).*\.tar\.gz")] = _make_haxby_subject_data
    for i in range(1, 6):
        haxby = func.fetch_haxby(data_dir=tmp_path, subjects=[i],
                                 verbose=0)
        # subject_data + (md5 + mask if first subj)
        assert request_mocker.url_count == i + 2
        assert len(haxby.func) == 1
        assert len(haxby.anat) == 1
        assert len(haxby.session_target) == 1
        assert haxby.mask is not None
        assert len(haxby.mask_vt) == 1
        assert len(haxby.mask_face) == 1
        assert len(haxby.mask_house) == 1
        assert len(haxby.mask_face_little) == 1
        assert len(haxby.mask_house_little) == 1
        assert haxby.description != ''

    # subjects with list
    subjects = [1, 2, 6]
    haxby = func.fetch_haxby(data_dir=tmp_path, subjects=subjects,
                             verbose=0)
    assert len(haxby.func) == len(subjects)
    assert len(haxby.mask_house_little) == len(subjects)
    assert len(haxby.anat) == len(subjects)
    assert haxby.anat[2] is None
    assert isinstance(haxby.mask, str)
    assert len(haxby.mask_face) == len(subjects)
    assert len(haxby.session_target) == len(subjects)
    assert len(haxby.mask_vt) == len(subjects)
    assert len(haxby.mask_face_little) == len(subjects)

    subjects = ['a', 8]
    message = "You provided invalid subject id {0} in a list"

    for sub_id in subjects:
        with pytest.raises(ValueError, match=message.format(sub_id)):
            func.fetch_haxby(data_dir=tmp_path, subjects=[sub_id])


def _adhd_example_subject(match, request):
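    """Return a dummy archive with the regressors and functional image
    expected for one ADHD subject, named after the id captured in `match`."""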
    contents = [
        Path("data", match.group(1), match.expand(r"\1_regressors.csv")),
        Path("data", match.group(1),
             match.expand(r"\1_rest_tshift_RPI_voreg_mni.nii.gz"))
    ]
    return list_to_archive(contents)


def _adhd_metadata():
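    """Return a dummy metadata archive listing the 40 ADHD subject ids."""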
    sub1 = [3902469, 7774305, 3699991]
    sub2 = [2014113, 4275075, 1019436,
            3154996, 3884955, 27034,
            4134561, 27018, 6115230,
            27037, 8409791, 27011]
    sub3 = [3007585, 8697774, 9750701,
            10064, 21019, 10042,
            10128, 2497695, 4164316,
            1552181, 4046678, 23012]
    sub4 = [1679142, 1206380, 23008,
            4016887, 1418396, 2950754,
            3994098, 3520880, 1517058,
            9744150, 1562298, 3205761, 3624598]
    subs = pd.DataFrame({"Subject": sub1 + sub2 + sub3 + sub4})
    return dict_to_archive(
        {"ADHD200_40subs_motion_parameters_and_phenotypics.csv":
         subs.to_csv(index=False)})


def test_fetch_adhd(tmp_path, request_mocker):
    request_mocker.url_mapping["*metadata.tgz"] = _adhd_metadata()
    request_mocker.url_mapping[
        re.compile(r".*adhd40_([0-9]+)\.tgz")] = _adhd_example_subject
    adhd = func.fetch_adhd(data_dir=tmp_path, n_subjects=12, verbose=0)
    assert len(adhd.func) == 12
    assert len(adhd.confounds) == 12
    assert request_mocker.url_count == 13  # Subjects + phenotypic
    assert adhd.description != ''


def test_miyawaki2008(tmp_path, request_mocker):
    dataset = func.fetch_miyawaki2008(data_dir=tmp_path, verbose=0)
    assert len(dataset.func) == 32
    assert len(dataset.label) == 32
    assert isinstance(dataset.mask, str)
    assert len(dataset.mask_roi) == 38
    assert isinstance(dataset.background, str)
    assert request_mocker.url_count == 1
    assert dataset.description != ''


def test_fetch_localizer_contrasts(tmp_path, request_mocker,
                                   localizer_mocker):
    # 2 subjects
    dataset = func.fetch_localizer_contrasts(
        ['checkerboard'],
        n_subjects=2,
        data_dir=tmp_path,
        verbose=1)
    assert not hasattr(dataset, 'anats')
    assert not hasattr(dataset, 'tmaps')
    assert not hasattr(dataset, 'masks')
    assert isinstance(dataset.cmaps[0], str)
    assert isinstance(dataset.ext_vars, np.recarray)
    assert len(dataset.cmaps) == 2
    assert dataset.ext_vars.size == 2

    # Multiple contrasts
    dataset = func.fetch_localizer_contrasts(
        ['checkerboard', 'horizontal checkerboard'],
        n_subjects=2,
        data_dir=tmp_path,
        verbose=1)
    assert isinstance(dataset.ext_vars, np.recarray)
    assert isinstance(dataset.cmaps[0], str)
    assert len(dataset.cmaps) == 2 * 2  # two contrasts are fetched
    assert dataset.ext_vars.size == 2

    # all get_*=True
    dataset = func.fetch_localizer_contrasts(
        ['checkerboard'],
        n_subjects=1,
        data_dir=tmp_path,
        get_anats=True,
        get_masks=True,
        get_tmaps=True,
        verbose=1)
    assert isinstance(dataset.ext_vars, np.recarray)
    assert isinstance(dataset.anats[0], str)
    assert isinstance(dataset.cmaps[0], str)
    assert isinstance(dataset.masks[0], str)
    assert isinstance(dataset.tmaps[0], str)
    assert dataset.ext_vars.size == 1
    assert len(dataset.anats) == 1
    assert len(dataset.cmaps) == 1
    assert len(dataset.masks) == 1
    assert len(dataset.tmaps) == 1
    assert dataset.description != ''

    # grab a given list of subjects
    dataset2 = func.fetch_localizer_contrasts(
        ['checkerboard'],
        n_subjects=[2, 3, 5],
        data_dir=tmp_path,
        verbose=1)
    assert dataset2.ext_vars.size == 3
    assert len(dataset2.cmaps) == 3
    assert ([row[0] for row in dataset2.ext_vars] ==
            [b'S02', b'S03', b'S05'])


def test_fetch_localizer_calculation_task(tmp_path, request_mocker,
                                          localizer_mocker):
    # 2 subjects
    dataset = func.fetch_localizer_calculation_task(
        n_subjects=2,
        data_dir=tmp_path,
        verbose=1)
    assert isinstance(dataset.ext_vars, np.recarray)
    assert isinstance(dataset.cmaps[0], str)
    assert dataset.ext_vars.size == 2
    assert len(dataset.cmaps) == 2
    assert dataset.description != ''


def test_fetch_localizer_button_task(tmp_path, request_mocker,
                                     localizer_mocker):
    # Only one subject
    dataset = func.fetch_localizer_button_task(data_dir=tmp_path,
                                               verbose=1)

    assert isinstance(dataset.tmaps, list)
    assert isinstance(dataset.anats, list)

    assert len(dataset.tmaps) == 1
    assert len(dataset.anats) == 1

    assert isinstance(dataset.tmap, str)
    assert isinstance(dataset.anat, str)

    assert dataset.description != ''


def test_fetch_abide_pcp(tmp_path, request_mocker):
    ids = list(range(800))
    filenames = ['no_filename'] * 800
    filenames[::2] = ['filename'] * 400
    pheno = pd.DataFrame({"subject_id": ids, "FILE_ID": filenames},
                         columns=["subject_id", "FILE_ID"])
    request_mocker.url_mapping["*rocessed1.csv"] = pheno.to_csv(index=False)

    # All subjects
    dataset = func.fetch_abide_pcp(data_dir=tmp_path,
                                   quality_checked=False, verbose=0)
    assert len(dataset.func_preproc) == 400
    assert dataset.description != ''

    # Smoke test using only a string, rather than a list of strings
    dataset = func.fetch_abide_pcp(data_dir=tmp_path,
                                   quality_checked=False, verbose=0,
                                   derivatives='func_preproc')


def test__load_mixed_gambles(request_mocker):
    rng = check_random_state(42)
    n_trials = 48
    affine = np.eye(4)
    for n_subjects in [1, 5, 16]:
        zmaps = []
        for _ in range(n_subjects):
            zmaps.append(nibabel.Nifti1Image(rng.randn(3, 4, 5, n_trials),
                                             affine))
        zmaps, gain, _ = func._load_mixed_gambles(zmaps)
        assert len(zmaps) == n_subjects * n_trials
        assert len(zmaps) == len(gain)


def test_fetch_mixed_gambles(tmp_path, request_mocker):
    for n_subjects in [1, 5, 16]:
        mgambles = func.fetch_mixed_gambles(n_subjects=n_subjects,
                                            data_dir=tmp_path,
                                            verbose=0, return_raw_data=True)
        datasetdir = tmp_path / "jimura_poldrack_2012_zmaps"
        assert mgambles["zmaps"][0] == str(
            datasetdir / "zmaps" / "sub001_zmaps.nii.gz")
        assert len(mgambles["zmaps"]) == n_subjects


def test_check_parameters_megatrawls_datasets(request_mocker):
    # check that the expected error message is raised when invalid input
    # parameters are provided
    message = "Invalid {0} input is provided: {1}."

    for invalid_input_dim in [1, 5, 30]:
        with pytest.raises(
                ValueError,
                match=message.format('dimensionality', invalid_input_dim)):
            func.fetch_megatrawls_netmats(dimensionality=invalid_input_dim)

    for invalid_input_timeserie in ['asdf', 'time', 'st2']:
        with pytest.raises(
                ValueError,
                match=message.format('timeseries', invalid_input_timeserie)):
            func.fetch_megatrawls_netmats(timeseries=invalid_input_timeserie)

    for invalid_output_name in ['net1', 'net2']:
        with pytest.raises(
                ValueError,
                match=message.format('matrices', invalid_output_name)):
            func.fetch_megatrawls_netmats(matrices=invalid_output_name)


def test_fetch_megatrawls_netmats(tmp_path, request_mocker):
    # smoke test checking that the pre-seeded files are found and read
    # properly by the fetcher
    files_dir = str(tmp_path / 'Megatrawls'
                    / '3T_Q1-Q6related468_MSMsulc_d100_ts3')
    os.makedirs(files_dir)
    with open(os.path.join(files_dir, 'Znet2.txt'), 'w') as net_file:
        net_file.write("1")

    files_dir2 = str(tmp_path / 'Megatrawls'
                     / '3T_Q1-Q6related468_MSMsulc_d300_ts2')
    os.makedirs(files_dir2)
    with open(os.path.join(files_dir2, 'Znet1.txt'), 'w') as net_file2:
        net_file2.write("1")

    megatrawl_netmats_data = func.fetch_megatrawls_netmats(
        data_dir=tmp_path)

    # the returned bunch should contain exactly 5 items
    assert len(megatrawl_netmats_data) == 5
    # none of the returned fields should be empty
    assert megatrawl_netmats_data.dimensions != ''
    assert megatrawl_netmats_data.timeseries != ''
    assert megatrawl_netmats_data.matrices != ''
    assert megatrawl_netmats_data.correlation_matrices != ''
    assert megatrawl_netmats_data.description != ''

    # the dimensions, timeseries and matrices returned should match the
    # user-provided settings
    netmats_data = func.fetch_megatrawls_netmats(
        data_dir=tmp_path,
        dimensionality=300,
        timeseries='multiple_spatial_regression',
        matrices='full_correlation')
    assert netmats_data.dimensions == 300
    assert netmats_data.timeseries == 'multiple_spatial_regression'
    assert netmats_data.matrices == 'full_correlation'


def _cobre_metadata():
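    """Return a mocked JSON file listing for COBRE (functional and confound
    files for 146 subjects, plus the phenotypic TSV and key JSON files),
    together with the array of subject ids."""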
    ids_n = [40000, 40001, 40002, 40003, 40004, 40005, 40006, 40007, 40008,
             40009, 40010, 40011, 40012, 40013, 40014, 40015, 40016, 40017,
             40018, 40019, 40020, 40021, 40022, 40023, 40024, 40025, 40026,
             40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034, 40035,
             40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044,
             40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053,
             40054, 40055, 40056, 40057, 40058, 40059, 40060, 40061, 40062,
             40063, 40064, 40065, 40066, 40067, 40068, 40069, 40071, 40072,
             40073, 40074, 40075, 40076, 40077, 40078, 40079, 40080, 40081,
             40082, 40084, 40085, 40086, 40087, 40088, 40089, 40090, 40091,
             40092, 40093, 40094, 40095, 40096, 40097, 40098, 40099, 40100,
             40101, 40102, 40103, 40104, 40105, 40106, 40107, 40108, 40109,
             40110, 40111, 40112, 40113, 40114, 40115, 40116, 40117, 40118,
             40119, 40120, 40121, 40122, 40123, 40124, 40125, 40126, 40127,
             40128, 40129, 40130, 40131, 40132, 40133, 40134, 40135, 40136,
             40137, 40138, 40139, 40140, 40141, 40142, 40143, 40144, 40145,
             40146, 40147]
    dummy_data = []

    for i in np.hstack(ids_n):
        # Functional image and confounds file for this subject
        f = 'fmri_00' + str(i) + '.nii.gz'
        m = 'fmri_00' + str(i) + '.tsv.gz'
        dummy_data.append(
            {'download_url': 'https://cobre/{}'.format(f), 'name': f})
        dummy_data.append(
            {'download_url': 'https://cobre/{}'.format(m), 'name': m})

    # Add the TSV file
    dummy_data.append({'download_url': 'https://cobre/phenotypic_data.tsv.gz',
                       'name': 'phenotypic_data.tsv.gz'})
    # Add JSON files
    dummy_data.append({'download_url': 'https://cobre/keys_confounds.json',
                       'name': 'keys_confounds.json'})
    dummy_data.append({
        'download_url': 'https://cobre/keys_phenotypic_data.json',
        'name': 'keys_phenotypic_data.json'})
    dummy_data = {'files': dummy_data}
    return json.dumps(dummy_data), np.asarray(ids_n, dtype='|U17')


def _cobre_data(ids):
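    """Return a gzipped phenotypic TSV table for the given COBRE ids."""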
    current_age = np.ones(len(ids), dtype='<f8')
    gender = np.ones(len(ids), dtype='<f8')
    handedness = np.ones(len(ids), dtype='<f8')

    subject_type = ["Control"] * 74 + ["Patient"] * (146 - 74)
    diagnosis = np.ones(len(ids), dtype='<f8')
    frames_ok = np.ones(len(ids), dtype='<f8')
    fd = np.ones(len(ids), dtype='<f8')
    fd_scrubbed = np.ones(len(ids), dtype='<f8')
    csv = pd.DataFrame(
        OrderedDict(
            [
                ("ID", ids),
                ("Current Age", current_age),
                ("Gender", gender),
                ("Handedness", handedness),
                ("Subject Type", subject_type),
                ("Diagnosis", diagnosis),
                ("Frames OK", frames_ok),
                ("FD", fd),
                ("FD Scrubbed", fd_scrubbed),
            ]
        )
    )
    return gzip.compress(csv.to_csv(index=False, sep="\t").encode("utf-8"))


def test_fetch_cobre(tmp_path, request_mocker):
    metadata, ids = _cobre_metadata()
    request_mocker.url_mapping["*phenotypic_data.tsv.gz"] = _cobre_data(ids)
    request_mocker.url_mapping["*articles/4197885"] = metadata

    # All subjects
    cobre_data = check_deprecation(
        func.fetch_cobre, "'fetch_cobre' has been deprecated")(
            n_subjects=None, data_dir=tmp_path)

    phenotypic_names = ['func', 'confounds', 'phenotypic', 'description',
                        'desc_con', 'desc_phenotypic']

    # all 146 functional files and their confounds should be returned
    assert len(cobre_data.func) == 146
    assert len(cobre_data.confounds) == 146
    # check the returned field names
    assert sorted(cobre_data) == sorted(phenotypic_names)
    # functional filenames and confounds files are returned as lists of str
    assert isinstance(cobre_data.func, list)
    assert isinstance(cobre_data.confounds, list)
    assert isinstance(cobre_data.func[0], str)
    # returned phenotypic data is a record array
    assert isinstance(cobre_data.phenotypic, np.recarray)

    # Fetch only 30 subjects
    data_30_subjects = func.fetch_cobre(n_subjects=30,
                                        data_dir=tmp_path)
    assert len(data_30_subjects.func) == 30
    assert len(data_30_subjects.confounds) == 30

    # Test more than maximum subjects
    test_150_subjects = func.fetch_cobre(n_subjects=150,
                                         data_dir=tmp_path)
    assert len(test_150_subjects.func) == 146


def test_fetch_surf_nki_enhanced(tmp_path, request_mocker):

    ids = np.asarray(['A00028185', 'A00035827', 'A00037511', 'A00039431',
                      'A00033747', 'A00035840', 'A00038998', 'A00035072',
                      'A00037112', 'A00039391'], dtype='U9')
    age = np.ones(len(ids), dtype='<f8')
    hand = np.asarray(len(ids) * ['x'], dtype='U1')
    sex = np.asarray(len(ids) * ['x'], dtype='U1')
    pheno_data = pd.DataFrame(
        OrderedDict([("id", ids), ("age", age), ("hand", hand), ("sex", sex)])
    )
    request_mocker.url_mapping[
        "*pheno_nki_nilearn.csv"] = pheno_data.to_csv(index=False)
    nki_data = func.fetch_surf_nki_enhanced(data_dir=tmp_path)

    assert nki_data.description != ''
    assert len(nki_data.func_left) == 10
    assert len(nki_data.func_right) == 10
    assert isinstance(nki_data.phenotypic, np.ndarray)
    assert nki_data.phenotypic.shape == (10,)


def _mock_participants_data(n_ids=5):
    """Mock the participants table; at most 8 ids can be mocked."""
    ids = ['sub-pixar052', 'sub-pixar073', 'sub-pixar074', 'sub-pixar110',
           'sub-pixar042', 'sub-pixar109', 'sub-pixar068', 'sub-pixar007'
           ][:n_ids]
    age = np.ones(len(ids))
    age_group = len(ids) * ['2yo']
    child_adult = [["child", "adult"][i % 2] for i in range(n_ids)]
    gender = len(ids) * ['m']
    handedness = len(ids) * ['r']
    participants = pd.DataFrame(OrderedDict([
        ("participant_id", ids), ("Age", age), ("AgeGroup", age_group),
        ("Child_Adult", child_adult), ("Gender", gender),
        ("Handedness", handedness)
    ]))
    return participants


def _mock_development_confounds():
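    """Return a dummy confounds table: the 15 confounds that the development
    fMRI fetcher is expected to keep, plus 13 extra columns."""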
    keep_confounds = ['trans_x', 'trans_y', 'trans_z', 'rot_x', 'rot_y',
                      'rot_z', 'framewise_displacement', 'a_comp_cor_00',
                      'a_comp_cor_01', 'a_comp_cor_02', 'a_comp_cor_03',
                      'a_comp_cor_04', 'a_comp_cor_05', 'csf',
                      'white_matter']
    other_confounds = ["some_confound"] * 13
    confounds = keep_confounds + other_confounds
    return pd.DataFrame(np.ones((10, len(confounds))), columns=confounds)


def test_fetch_development_fmri_participants(tmp_path, request_mocker):
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping[
        "https://osf.io/yr3av/download"] = mock_participants.to_csv(
        index=False, sep="\t")
    participants = func._fetch_development_fmri_participants(
        data_dir=tmp_path, url=None, verbose=1)
    assert isinstance(participants, np.ndarray)
    assert participants.shape == (5,)


def test_fetch_development_fmri_functional(tmp_path, request_mocker):
    mock_participants = _mock_participants_data(n_ids=8)
    funcs, confounds = func._fetch_development_fmri_functional(
        mock_participants, data_dir=tmp_path,
        url=None, resume=True, verbose=1)
    assert len(funcs) == 8
    assert len(confounds) == 8


def test_fetch_development_fmri(tmp_path, request_mocker):
    mock_participants = _mock_participants_data()
    request_mocker.url_mapping["*"] = _mock_development_confounds().to_csv(
        index=False, sep="\t")
    request_mocker.url_mapping[
        "https://osf.io/yr3av/download"] = mock_participants.to_csv(
        index=False, sep="\t")

    data = func.fetch_development_fmri(n_subjects=2,
                                       data_dir=tmp_path, verbose=1)
    assert len(data.func) == 2
    assert len(data.confounds) == 2
    assert isinstance(data.phenotypic, np.ndarray)
    assert data.phenotypic.shape == (2,)
    assert data.description != ''

    # check reduced confounds
    confounds = np.recfromcsv(data.confounds[0], delimiter='\t')
    assert len(confounds[0]) == 15

    # check full confounds
    data = func.fetch_development_fmri(n_subjects=2, reduce_confounds=False,
                                       verbose=1)
    confounds = np.recfromcsv(data.confounds[0], delimiter='\t')
    assert len(confounds[0]) == 28

    # check that the first subject is an adult
    data = func.fetch_development_fmri(n_subjects=1, reduce_confounds=False,
                                       verbose=1)
    age_group = data.phenotypic['Child_Adult'][0]
    assert age_group == 'adult'

    # check that the first subject is a child if requested with age_group
    data = func.fetch_development_fmri(n_subjects=1, reduce_confounds=False,
                                       verbose=1, age_group='child')
    age_group = data.phenotypic['Child_Adult'][0]
    assert age_group == 'child'

    # check that one of each age group is returned if n_subjects == 2
    # and age_group == 'both'
    data = func.fetch_development_fmri(n_subjects=2, reduce_confounds=False,
                                       verbose=1, age_group='both')
    age_group = data.phenotypic['Child_Adult']
    assert all(age_group == ['adult', 'child'])

    # check that age_group='child' only returns children
    data = func.fetch_development_fmri(n_subjects=2, reduce_confounds=False,
                                       verbose=1, age_group='child')
    assert all(x == 'child' for x in data.phenotypic['Child_Adult'])


def test_fetch_development_fmri_invalid_n_subjects(request_mocker):
    max_subjects = 155
    n_subjects = func._set_invalid_n_subjects_to_max(n_subjects=None,
                                                     max_subjects=max_subjects,
                                                     age_group='adult')
    assert n_subjects == max_subjects
    with pytest.warns(UserWarning, match='Wrong value for n_subjects='):
        func._set_invalid_n_subjects_to_max(n_subjects=-1,
                                            max_subjects=max_subjects,
                                            age_group='adult')


def test_fetch_development_fmri_exception(request_mocker):
    with pytest.raises(ValueError, match='Wrong value for age_group'):
        func._filter_func_regressors_by_participants(participants='junk',
                                                     age_group='junk for test')


# datasets tests originally belonging to nistats follow

currdir = os.path.dirname(os.path.abspath(__file__))
datadir = os.path.join(currdir, 'data')


def test_fetch_bids_langloc_dataset(request_mocker, tmp_path):
    data_dir = str(tmp_path / 'bids_langloc_example')
    os.mkdir(data_dir)
    main_folder = os.path.join(data_dir, 'bids_langloc_dataset')
    os.mkdir(main_folder)

    datadir, dl_files = func.fetch_bids_langloc_dataset(tmp_path)

    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)


def test_select_from_index(request_mocker):
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    # Prepare url files for subject and filter tests
    urls = [data_prefix + '/stuff.html',
            data_prefix + '/sub-xxx.html',
            data_prefix + '/sub-yyy.html',
            data_prefix + '/sub-xxx/ses-01_task-rest.txt',
            data_prefix + '/sub-xxx/ses-01_task-other.txt',
            data_prefix + '/sub-xxx/ses-02_task-rest.txt',
            data_prefix + '/sub-xxx/ses-02_task-other.txt',
            data_prefix + '/sub-yyy/ses-01.txt',
            data_prefix + '/sub-yyy/ses-02.txt']

    # Only 1 subject and the non-subject-specific files are selected
    new_urls = func.select_from_index(urls, n_subjects=1)
    assert len(new_urls) == 6
    assert data_prefix + '/sub-yyy.html' not in new_urls

    # 2 subjects and the non-subject-specific files are selected
    new_urls = func.select_from_index(urls, n_subjects=2)
    assert len(new_urls) == 9
    assert data_prefix + '/sub-yyy.html' in new_urls
    # ALL subjects and the non-subject-specific files are selected
    new_urls = func.select_from_index(urls, n_subjects=None)
    assert len(new_urls) == 9

    # test inclusion filters: only files matching task-rest
    new_urls = func.select_from_index(
        urls, inclusion_filters=['*task-rest*'])
    assert len(new_urls) == 2
    assert data_prefix + '/stuff.html' not in new_urls

    # test exclusion filters: only files without ses-01
    new_urls = func.select_from_index(
        urls, exclusion_filters=['*ses-01*'])
    assert len(new_urls) == 6
    assert data_prefix + '/stuff.html' in new_urls

    # test filter combination: only files with task-rest and without ses-01
    new_urls = func.select_from_index(
        urls, inclusion_filters=['*task-rest*'],
        exclusion_filters=['*ses-01*'])
    assert len(new_urls) == 1
    assert data_prefix + '/sub-xxx/ses-02_task-rest.txt' in new_urls


def test_fetch_openneuro_dataset_index(request_mocker):
    with TemporaryDirectory() as tmpdir:
        dataset_version = 'ds000030_R1.0.4'
        subdir_names = ['ds000030', 'ds000030_R1.0.4', 'uncompressed']
        tmp_list = []
        for subdir in subdir_names:
            tmp_list.append(subdir)
            subdirpath = os.path.join(tmpdir, *tmp_list)
            os.mkdir(subdirpath)

        filepath = os.path.join(subdirpath, 'urls.json')
        mock_json_content = ['junk1', 'junk2']
        with open(filepath, 'w') as f:
            json.dump(mock_json_content, f)
        urls_path, urls = func.fetch_openneuro_dataset_index(
            data_dir=tmpdir,
            dataset_version=dataset_version,
            verbose=1,
        )
        urls_path = urls_path.replace('/', os.sep)
        assert urls_path == filepath
        assert urls == mock_json_content


def test_fetch_openneuro_dataset(request_mocker, tmp_path):
    dataset_version = 'ds000030_R1.0.4'
    data_prefix = '{}/{}/uncompressed'.format(
        dataset_version.split('_')[0], dataset_version)
    data_dir = _get_dataset_dir(data_prefix, data_dir=tmp_path,
                                verbose=1)
    url_file = os.path.join(data_dir, 'urls.json')
    # Prepare url files for subject and filter tests
    urls = [
        "https://example.com/{}/stuff.html",
        "https://example.com/{}/sub-xxx.html",
        "https://example.com/{}/sub-yyy.html",
        "https://example.com/{}/sub-xxx/ses-01_task-rest.txt",
        "https://example.com/{}/sub-xxx/ses-01_task-other.txt",
        "https://example.com/{}/sub-xxx/ses-02_task-rest.txt",
        "https://example.com/{}/sub-xxx/ses-02_task-other.txt",
        "https://example.com/{}/sub-yyy/ses-01.txt",
        "https://example.com/{}/sub-yyy/ses-02.txt"]
    urls = [url.format(data_prefix) for url in urls]
    with open(url_file, 'w') as f:
        json.dump(urls, f)

    # All of the provided urls should be downloaded
    datadir, dl_files = func.fetch_openneuro_dataset(
        urls, tmp_path, dataset_version)
    assert isinstance(datadir, str)
    assert isinstance(dl_files, list)
    assert len(dl_files) == 9


def test_fetch_localizer(request_mocker, tmp_path):
    dataset = func.fetch_localizer_first_level(data_dir=tmp_path)
    assert isinstance(dataset['events'], str)
    assert isinstance(dataset.epi_img, str)


def _mock_original_spm_auditory_events_file():
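    """Return the expected events table for the SPM auditory dataset as a
    TSV string (16 alternating rest/active blocks of 42 s each)."""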
    expected_events_data = {
        'onset': [factor * 42.0 for factor in range(16)],
        'duration': [42.0] * 16,
        'trial_type': ['rest', 'active'] * 8,
    }
    expected_events_data = pd.DataFrame(expected_events_data)
    expected_events_data_string = expected_events_data.to_csv(
        sep='\t',
        index=False,
        columns=['onset', 'duration', 'trial_type'],
    )
    return expected_events_data_string


def _mock_bids_compliant_spm_auditory_events_file():
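    """Write the SPM auditory events file through
    `func._make_events_file_spm_auditory_data` and return its content
    together with the path of the file that was written."""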
    events_filepath = os.path.join(os.getcwd(), 'tests_events.tsv')
    func._make_events_file_spm_auditory_data(
        events_filepath=events_filepath)
    with open(events_filepath, 'r') as actual_events_file_obj:
        actual_events_data_string = actual_events_file_obj.read()
    return actual_events_data_string, events_filepath


def test_fetch_language_localizer_demo_dataset(request_mocker, tmp_path):
    data_dir = tmp_path
    expected_data_dir = tmp_path / 'fMRI-language-localizer-demo-dataset'
    contents_dir = Path(
        __file__).parent / "data" / "archive_contents"
    contents_list_file = contents_dir / "language_localizer.txt"
    with contents_list_file.open() as f:
        expected_files = [str(expected_data_dir / file_path.strip()) for
                          file_path in f.readlines()[1:]]
    actual_dir, actual_subdirs = func.fetch_language_localizer_demo_dataset(
        data_dir)
    assert actual_dir == str(expected_data_dir)
    assert actual_subdirs == sorted(expected_files)


def test_make_spm_auditory_events_file(request_mocker):
    try:
        (
            actual_events_data_string,
            events_filepath,
        ) = _mock_bids_compliant_spm_auditory_events_file()
    finally:
        os.remove(events_filepath)
    expected_events_data_string = _mock_original_spm_auditory_events_file()

    replace_win_line_ends = (
        lambda text: text.replace('\r\n', '\n')
        if text.find('\r\n') != -1 else text
    )
    actual_events_data_string = replace_win_line_ends(
        actual_events_data_string)
    expected_events_data_string = replace_win_line_ends(
        expected_events_data_string)

    assert actual_events_data_string == expected_events_data_string


def test_fetch_spm_auditory(request_mocker, tmp_path):
    import nibabel as nib
    import shutil
    saf = ["fM00223/fM00223_%03i.img" % index for index in range(4, 100)]
    saf_ = ["fM00223/fM00223_%03i.hdr" % index for index in range(4, 100)]

    data_dir = str(tmp_path / 'spm_auditory')
    os.mkdir(data_dir)
    subject_dir = os.path.join(data_dir, 'sub001')
    os.mkdir(subject_dir)
    os.mkdir(os.path.join(subject_dir, 'fM00223'))
    os.mkdir(os.path.join(subject_dir, 'sM00223'))

    path_img = str(tmp_path / 'tmp.img')
    path_hdr = str(tmp_path / 'tmp.hdr')
    nib.save(nib.Nifti1Image(np.zeros((2, 3, 4)), np.eye(4)), path_img)
    shutil.copy(path_img, os.path.join(subject_dir,
                                       "sM00223/sM00223_002.img"))
    shutil.copy(path_hdr, os.path.join(subject_dir,
                                       "sM00223/sM00223_002.hdr"))
    for file_ in saf:
        shutil.copy(path_img, os.path.join(subject_dir, file_))
    for file_ in saf_:
        shutil.copy(path_hdr, os.path.join(subject_dir, file_))

    dataset = func.fetch_spm_auditory(data_dir=tmp_path)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.func[0], str)
    assert len(dataset.func) == 96


def test_fetch_spm_multimodal(request_mocker, tmp_path):
    data_dir = str(tmp_path / 'spm_multimodal_fmri')
    os.mkdir(data_dir)
    subject_dir = os.path.join(data_dir, 'sub001')
    os.mkdir(subject_dir)
    os.mkdir(os.path.join(subject_dir, 'fMRI'))
    os.mkdir(os.path.join(subject_dir, 'sMRI'))
    open(os.path.join(subject_dir, 'sMRI', 'smri.img'), 'a').close()
    for session in [0, 1]:
        open(os.path.join(subject_dir, 'fMRI',
                          'trials_ses%i.mat' % (session + 1)), 'a').close()
        dir_ = os.path.join(subject_dir, 'fMRI', 'Session%d' % (session + 1))
        os.mkdir(dir_)
        for i in range(390):
            open(os.path.join(dir_, 'fMETHODS-000%i-%i-01.img' %
                              (session + 5, i)), 'a').close()

    dataset = func.fetch_spm_multimodal_fmri(data_dir=tmp_path)
    assert isinstance(dataset.anat, str)
    assert isinstance(dataset.func1[0], str)
    assert len(dataset.func1) == 390
    assert isinstance(dataset.func2[0], str)
    assert len(dataset.func2) == 390
    assert dataset.slice_order == 'descending'
    assert isinstance(dataset.trials_ses1, str)
    assert isinstance(dataset.trials_ses2, str)


def test_fiac(request_mocker, tmp_path):
    # Create dummy 'files'
    fiac_dir = str(tmp_path / 'fiac_nilearn.glm' / 'nipy-data-0.2' /
                   'data' / 'fiac')
    fiac0_dir = os.path.join(fiac_dir, 'fiac0')
    os.makedirs(fiac0_dir)
    for session in [1, 2]:
        # dummy functional data and design matrix for this session
        session_func = os.path.join(fiac0_dir, 'run%i.nii.gz' % session)
        open(session_func, 'a').close()
        sess_dmtx = os.path.join(fiac0_dir, 'run%i_design.npz' % session)
        open(sess_dmtx, 'a').close()
    mask = os.path.join(fiac0_dir, 'mask.nii.gz')
    open(mask, 'a').close()

    dataset = func.fetch_fiac_first_level(data_dir=tmp_path)
    assert isinstance(dataset.func1, str)
    assert isinstance(dataset.func2, str)
    assert isinstance(dataset.design_matrix1, str)
    assert isinstance(dataset.design_matrix2, str)
    assert isinstance(dataset.mask, str)