Work with Test Data

[4]:
from pathlib import Path
import os
import os.path as op
from pkg_resources import resource_filename as pkgrf
import shutil
import cubids
# path to the test dataset that ships with the cubids package
TEST_DATA = pkgrf("cubids", "testdata")

def test_data(tmp_path):
    """Copy the packaged test data into tmp_path and return the copy's root."""
    data_root = tmp_path / "testdata"
    shutil.copytree(TEST_DATA, str(data_root))
    assert len(list(data_root.rglob("*"))) > 5
    return data_root

workdir = os.getcwd()

def copy_testing_data(dirname):
    """Copy the test data into a new subdirectory of the current working directory."""
    newdir = op.join(workdir, dirname)
    os.makedirs(newdir)
    data_dir = test_data(Path(newdir))
    return data_dir

# copy the data; later cells refer to this copy as first_test
first_test = copy_testing_data("test1")
[3]:
!rm -rf  test1

Test the key / param groups

This test copies the data and checks that we get the correct number of key groups and parameter groups out of it.

[14]:
from cubids import CuBIDS

bod = CuBIDS(str(first_test / "complete"))
bod._cache_fieldmaps()
100%|██████████| 6/6 [00:00<00:00, 268.30it/s]
[14]:
[]
[15]:
key_groups = bod.get_key_groups()
print(key_groups)
['acquisition-HASC55AP_datatype-dwi_suffix-dwi', 'acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1', 'acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2', 'acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff', 'datatype-anat_suffix-T1w', 'datatype-fmap_direction-PA_fmap-epi_suffix-epi', 'datatype-func_suffix-bold_task-rest']
[19]:
ibod = CuBIDS(str(first_test / "inconsistent"))
misfits = ibod._cache_fieldmaps()
len(misfits)
100%|██████████| 6/6 [00:00<00:00, 267.86it/s]
[19]:
1
[21]:
ikey_groups = ibod.get_key_groups()
[22]:
ikey_groups == key_groups
[22]:
True
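
The same expectations can be written as a compact check (a sketch based on the counts printed above):

[ ]:
# The complete and inconsistent datasets yield the same key groups, and
# _cache_fieldmaps reported exactly one misfit fieldmap for the inconsistent one.
assert len(key_groups) == 7
assert len(misfits) == 1
assert ikey_groups == key_groups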

Working with DataLad

Here we try to initialize a DataLad repo on the test data.

[5]:
import datalad.api as dlapi

dl = dlapi.create(path=first_test / "inconsistent", force=True)
[INFO] Creating a new annex repo at /Users/mcieslak/projects/CuBIDS/notebooks/test1/testdata/inconsistent
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-5-88fa9c70c810> in <module>
      1 import datalad.api as dlapi
      2
----> 3 dl = dlapi.create(path=first_test / "inconsistent", force=True)

    ... (datalad and asyncio stack frames elided) ...

RuntimeError: Cannot run the event loop while another loop is running
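
This failure comes from datalad's asyncio-based command runner colliding with the event loop that Jupyter itself is already running. One possible workaround (a sketch, not executed in this notebook) is to allow nested event loops with nest_asyncio before calling datalad again:

[ ]:
# Hypothetical workaround: nest_asyncio patches asyncio so that datalad's
# runner can execute inside Jupyter's already-running event loop.
import nest_asyncio
nest_asyncio.apply()

dl = dlapi.create(path=first_test / "inconsistent", force=True)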
[5]:
files_df, summary_df = bod.get_param_groups_dataframes()
[7]:
summary_df[["key_group", "ParamGroup", "Count"]]
[7]:
key_group ParamGroup Count
0 acquisition-64dir_datatype-dwi_suffix-dwi 1 3
1 acquisition-HCP_datatype-anat_suffix-T1w 1 3
2 acquisition-HCP_datatype-anat_suffix-T2w 1 1
3 acquisition-dwi_datatype-fmap_direction-AP_suf... 1 3
4 acquisition-dwi_datatype-fmap_direction-PA_suf... 1 3
5 acquisition-fMRI_datatype-fmap_direction-AP_su... 0 1
6 acquisition-fMRI_datatype-fmap_direction-AP_su... 1 1
7 acquisition-fMRI_datatype-fmap_direction-PA_su... 0 1
8 acquisition-fMRI_datatype-fmap_direction-PA_su... 1 1
9 datatype-func_run-1_suffix-bold_task-peer 1 2
10 datatype-func_run-1_suffix-bold_task-rest 1 2
11 datatype-func_run-2_suffix-bold_task-peer 1 2
12 datatype-func_run-2_suffix-bold_task-rest 1 1
13 datatype-func_run-3_suffix-bold_task-peer 1 2
14 datatype-func_suffix-bold_task-movieDM 1 1
15 datatype-func_suffix-bold_task-movieTP 1 2
[ ]:
import pandas as pd

# Drop the file path so identical parameter sets collapse to one row each
param_group_cols = list(set(files_df.columns.to_list()) - set(["FilePath"]))
uniques = files_df.drop_duplicates(param_group_cols, ignore_index=True)
print(uniques.shape)

# Count how many files fall into each (key group, parameter group) pair
counts = files_df.groupby(["key_group", "ParamGroup"]).size().reset_index(name='Count')
print(counts.shape)

params_and_counts = pd.merge(uniques, counts)
print(params_and_counts.shape)
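
As a quick sanity check (a sketch, assuming the merge above succeeded), each key group / parameter group pair should appear exactly once in the merged summary:

[ ]:
# every (key_group, ParamGroup) combination should map to a single summary row
assert not params_and_counts.duplicated(["key_group", "ParamGroup"]).any()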
[ ]:
no_paths[["key_group", "ParamGroup"]].groupby(["key_group", "ParamGroup"]).count()
[ ]:
keyparam_df.groupby(["key_group", "ParamGroup"]).size().reset_index(name='Count')
[ ]:
fname = 'sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz'
[ ]:
bod.get_key_groups()
[ ]:
# alias the CuBIDS object so method bodies below can be pasted and run verbatim
self = bod

[ ]:
from cubids.cubids import *
# find all fieldmap NIfTIs by regex-matching their BIDS suffixes
suffix = '(phase1|phasediff|epi|fieldmap)'
fmap_files = self.layout.get(suffix=suffix, regex_search=True,
                             extension=['.nii.gz', '.nii'])

files_to_fmaps = defaultdict(list)

print("\n".join([f.path for f in fmap_files]))
[ ]:
"""
for fmap_file in tqdm(fmap_files):
    intentions = listify(fmap_file.get_metadata().get("IntendedFor"))
    subject_prefix = "sub-%s/" % fmap_file.entities['subject']
    for intended_for in intentions:
        subject_relative_path = subject_prefix + intended_for
        files_to_fmaps[subject_relative_path].append(fmap_file)
"""
# walk through a single fieldmap's IntendedFor metadata by hand
fmap_file = fmap_files[0]
intentions = listify(fmap_file.get_metadata().get("IntendedFor"))
print("intentions:", intentions)
subject_prefix = "sub-%s/" % fmap_file.entities['subject']
print(subject_prefix)
[ ]:
# build a lookup from each IntendedFor target (as an absolute path) to its fieldmaps
suffix = '(phase1|phasediff|epi|fieldmap)'
fmap_files = self.layout.get(suffix=suffix, regex_search=True,
                             extension=['.nii.gz', '.nii'])

files_to_fmaps = defaultdict(list)
for fmap_file in tqdm(fmap_files):
    intentions = listify(fmap_file.get_metadata().get("IntendedFor"))
    subject_prefix = "sub-%s" % fmap_file.entities['subject']
    for intended_for in intentions:
        full_path = Path(self.path) / subject_prefix / intended_for
        files_to_fmaps[str(full_path)].append(fmap_file)
[ ]:
# print the lookup; [44:] trims the absolute dataset prefix for readability
for data_file, fmap_files in bod.fieldmap_lookup.items():
    print(data_file[44:])
    for fmap_file in fmap_files:
        print("   ", fmap_file.path[44:])
[ ]:
files_to_fmaps.keys()
[ ]:
from cubids.cubids import *
files = [
 '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz',
 '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARRP384BVX/ses-HBNsiteRU/dwi/sub-NDARRP384BVX_ses-HBNsiteRU_acq-64dir_dwi.nii.gz']

dfs = []
fieldmap_lookup = bod.fieldmap_lookup
key_group_name = "test"
# path needs to be relative to the root with no leading prefix
for path in files:
    metadata = bod.layout.get_metadata(path)
    wanted_keys = metadata.keys() & IMAGING_PARAMS
    example_data = {key: metadata[key] for key in wanted_keys}
    example_data["key_group"] = key_group_name

    # Get the fieldmaps out and add their types
    print(fieldmap_lookup[path])
    fieldmap_types = sorted([fmap.entities['fmap'] for fmap in fieldmap_lookup[path]])
    for fmap_num, fmap_type in enumerate(fieldmap_types):
        example_data['fieldmap_type%02d' % fmap_num] = fmap_type

    # Expand slice timing to multiple columns
    SliceTime = example_data.get('SliceTiming')
    if SliceTime:
        # round each slice time to one place after the decimal
        for i in range(len(SliceTime)):
            SliceTime[i] = round(SliceTime[i], 1)
        example_data.update(
            {"SliceTime%03d" % SliceNum: time for
             SliceNum, time in enumerate(SliceTime)})
        del example_data['SliceTiming']

    dfs.append(example_data)
[ ]:
example_data
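
The accumulated rows in dfs can then be assembled into one table for inspection (a sketch; pandas was imported in an earlier cell):

[ ]:
# one row per file; parameters a file lacks show up as NaN
pd.DataFrame(dfs)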