Work with Test Data
[4]:
from pathlib import Path
import os
import os.path as op
# NOTE(review): pkg_resources is deprecated upstream; importlib.resources is
# the modern replacement (its files() API needs Python >= 3.9).
from pkg_resources import resource_filename as pkgrf
import shutil
import cubids
# Absolute path to the test dataset shipped inside the installed cubids package.
TEST_DATA = pkgrf("cubids", "testdata")
def test_data(tmp_path):
data_root = tmp_path / "testdata"
shutil.copytree(TEST_DATA, str(data_root))
assert len(list(data_root.rglob("*"))) > 5
return data_root
workdir = os.getcwd()

def copy_testing_data(dirname):
    """Copy the packaged test data into ``<cwd>/<dirname>`` and return the copy's root.

    Raises
    ------
    FileExistsError
        If ``<cwd>/<dirname>`` already exists (run the cleanup cell below first).
    """
    newdir = op.join(workdir, dirname)
    os.makedirs(newdir)  # deliberately strict: fail rather than overwrite a stale copy
    data_dir = test_data(Path(newdir))
    return data_dir

# copy the data
data_root = copy_testing_data("test1")
# FIX(review): later cells reference `first_test`, which was never defined
# (NameError on Restart & Run All); bind it to the freshly copied root.
first_test = data_root
[3]:
# Remove any leftover copy from a previous run so os.makedirs inside
# copy_testing_data does not fail on an existing "test1" directory.
!rm -rf test1
[ ]:
Test the key / param groups
This test copies the data and makes sure we get the correct number of key groups and parameter groups out of it.
[14]:
from cubids import CuBIDS

# Build a CuBIDS object over the self-consistent copy of the test data;
# caching the fieldmaps populates the IntendedFor lookup used below.
complete_dir = first_test / "complete"
bod = CuBIDS(str(complete_dir))
bod._cache_fieldmaps()
100%|██████████| 6/6 [00:00<00:00, 268.30it/s]
[14]:
[]
[15]:
# Key groups found in the consistent dataset; kept in `key_groups` so the
# inconsistent copy can be compared against it below.
key_groups = bod.get_key_groups()
print(key_groups)
['acquisition-HASC55AP_datatype-dwi_suffix-dwi', 'acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1', 'acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2', 'acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff', 'datatype-anat_suffix-T1w', 'datatype-fmap_direction-PA_fmap-epi_suffix-epi', 'datatype-func_suffix-bold_task-rest']
[19]:
# Same fieldmap caching on the deliberately inconsistent copy.
# _cache_fieldmaps returns the files whose fieldmaps could not be resolved
# ("misfits"); the recorded output below shows exactly one.
ibod = CuBIDS(str(first_test / "inconsistent"))
misfits = ibod._cache_fieldmaps()
len(misfits)
100%|██████████| 6/6 [00:00<00:00, 267.86it/s]
[19]:
1
[21]:
# Key groups for the inconsistent dataset.
ikey_groups = ibod.get_key_groups()
[22]:
# Recorded output: True -- both copies yield the same key-group names.
ikey_groups == key_groups
[22]:
True
Working with datalad
Here we try to initialize a datalad repo on the test data
[5]:
import datalad.api as dlapi
# NOTE(review): this call fails inside Jupyter (see the traceback below).
# datalad's command runner calls event_loop.run_until_complete, which raises
# RuntimeError because the notebook kernel's asyncio loop is already running.
# Run this from a plain Python session/CLI instead -- TODO confirm a fix.
dl = dlapi.create(path=first_test / "inconsistent", force=True)
[INFO] Creating a new annex repo at /Users/mcieslak/projects/CuBIDS/notebooks/test1/testdata/inconsistent
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-5-88fa9c70c810> in <module>
1 import datalad.api as dlapi
2
----> 3 dl = dlapi.create(path=first_test / "inconsistent", force=True)
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/interface/utils.py in eval_func(wrapped, instance, args, kwargs)
493 return results
494 lgr.log(2, "Returning return_func from eval_func for %s", wrapped_class)
--> 495 return return_func(generator_func)(*args, **kwargs)
496
497 return eval_func(func)
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/interface/utils.py in return_func(wrapped_, instance_, args_, kwargs_)
481 # unwind generator if there is one, this actually runs
482 # any processing
--> 483 results = list(results)
484 # render summaries
485 if not result_xfm and result_renderer in ('tailored', 'default'):
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/interface/utils.py in generator_func(*_args, **_kwargs)
400
401 # process main results
--> 402 for r in _process_results(
403 # execution
404 wrapped(*_args, **_kwargs),
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/interface/utils.py in _process_results(results, cmd_class, on_failure, action_summary, incomplete_results, result_renderer, result_log_level, allkwargs)
560 render_n_repetitions = 10 if sys.stdout.isatty() else float("inf")
561
--> 562 for res in results:
563 if not res or 'action' not in res:
564 # XXX Yarik has to no clue on how to track the origin of the
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/core/local/create.py in __call__(path, initopts, force, description, dataset, no_annex, annex, fake_dates, cfg_proc)
393 # always come with annex when created from scratch
394 lgr.info("Creating a new annex repo at %s", tbds.path)
--> 395 tbrepo = AnnexRepo(
396 tbds.path,
397 url=None,
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/support/repo.py in __call__(cls, *args, **kwargs)
149 # we have no such instance yet or the existing one is invalidated,
150 # so we instantiate:
--> 151 instance = type.__call__(cls, *new_args, **new_kwargs)
152 cls._unique_instances[id_] = instance
153 else:
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/support/annexrepo.py in __init__(self, path, url, runner, backend, always_commit, create, create_sanity_checks, init, batch_size, version, description, git_opts, annex_opts, annex_init_opts, repo, fake_dates)
274
275 if do_init:
--> 276 self._init(version=version, description=description)
277
278 # TODO: RM DIRECT eventually, but should remain while we have is_direct_mode
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/support/annexrepo.py in _init(self, version, description)
1275 where='local',
1276 reload=False)
-> 1277 self._run_annex_command(
1278 'init',
1279 runner="gitwitless",
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/support/annexrepo.py in _run_annex_command(self, annex_cmd, git_options, annex_options, backend, jobs, files, merge_annex_branches, runner, protocol, **kwargs)
1098 # TODO: RF to use --batch where possible instead of splitting
1099 # into multiple invocations
-> 1100 return run_gitcommand_on_file_list_chunks(
1101 run_func,
1102 cmd_list,
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/cmd.py in run_gitcommand_on_file_list_chunks(func, cmd, files, *args, **kwargs)
142 results.append(func(cmd + ['--'] + file_chunk, *args, **kwargs))
143 else:
--> 144 results.append(func(cmd, *args, **kwargs))
145 # if it was a WitlessRunner.run -- we would get dicts.
146 # If old Runner -- stdout, stderr strings
~/miniconda3/envs/cubids/lib/python3.8/site-packages/datalad/cmd.py in run(self, cmd, protocol, stdin, cwd, env, **kwargs)
478 asyncio.set_event_loop(event_loop)
479 # include the subprocess manager in the asyncio event loop
--> 480 results = event_loop.run_until_complete(
481 run_async_cmd(
482 event_loop,
~/miniconda3/envs/cubids/lib/python3.8/asyncio/base_events.py in run_until_complete(self, future)
590 """
591 self._check_closed()
--> 592 self._check_running()
593
594 new_task = not futures.isfuture(future)
~/miniconda3/envs/cubids/lib/python3.8/asyncio/base_events.py in _check_running(self)
552 raise RuntimeError('This event loop is already running')
553 if events._get_running_loop() is not None:
--> 554 raise RuntimeError(
555 'Cannot run the event loop while another loop is running')
556
RuntimeError: Cannot run the event loop while another loop is running
[5]:
# Per-file table and per-(key group, param group) summary table for the
# consistent dataset.
files_df, summary_df = bod.get_param_groups_dataframes()
[23]:
# Opens a Qt console attached to this kernel (interactive convenience only;
# no effect on the analysis state).
%qtconsole
[7]:
# One row per (key group, param group) with the number of files in each.
summary_df[["key_group", "ParamGroup", "Count"]]
[7]:
key_group | ParamGroup | Count | |
---|---|---|---|
0 | acquisition-64dir_datatype-dwi_suffix-dwi | 1 | 3 |
1 | acquisition-HCP_datatype-anat_suffix-T1w | 1 | 3 |
2 | acquisition-HCP_datatype-anat_suffix-T2w | 1 | 1 |
3 | acquisition-dwi_datatype-fmap_direction-AP_suf... | 1 | 3 |
4 | acquisition-dwi_datatype-fmap_direction-PA_suf... | 1 | 3 |
5 | acquisition-fMRI_datatype-fmap_direction-AP_su... | 0 | 1 |
6 | acquisition-fMRI_datatype-fmap_direction-AP_su... | 1 | 1 |
7 | acquisition-fMRI_datatype-fmap_direction-PA_su... | 0 | 1 |
8 | acquisition-fMRI_datatype-fmap_direction-PA_su... | 1 | 1 |
9 | datatype-func_run-1_suffix-bold_task-peer | 1 | 2 |
10 | datatype-func_run-1_suffix-bold_task-rest | 1 | 2 |
11 | datatype-func_run-2_suffix-bold_task-peer | 1 | 2 |
12 | datatype-func_run-2_suffix-bold_task-rest | 1 | 1 |
13 | datatype-func_run-3_suffix-bold_task-peer | 1 | 2 |
14 | datatype-func_suffix-bold_task-movieDM | 1 | 1 |
15 | datatype-func_suffix-bold_task-movieTP | 1 | 2 |
[ ]:
import pandas as pd

# Prototype of the param-group counting logic.
# FIX(review): the original referenced an undefined `df`; the columns used
# below (FilePath, key_group, ParamGroup) belong to `files_df` from
# get_param_groups_dataframes above -- TODO confirm against cubids source.
df = files_df

# Every column except the file path participates in defining a param group.
param_group_cols = list(set(df.columns.to_list()) - set(["FilePath"]))
uniques = df.drop_duplicates(param_group_cols, ignore_index=True)
print(uniques.shape)
# Count how many files fall into each (key group, param group) pair.
counts = df.groupby(["key_group", "ParamGroup"]).size().reset_index(name='Count')
print(counts.shape)
# Attach the counts to the unique parameter rows.
params_and_counts = pd.merge(uniques, counts)
print(params_and_counts.shape)
[ ]:
[ ]:
# NOTE(review): `no_paths` is not defined anywhere in this notebook --
# scratch cell left over from interactive exploration.
no_paths[["key_group", "ParamGroup"]].groupby(["key_group", "ParamGroup"]).count()
[ ]:
[ ]:
[ ]:
# NOTE(review): `keyparam_df` is also undefined here -- scratch cell.
keyparam_df.groupby(["key_group", "ParamGroup"]).size().reset_index(name='Count')
[ ]:
# Example subject-relative BIDS path (not referenced by later cells as shown).
fname = 'sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz'
[ ]:
# Re-display the key groups of the consistent dataset.
bod.get_key_groups()
[ ]:
# Interactive trick: alias the CuBIDS instance as `self` so method bodies
# can be pasted into cells verbatim while debugging.
self = bod
[ ]:
# NOTE(review): wildcard import -- later cells rely on names it brings in
# (defaultdict, tqdm, listify), so it must stay, though explicit imports
# would be clearer.
from cubids.cubids import *
# Regex of fieldmap suffixes; regex_search=True makes the layout treat it
# as a pattern rather than a literal suffix.
suffix = '(phase1|phasediff|epi|fieldmap)'
fmap_files = self.layout.get(suffix=suffix, regex_search=True,
                             extension=['.nii.gz', '.nii'])
files_to_fmaps = defaultdict(list)
print("\n".join([f.path for f in fmap_files]))
[ ]:
"""
for fmap_file in tqdm(fmap_files):
intentions = listify(fmap_file.get_metadata().get("IntendedFor"))
subject_prefix = "sub-%s/" % fmap_file.entities['subject']
for intended_for in intentions:
subject_relative_path = subject_prefix + intended_for
files_to_fmaps[subject_relative_path].append(fmap_file)
"""
fmap_file = fmap_files[0]
intentions = listify(fmap_file.get_metadata().get("IntendedFor"))
print("intentions:", intentions)
subject_prefix = "sub-%s/" % fmap_file.entities['subject']
print(subject_prefix)
[ ]:
# Final version of the IntendedFor resolution: map each target image
# (absolute path under the dataset root) to the fieldmaps that point at it.
suffix = '(phase1|phasediff|epi|fieldmap)'
fmap_files = self.layout.get(suffix=suffix, regex_search=True,
                             extension=['.nii.gz', '.nii'])
files_to_fmaps = defaultdict(list)
for fmap in tqdm(fmap_files):
    # IntendedFor entries are subject-relative; listify() normalizes the
    # scalar-vs-list metadata forms.
    prefix = "sub-%s" % fmap.entities['subject']
    for target in listify(fmap.get_metadata().get("IntendedFor")):
        absolute_target = Path(self.path) / prefix / target
        files_to_fmaps[str(absolute_target)].append(fmap)
[ ]:
# Show which fieldmaps were matched to each data file. The slice strips a
# machine-specific absolute prefix so only the dataset-relative part prints.
# FIX(review): 44 was a magic number (length of this machine's dataset root
# path); named here -- TODO derive it from the layout root instead.
PREFIX_LEN = 44
for data_file, fmap_files in bod.fieldmap_lookup.items():
    print(data_file[PREFIX_LEN:])
    for fmap_file in fmap_files:
        print(" ", fmap_file.path[PREFIX_LEN:])
[ ]:
# All target files that had at least one fieldmap pointing at them.
files_to_fmaps.keys()
[ ]:
from cubids.cubids import *

# Prototype for building one param-group record per file.
# NOTE(review): hardcoded absolute local paths -- these only resolve on the
# original author's machine; parameterize before reuse.
files = [
    '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARAT581NDH/ses-HBNsiteRU/dwi/sub-NDARAT581NDH_ses-HBNsiteRU_acq-64dir_dwi.nii.gz',
    '/Users/mcieslak/projects/test_bids_data/HBN/sub-NDARRP384BVX/ses-HBNsiteRU/dwi/sub-NDARRP384BVX_ses-HBNsiteRU_acq-64dir_dwi.nii.gz']
dfs = []
fieldmap_lookup = bod.fieldmap_lookup
key_group_name = "test"
# path needs to be relative to the root with no leading prefix
for path in files:
    metadata = bod.layout.get_metadata(path)
    # Keep only the recognized imaging parameters.
    wanted_keys = metadata.keys() & IMAGING_PARAMS
    example_data = {key: metadata[key] for key in wanted_keys}
    example_data["key_group"] = key_group_name

    # Get the fieldmaps out and add their types
    print(fieldmap_lookup[path])
    fieldmap_types = sorted([fmap.entities['fmap'] for fmap in fieldmap_lookup[path]])
    for fmap_num, fmap_type in enumerate(fieldmap_types):
        example_data['fieldmap_type%02d' % fmap_num] = fmap_type

    # Expand slice timing to multiple columns
    SliceTime = example_data.get('SliceTiming')
    if SliceTime:
        # FIX(review): round into a copy. The original rounded in place,
        # mutating the list inside the metadata dict returned by the layout
        # (and whatever cache sits behind it).
        rounded_times = [round(t, 1) for t in SliceTime]
        example_data.update(
            {"SliceTime%03d" % SliceNum: time for
             SliceNum, time in enumerate(rounded_times)})
        del example_data['SliceTiming']
    dfs.append(example_data)
[ ]:
# Rich-display the last record built in the loop above.
example_data