Key Values Proof of Concept
[1]:
# Install bids
pip install bids
File "<ipython-input-1-7aafa4a1451e>", line 2
pip install bids
^
SyntaxError: invalid syntax
[4]:
# Import modules
from bids import BIDSLayout
from bids.tests import get_test_data_path
from bids.layout import parse_file_entities
import os
import glob
import json
[5]:
root_dir = '/Users/krmurtha/Desktop/informatics/CuBIDS/'
bids_dir = 'data/'
[6]:
# use glob to list all of the files recursively
all_files=glob.glob(root_dir + bids_dir + '**/**/**/*')
print(all_files)
['/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-pepolar/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-pepolar/fmap', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-pepolar/anat', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-nofmap/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-phdiff/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-phdiff/fmap', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-buds/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-buds/fmap', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-buds/anat', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-phases/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-phases/fmap', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_magnitude2.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_phase2.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_magnitude2.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_phase2.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_magnitude1.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_magnitude1.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_phase1.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_phase1.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/anat/sub-1832999514_ses-PNC2_T1w.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/anat/sub-1832999514_ses-PNC2_rec-refaced_T1w.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/anat/sub-1832999514_ses-PNC2_rec-refaced_T1w.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/anat/sub-1832999514_ses-PNC2_T1w.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-idemo.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-frac2back.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-idemo.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-rest_acq-singleband_bold.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-frac2back.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-rest_acq-singleband_bold.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-02_dwi.bval', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-01_dwi.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-02_dwi.bvec', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-01_dwi.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-02_dwi.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-02_dwi.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-01_dwi.bval', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-01_dwi.bvec', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/func/sub-1832999514_ses-PNC1_task-rest_acq-singleband_task-rest_bold.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/func/sub-1832999514_ses-PNC1_task-rest_acq-singleband_task-rest_bold.json']
[7]:
# use pybids to extract BIDS entities from single subject
path = all_files[0]
dict1= parse_file_entities(path)
print(dict1)
{'subject': 'PNC', 'session': 'pepolar'}
/Users/krmurtha/anaconda3/envs/cubids/lib/python3.8/site-packages/bids/layout/models.py:148: FutureWarning: The 'extension' entity currently excludes the leading dot ('.'). As of version 0.14.0, it will include the leading dot. To suppress this warning and include the leading dot, use `bids.config.set_option('extension_initial_dot', True)`.
warnings.warn("The 'extension' entity currently excludes the leading dot ('.'). "
[8]:
# test another example
dict2= parse_file_entities(all_files[14])
print(dict2)
{'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase2', 'fmap': 'phase2', 'datatype': 'fmap', 'extension': 'nii.gz'}
[9]:
entities = []
#initialize list
for file in all_files:
#for each file in the list, parse the information into a dictionary and add it to the list we just initialized
result = parse_file_entities(file)
entities.append(result)
#entities.add(string_result)
print(entities)
[{'subject': 'PNC', 'session': 'pepolar'}, {'subject': 'PNC', 'session': 'pepolar'}, {'subject': 'PNC', 'session': 'pepolar'}, {'subject': 'PNC', 'session': 'nofmap'}, {'subject': 'PNC', 'session': 'phdiff'}, {'subject': 'PNC', 'session': 'phdiff'}, {'subject': 'PNC', 'session': 'buds'}, {'subject': 'PNC', 'session': 'buds'}, {'subject': 'PNC', 'session': 'buds'}, {'subject': 'PNC', 'session': 'phases'}, {'subject': 'PNC', 'session': 'phases'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'magnitude2', 'fmap': 'magnitude2', 'datatype': 'fmap', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase2', 'datatype': 'fmap', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'magnitude2', 'datatype': 'fmap', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase2', 'fmap': 'phase2', 'datatype': 'fmap', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'magnitude1', 'fmap': 'magnitude1', 'datatype': 'fmap', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'magnitude1', 'datatype': 'fmap', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase1', 'datatype': 'fmap', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase1', 'fmap': 'phase1', 'datatype': 'fmap', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'T1w', 'datatype': 'anat', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'reconstruction': 'refaced', 'suffix': 'T1w', 'datatype': 'anat', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'reconstruction': 'refaced', 'suffix': 'T1w', 'datatype': 'anat', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'T1w', 'datatype': 'anat', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'idemo', 'suffix': 'idemo', 'datatype': 'func', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'frac2back', 'suffix': 'frac2back', 'datatype': 'func', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'idemo', 'suffix': 'idemo', 'datatype': 'func', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'rest', 'acquisition': 'singleband', 'suffix': 'bold', 'datatype': 'func', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'frac2back', 'suffix': 'frac2back', 'datatype': 'func', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'rest', 'acquisition': 'singleband', 'suffix': 'bold', 'datatype': 'func', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 2, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'bval'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 1, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 2, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'bvec'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 1, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 2, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 2, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 1, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'bval'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 1, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'bvec'}, {'subject': '1832999514', 'session': 'PNC1', 'task': 'rest', 'acquisition': 'singleband', 'suffix': 'bold', 'datatype': 'func', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC1', 'task': 'rest', 'acquisition': 'singleband', 'suffix': 'bold', 'datatype': 'func', 'extension': 'json'}]
[10]:
# loop through files to create a bigger dictionary of discrete keys, adding each value to a list
dictionary = {}
# initialize a new dictionary
for e in entities:
# for each dictionary in the list we created above
for k,v in e.items():
#for each set of key-value pairs in each dictionary
#print(k,v)
if k not in dictionary.keys():
#if the key is not in the larger dictionary keys, set the value as value, but in a list
dictionary[k]=[v]
else:
#if the key is in the dictionary, add the new value to the existing value list
dictionary[k].append(v)
print(dictionary)
{'subject': ['PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514'], 'session': ['pepolar', 'pepolar', 'pepolar', 'nofmap', 'phdiff', 'phdiff', 'buds', 'buds', 'buds', 'phases', 'phases', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1'], 'suffix': ['magnitude2', 'phase2', 'magnitude2', 'phase2', 'magnitude1', 'magnitude1', 'phase1', 'phase1', 'T1w', 'T1w', 'T1w', 'T1w', 'idemo', 'frac2back', 'idemo', 'bold', 'frac2back', 'bold', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'bold', 'bold'], 'fmap': ['magnitude2', 'phase2', 'magnitude1', 'phase1'], 'datatype': ['fmap', 'fmap', 'fmap', 'fmap', 'fmap', 'fmap', 'fmap', 'fmap', 'anat', 'anat', 'anat', 'anat', 'func', 'func', 'func', 'func', 'func', 'func', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'func', 'func'], 'extension': ['nii.gz', 'json', 'json', 'nii.gz', 'nii.gz', 'json', 'json', 'nii.gz', 'nii.gz', 'json', 'nii.gz', 'json', 'nii.gz', 'nii.gz', 'json', 'nii.gz', 'json', 'json', 'bval', 'json', 'bvec', 'nii.gz', 'nii.gz', 'json', 'bval', 'bvec', 'nii.gz', 'json'], 'reconstruction': ['refaced', 'refaced'], 'task': ['idemo', 'frac2back', 'idemo', 'rest', 'frac2back', 'rest', 'rest', 'rest'], 'acquisition': ['singleband', 'singleband', 'singleband', 'singleband'], 'run': [2, 1, 2, 1, 2, 2, 1, 1]}
[11]:
#create one dictionary value per key in original dictionary
# loop through dictionary values and create dictionaries for instances of each list
l_dicts = []
for key in dictionary.keys():
# for each list that is the value of the big dictionary:
#print (key)
counts = {} #initialize a new dictionary for # of instances
l_labels = dictionary[key]
#print(l_labels)
for item in l_labels:
#for each item in those lists
if item not in counts.keys():
#if the item is not in the new dictionary, set it to 1
counts[item]= 1
else:
#if it already exists, add 1
counts[item]+= 1
l_dicts.append(counts)
#list of dictionaries where KEYS: BIDS entities values and VALUES: instances of that key
print(l_dicts)
[{'PNC': 11, '1832999514': 28}, {'pepolar': 3, 'nofmap': 1, 'phdiff': 2, 'buds': 3, 'phases': 2, 'PNC2': 18, 'PNC1': 10}, {'magnitude2': 2, 'phase2': 2, 'magnitude1': 2, 'phase1': 2, 'T1w': 4, 'idemo': 2, 'frac2back': 2, 'bold': 4, 'dwi': 8}, {'magnitude2': 1, 'phase2': 1, 'magnitude1': 1, 'phase1': 1}, {'fmap': 8, 'anat': 4, 'func': 8, 'dwi': 8}, {'nii.gz': 12, 'json': 12, 'bval': 2, 'bvec': 2}, {'refaced': 2}, {'idemo': 2, 'frac2back': 2, 'rest': 4}, {'singleband': 4}, {2: 4, 1: 4}]
[13]:
#make a new dictionary with KEYS: BIDS entities (ie: subject, session, etc) and VALUES: dictionaries of ID's and instances
new_dictionary = {}
counter = 0
for key in dictionary.keys():
#assign values from l_dicts to each key
new_dictionary[key] = l_dicts[counter]
counter += 1
print(new_dictionary)
{'subject': {'PNC': 11, '1832999514': 28}, 'session': {'pepolar': 3, 'nofmap': 1, 'phdiff': 2, 'buds': 3, 'phases': 2, 'PNC2': 18, 'PNC1': 10}, 'suffix': {'magnitude2': 2, 'phase2': 2, 'magnitude1': 2, 'phase1': 2, 'T1w': 4, 'idemo': 2, 'frac2back': 2, 'bold': 4, 'dwi': 8}, 'fmap': {'magnitude2': 1, 'phase2': 1, 'magnitude1': 1, 'phase1': 1}, 'datatype': {'fmap': 8, 'anat': 4, 'func': 8, 'dwi': 8}, 'extension': {'nii.gz': 12, 'json': 12, 'bval': 2, 'bvec': 2}, 'reconstruction': {'refaced': 2}, 'task': {'idemo': 2, 'frac2back': 2, 'rest': 4}, 'acquisition': {'singleband': 4}, 'run': {2: 4, 1: 4}}
[15]:
#initialize new list for tuples
l_tups= []
for key in new_dictionary:
#list out all keys
e1 = key
#print(e1)
for s_key in new_dictionary[key]:
#list out all
e2 = s_key
#print(e2)
e3 = new_dictionary[key][s_key]
#print(e3)
l_tups.append((e1,e2,e3))
[17]:
import pandas as pd
[18]:
df = pd.DataFrame(l_tups, columns = ['key', 'val', 'count'])
print(df)
key val count
0 subject PNC 11
1 subject 1832999514 28
2 session pepolar 3
3 session nofmap 1
4 session phdiff 2
5 session buds 3
6 session phases 2
7 session PNC2 18
8 session PNC1 10
9 suffix magnitude2 2
10 suffix phase2 2
11 suffix magnitude1 2
12 suffix phase1 2
13 suffix T1w 4
14 suffix idemo 2
15 suffix frac2back 2
16 suffix bold 4
17 suffix dwi 8
18 fmap magnitude2 1
19 fmap phase2 1
20 fmap magnitude1 1
21 fmap phase1 1
22 datatype fmap 8
23 datatype anat 4
24 datatype func 8
25 datatype dwi 8
26 extension nii.gz 12
27 extension json 12
28 extension bval 2
29 extension bvec 2
30 reconstruction refaced 2
31 task idemo 2
32 task frac2back 2
33 task rest 4
34 acquisition singleband 4
35 run 2 4
36 run 1 4
[ ]: