Key Values Proof of Concept

[1]:
# Install bids
pip install bids
  File "<ipython-input-1-7aafa4a1451e>", line 2
    pip install bids
        ^
SyntaxError: invalid syntax

[4]:
# Import modules
from bids import BIDSLayout
from bids.tests import get_test_data_path
from bids.layout import parse_file_entities
import os
import glob
import json
[5]:
root_dir = '/Users/krmurtha/Desktop/informatics/CuBIDS/'
bids_dir = 'data/'
[6]:
# use glob to list all of the files recursively

all_files=glob.glob(root_dir + bids_dir + '**/**/**/*')
print(all_files)
['/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-pepolar/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-pepolar/fmap', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-pepolar/anat', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-nofmap/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-phdiff/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-phdiff/fmap', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-buds/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-buds/fmap', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-buds/anat', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-phases/dwi', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/DSDTI_fmap/sub-PNC/ses-phases/fmap', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_magnitude2.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_phase2.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_magnitude2.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_phase2.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_magnitude1.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_magnitude1.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_phase1.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/fmap/sub-1832999514_ses-PNC2_phase1.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/anat/sub-1832999514_ses-PNC2_T1w.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/anat/sub-1832999514_ses-PNC2_rec-refaced_T1w.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/anat/sub-1832999514_ses-PNC2_rec-refaced_T1w.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/anat/sub-1832999514_ses-PNC2_T1w.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-idemo.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-frac2back.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-idemo.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-rest_acq-singleband_bold.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-frac2back.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC2/func/sub-1832999514_ses-PNC2_task-rest_acq-singleband_bold.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-02_dwi.bval', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-01_dwi.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-02_dwi.bvec', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-01_dwi.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-02_dwi.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-02_dwi.json', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-01_dwi.bval', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/dwi/sub-1832999514_ses-PNC1_run-01_dwi.bvec', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/func/sub-1832999514_ses-PNC1_task-rest_acq-singleband_task-rest_bold.nii.gz', '/Users/krmurtha/Desktop/informatics/CuBIDS/data/sub-1832999514/ses-PNC1/func/sub-1832999514_ses-PNC1_task-rest_acq-singleband_task-rest_bold.json']
[7]:
# use pybids to extract BIDS entities from single subject

path = all_files[0]
dict1= parse_file_entities(path)
print(dict1)
{'subject': 'PNC', 'session': 'pepolar'}
/Users/krmurtha/anaconda3/envs/cubids/lib/python3.8/site-packages/bids/layout/models.py:148: FutureWarning: The 'extension' entity currently excludes the leading dot ('.'). As of version 0.14.0, it will include the leading dot. To suppress this warning and include the leading dot, use `bids.config.set_option('extension_initial_dot', True)`.
  warnings.warn("The 'extension' entity currently excludes the leading dot ('.'). "
[8]:
# test another example

dict2= parse_file_entities(all_files[14])
print(dict2)
{'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase2', 'fmap': 'phase2', 'datatype': 'fmap', 'extension': 'nii.gz'}
[9]:
entities = []
#initialize list

for file in all_files:
#for each file in the list, parse the information into a dictionary and add it to the list we just initialized
    result = parse_file_entities(file)

    entities.append(result)
    #entities.add(string_result)
print(entities)
[{'subject': 'PNC', 'session': 'pepolar'}, {'subject': 'PNC', 'session': 'pepolar'}, {'subject': 'PNC', 'session': 'pepolar'}, {'subject': 'PNC', 'session': 'nofmap'}, {'subject': 'PNC', 'session': 'phdiff'}, {'subject': 'PNC', 'session': 'phdiff'}, {'subject': 'PNC', 'session': 'buds'}, {'subject': 'PNC', 'session': 'buds'}, {'subject': 'PNC', 'session': 'buds'}, {'subject': 'PNC', 'session': 'phases'}, {'subject': 'PNC', 'session': 'phases'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'magnitude2', 'fmap': 'magnitude2', 'datatype': 'fmap', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase2', 'datatype': 'fmap', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'magnitude2', 'datatype': 'fmap', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase2', 'fmap': 'phase2', 'datatype': 'fmap', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'magnitude1', 'fmap': 'magnitude1', 'datatype': 'fmap', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'magnitude1', 'datatype': 'fmap', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase1', 'datatype': 'fmap', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'phase1', 'fmap': 'phase1', 'datatype': 'fmap', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'T1w', 'datatype': 'anat', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'reconstruction': 'refaced', 'suffix': 'T1w', 'datatype': 'anat', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'reconstruction': 'refaced', 'suffix': 'T1w', 'datatype': 'anat', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'suffix': 'T1w', 'datatype': 'anat', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'idemo', 'suffix': 'idemo', 'datatype': 'func', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'frac2back', 'suffix': 'frac2back', 'datatype': 'func', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'idemo', 'suffix': 'idemo', 'datatype': 'func', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'rest', 'acquisition': 'singleband', 'suffix': 'bold', 'datatype': 'func', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'frac2back', 'suffix': 'frac2back', 'datatype': 'func', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC2', 'task': 'rest', 'acquisition': 'singleband', 'suffix': 'bold', 'datatype': 'func', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 2, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'bval'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 1, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 2, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'bvec'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 1, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 2, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 2, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'json'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 1, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'bval'}, {'subject': '1832999514', 'session': 'PNC1', 'run': 1, 'suffix': 'dwi', 'datatype': 'dwi', 'extension': 'bvec'}, {'subject': '1832999514', 'session': 'PNC1', 'task': 'rest', 'acquisition': 'singleband', 'suffix': 'bold', 'datatype': 'func', 'extension': 'nii.gz'}, {'subject': '1832999514', 'session': 'PNC1', 'task': 'rest', 'acquisition': 'singleband', 'suffix': 'bold', 'datatype': 'func', 'extension': 'json'}]
[10]:

# loop through files to create a bigger dictionary of discrete keys, adding each value to a list dictionary = {} # initialize a new dictionary for e in entities: # for each dictionary in the list we created above for k,v in e.items(): #for each set of key-value pairs in each dictionary #print(k,v) if k not in dictionary.keys(): #if the key is not in the larger dictionary keys, set the value as value, but in a list dictionary[k]=[v] else: #if the key is in the dictionary, add the new value to the existing value list dictionary[k].append(v) print(dictionary)
{'subject': ['PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', 'PNC', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514', '1832999514'], 'session': ['pepolar', 'pepolar', 'pepolar', 'nofmap', 'phdiff', 'phdiff', 'buds', 'buds', 'buds', 'phases', 'phases', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC2', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1', 'PNC1'], 'suffix': ['magnitude2', 'phase2', 'magnitude2', 'phase2', 'magnitude1', 'magnitude1', 'phase1', 'phase1', 'T1w', 'T1w', 'T1w', 'T1w', 'idemo', 'frac2back', 'idemo', 'bold', 'frac2back', 'bold', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'bold', 'bold'], 'fmap': ['magnitude2', 'phase2', 'magnitude1', 'phase1'], 'datatype': ['fmap', 'fmap', 'fmap', 'fmap', 'fmap', 'fmap', 'fmap', 'fmap', 'anat', 'anat', 'anat', 'anat', 'func', 'func', 'func', 'func', 'func', 'func', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'dwi', 'func', 'func'], 'extension': ['nii.gz', 'json', 'json', 'nii.gz', 'nii.gz', 'json', 'json', 'nii.gz', 'nii.gz', 'json', 'nii.gz', 'json', 'nii.gz', 'nii.gz', 'json', 'nii.gz', 'json', 'json', 'bval', 'json', 'bvec', 'nii.gz', 'nii.gz', 'json', 'bval', 'bvec', 'nii.gz', 'json'], 'reconstruction': ['refaced', 'refaced'], 'task': ['idemo', 'frac2back', 'idemo', 'rest', 'frac2back', 'rest', 'rest', 'rest'], 'acquisition': ['singleband', 'singleband', 'singleband', 'singleband'], 'run': [2, 1, 2, 1, 2, 2, 1, 1]}
[11]:
#create one dictionary value per key in original dictionary
# loop through dictionary values and create dictionaries for instances of each list
l_dicts = []
for key in dictionary.keys():
# for each list that is the value of the big dictionary:
    #print (key)
    counts = {}  #initialize a new dictionary for # of instances
    l_labels = dictionary[key]
    #print(l_labels)
    for item in l_labels:
    #for each item in those lists
        if item not in counts.keys():
        #if the item is not in the new dictionary, set it to 1
            counts[item]= 1
        else:
        #if it already exists, add 1
            counts[item]+= 1
    l_dicts.append(counts)
#list of dictionaries where KEYS: BIDS entities values and VALUES: instances of that key
print(l_dicts)


[{'PNC': 11, '1832999514': 28}, {'pepolar': 3, 'nofmap': 1, 'phdiff': 2, 'buds': 3, 'phases': 2, 'PNC2': 18, 'PNC1': 10}, {'magnitude2': 2, 'phase2': 2, 'magnitude1': 2, 'phase1': 2, 'T1w': 4, 'idemo': 2, 'frac2back': 2, 'bold': 4, 'dwi': 8}, {'magnitude2': 1, 'phase2': 1, 'magnitude1': 1, 'phase1': 1}, {'fmap': 8, 'anat': 4, 'func': 8, 'dwi': 8}, {'nii.gz': 12, 'json': 12, 'bval': 2, 'bvec': 2}, {'refaced': 2}, {'idemo': 2, 'frac2back': 2, 'rest': 4}, {'singleband': 4}, {2: 4, 1: 4}]
[13]:
#make a new dictionary with KEYS: BIDS entities (ie: subject, session, etc) and VALUES: dictionaries of ID's and instances

new_dictionary = {}
counter = 0
for key in dictionary.keys():
    #assign values from l_dicts to each key
    new_dictionary[key] = l_dicts[counter]
    counter += 1

print(new_dictionary)

{'subject': {'PNC': 11, '1832999514': 28}, 'session': {'pepolar': 3, 'nofmap': 1, 'phdiff': 2, 'buds': 3, 'phases': 2, 'PNC2': 18, 'PNC1': 10}, 'suffix': {'magnitude2': 2, 'phase2': 2, 'magnitude1': 2, 'phase1': 2, 'T1w': 4, 'idemo': 2, 'frac2back': 2, 'bold': 4, 'dwi': 8}, 'fmap': {'magnitude2': 1, 'phase2': 1, 'magnitude1': 1, 'phase1': 1}, 'datatype': {'fmap': 8, 'anat': 4, 'func': 8, 'dwi': 8}, 'extension': {'nii.gz': 12, 'json': 12, 'bval': 2, 'bvec': 2}, 'reconstruction': {'refaced': 2}, 'task': {'idemo': 2, 'frac2back': 2, 'rest': 4}, 'acquisition': {'singleband': 4}, 'run': {2: 4, 1: 4}}
[15]:
#initialize new list for tuples
l_tups= []
for key in new_dictionary:
    #list out all keys
    e1 = key
    #print(e1)
    for s_key in new_dictionary[key]:
        #list out all
        e2 = s_key
        #print(e2)
        e3 = new_dictionary[key][s_key]
        #print(e3)
        l_tups.append((e1,e2,e3))
[17]:
import pandas as pd
[18]:
df = pd.DataFrame(l_tups, columns = ['key', 'val', 'count'])
print(df)
               key         val  count
0          subject         PNC     11
1          subject  1832999514     28
2          session     pepolar      3
3          session      nofmap      1
4          session      phdiff      2
5          session        buds      3
6          session      phases      2
7          session        PNC2     18
8          session        PNC1     10
9           suffix  magnitude2      2
10          suffix      phase2      2
11          suffix  magnitude1      2
12          suffix      phase1      2
13          suffix         T1w      4
14          suffix       idemo      2
15          suffix   frac2back      2
16          suffix        bold      4
17          suffix         dwi      8
18            fmap  magnitude2      1
19            fmap      phase2      1
20            fmap  magnitude1      1
21            fmap      phase1      1
22        datatype        fmap      8
23        datatype        anat      4
24        datatype        func      8
25        datatype         dwi      8
26       extension      nii.gz     12
27       extension        json     12
28       extension        bval      2
29       extension        bvec      2
30  reconstruction     refaced      2
31            task       idemo      2
32            task   frac2back      2
33            task        rest      4
34     acquisition  singleband      4
35             run           2      4
36             run           1      4
[ ]: