Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ Command line arguments
An optional extension to filter the DICOM files of interest from the
<inputDir>.

[-f|--fileFilter <fileFilter>]
A list of comma separated string filters to apply across the input file space

[-d|--dirFilter <dirFilter>]
A list of comma separated string filters to apply across the input dir space

[-O|--outputDir <outputDir>]
The output root directory that will contain a tree structure identical
to the input directory, and each "leaf" node will contain the analysis
Expand Down Expand Up @@ -155,7 +161,7 @@ Perform a DICOM anonymization by processing specific tags:
.. code:: bash

pfdicom_tagSub \
-e dcm \
-f ".dcm" \
-I /var/www/html/normsmall \
-O /var/www/html/anon \
--tagStruct '
Expand Down
20 changes: 19 additions & 1 deletion bin/pfdicom_tagSub
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def synopsis(ab_shortOnly = False):
-I|--inputDir <inputDir> \\
[-i|--inputFile <inputFile>] \\
[-e|--extension <DICOMextension>] \\
[-f|--fileFilter <fileFilter>] \\
[-d|--dirFilter <dirFilter>] \\
[-F|--tagFile <tagFile>] | [-T|--tagStruct <tagStruct>] \\
[--threads <numThreads>] \\
-O|--outputDir <outputDir> \\
Expand All @@ -86,7 +88,7 @@ def synopsis(ab_shortOnly = False):
BRIEF EXAMPLE

pfdicom_tagSub \\
-e dcm \\
-f ".dcm" \\
-I /var/www/html/normsmall \\
-O /var/www/html/anon \\
--tagStruct '
Expand Down Expand Up @@ -162,6 +164,12 @@ def synopsis(ab_shortOnly = False):
An optional extension to filter the DICOM files of interest from the
<inputDir>.

[-f|--fileFilter <fileFilter>]
A list of comma separated string filters to apply across the input file space

[-d|--dirFilter <dirFilter>]
A list of comma separated string filters to apply across the input dir space

[-O|--outputDir <outputDir>]
The output root directory that will contain a tree structure identical
to the input directory, and each "leaf" node will contain the analysis
Expand Down Expand Up @@ -292,6 +300,14 @@ parser.add_argument("-e", "--extension",
help = "DICOM file extension",
dest = 'extension',
default = '')
# Optional substring filter over file names: a comma separated list that is
# forwarded unchanged to pfdicom_tagSub as `fileFilter` ('' means no filter).
parser.add_argument(
    "-f", "--fileFilter",
    dest    = 'fileFilter',
    default = '',
    help    = "a list of comma separated string filters to apply across the input file space")
# Optional substring filter over directory names: a comma separated list that
# is forwarded unchanged to pfdicom_tagSub as `dirFilter` ('' means no filter).
parser.add_argument(
    "-d", "--dirFilter",
    dest    = 'dirFilter',
    default = '',
    help    = "a list of comma separated string filters to apply across the input dir space")
parser.add_argument("-F", "--tagFile",
help = "JSON formatted file containing tags to sub",
dest = 'tagFile',
Expand Down Expand Up @@ -370,6 +386,8 @@ pf_dicom_tagSub = pfdicom_tagSub.pfdicom_tagSub(
inputDir = args.inputDir,
inputFile = args.inputFile,
extension = args.extension,
fileFilter = args.fileFilter,
dirFilter = args.dirFilter,
outputDir = args.outputDir,
outputFileStem = args.outputFileStem,
outputLeafDir = args.outputLeafDir,
Expand Down
107 changes: 107 additions & 0 deletions pfdicom_tagSub/pfdicom_tagSub.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def declare_selfvars(self):
self.str_tagStruct = ''
self.str_tagFile = ''
self.d_tagStruct = {}
self.fileFilter = ''
self.dirFilter = ''

self.dp = None
self.log = None
Expand Down Expand Up @@ -92,6 +94,8 @@ def outputFile_process(str_outputFile):
if key == 'tagFile': tagFile_process(value)
if key == 'tagStruct': tagStruct_process(value)
if key == 'verbosity': self.verbosityLevel = int(value)
if key == 'fileFilter': self.fileFilter = value
if key == 'dirFilter': self.dirFilter = value

# Set logging
self.dp = pfmisc.debug(
Expand Down Expand Up @@ -273,6 +277,102 @@ def tags_substitute(self, **kwargs):
)
return d_tagSub

def FS_filter(self, at_data, *args, **kwargs) -> dict:
    """
    Apply a filter to the string space of file and directory
    representations.

    The purpose of this method is to reduce the original space of

                    "<path>": [<"filesToProcess">]

    to only those paths and files that are relevant to the operation being
    performed. Two filters are understood, a `fileFilter` that filters
    filenames that match any of the passed search substrings from the CLI
    `--fileFilter`, and a `dirFilter` that filters directories whose
    leaf node matches any of the passed `--dirFilter` substrings.

    The effect of these filters is hierarchical. First, the `fileFilter`
    is applied across the space of files for a given directory path. The
    files are subject to a logical OR operation across the comma separated
    filter argument. Thus, a `fileFilter` of "png,jpg,body" will filter
    all files that have the substrings of "png" OR "jpg" OR "body" in their
    filenames. A file that matches several substrings is kept exactly once.

    Next, if a `dirFilter` has been specified, the current string path
    corresponding to the filenames being filtered is considered. Each
    string in the comma separated `dirFilter` list is extracted, and if
    the basename of the working directory contains the filter substring,
    the (filtered) files are conserved. If the basename of the working
    directory does not contain any of the `dirFilter` substrings, the
    file list is discarded.

    Thus, a `dirFilter` of "100307,100556" and a fileFilter of "png,jpg"
    will reduce the space of files to process to ONLY files that have
    a parent directory of "100307" OR "100556" AND that contain either the
    string "png" OR "jpg" in their file names.

    Empty filter tokens (as produced by a leading, trailing or doubled
    comma) are ignored; a filter made up only of empty tokens is treated
    as if no filter had been given.

    ARGS
        at_data     indexable pair: at_data[0] is the directory path
                    string, at_data[1] the list of file names in that
                    directory. The passed list is not modified.

    RETURN
        {
            'status':   True if at least one file survived the filters,
                        False otherwise,
            'l_file':   sorted list of surviving file names, or None
                        if no file survived
        }
    """

    str_path    : str   = at_data[0]
    al_file     : list  = at_data[1]

    # The empty string is a substring of every name, so an empty token would
    # silently match everything -- drop such tokens up front.
    l_fileToken : list  = [t for t in self.fileFilter.split(',') if t]
    l_dirToken  : list  = [t for t in self.dirFilter.split(',') if t]

    if l_fileToken:
        # Iterate over the files (not the tokens) so that a file hitting
        # more than one token is still only listed once.
        al_file = [f for f in al_file
                        if any(t in f for t in l_fileToken)]

    if l_dirToken:
        # Only the leaf node of the path is examined, e.g. "1234567" in
        #
        #       /some/dir/in/the/space/1234567
        #
        str_leaf : str  = os.path.basename(str_path)
        if not any(t in str_leaf for t in l_dirToken):
            # No dir hit for this dir: zero out the file list.
            al_file = []

    if al_file:
        # sorted() returns a new list, leaving the caller's list untouched.
        l_file      = sorted(al_file)
        b_status    = True
    else:
        self.dp.qprint( "No valid files to analyze found in path %s!" %
                        str_path, comms = 'warn', level = 5)
        l_file      = None
        b_status    = False
    return {
        'status':   b_status,
        'l_file':   l_file
    }

def filterFileHitList(self) -> dict:
    """
    Run FS_filter over the tree held in self.pf_tree and hand back
    whatever tree_process() returns.

    No read or write callbacks are supplied; FS_filter is passed as the
    analysis callback, with the call configured to apply the 'l_file'
    key of each result to the 'inputTree' and to persist the analysis
    results.
    """
    return self.pf_tree.tree_process(
        analysisCallback        = self.FS_filter,
        inputReadCallback       = None,
        outputWriteCallback     = None,
        applyKey                = 'l_file',
        applyResultsTo          = 'inputTree',
        persistAnalysisResults  = True
    )


def run(self, *args, **kwargs):
"""
The run method calls the base class run() to
Expand All @@ -285,6 +385,7 @@ def run(self, *args, **kwargs):
b_status = True
d_tagSub = {}
b_timerStart = False
d_filter = {}

self.dp.qprint(
"Starting pfdicom_tagSub run... (please be patient while running)",
Expand All @@ -306,7 +407,12 @@ def run(self, *args, **kwargs):
timerStart = False
)


if d_pfdicom['status']:
if len(self.fileFilter) or len(self.dirFilter):
d_filter = self.filterFileHitList()
b_status = d_filter['status']

str_startDir = os.getcwd()
os.chdir(self.str_inputDir)
if b_status:
Expand All @@ -318,6 +424,7 @@ def run(self, *args, **kwargs):
'status': b_status,
'd_pfdicom': d_pfdicom,
'd_tagSub': d_tagSub,
'd_filter': d_filter,
'runTime': other.toc()
}

Expand Down