# Source code for rsmtool.utils.files
"""
Utility classes and functions for RSMTool file management.
:author: Jeremy Biggs (jbiggs@ets.org)
:author: Anastassia Loukina (aloukina@ets.org)
:author: Nitin Madnani (nmadnani@ets.org)
:organization: ETS
"""
import json
import re
from glob import glob
from pathlib import Path
from os.path import join
from .constants import POSSIBLE_EXTENSIONS
def has_files_with_extension(directory, ext):
    """
    Check if the directory has any files with the given extension.

    Parameters
    ----------
    directory : str
        The path to the directory where output is located.
    ext : str
        The given extension, without the leading period (it is
        appended after ``*.`` to build the search pattern).

    Returns
    -------
    bool
        True if directory contains files with given extension,
        else False.
    """
    # Imported locally because the module only imports ``glob.glob``.
    from glob import escape

    # Escape the directory part so that glob metacharacters in its name
    # (``[``, ``]``, ``*``, ``?``) are matched literally; only the
    # trailing ``*.<ext>`` component is meant to act as a pattern.
    pattern = join(escape(directory), '*.{}'.format(ext))
    return bool(glob(pattern))
def get_output_directory_extension(directory, experiment_id):
    """
    Check the output directory to determine what file extensions
    exist. If more than one extension (in the possible list of
    extensions) exists, then raise a ValueError. Otherwise,
    return the one file extension. If no extensions can be found, then
    `csv` will be returned by default.

    Possible extensions include: `csv`, `tsv`, `xlsx`. Files in the
    directory with none of these extensions will be ignored.

    Parameters
    ----------
    directory : str
        The path to the directory where output is located.
    experiment_id : str
        The ID of the experiment. Only used in the error message.

    Returns
    -------
    extension : {'csv', 'tsv', 'xlsx'}
        The extension that output files in this directory
        end with.

    Raises
    ------
    ValueError
        If any files in the directory have different extensions,
        and are in the list of possible output extensions.
    """
    extensions_identified = {ext for ext in POSSIBLE_EXTENSIONS
                             if has_files_with_extension(directory, ext)}

    if len(extensions_identified) > 1:
        # Sort before joining: iteration order of a set of strings is not
        # stable across interpreter runs, which would make the message
        # vary from run to run.
        raise ValueError('Some of the files in the experiment output directory (`{}`) '
                         'for `{}` have different extensions. All files in this directory '
                         'must have the same extension. The following extensions were '
                         'identified : {}'.format(directory,
                                                  experiment_id,
                                                  ', '.join(sorted(extensions_identified))))

    # Exactly one recognized extension present: that is the answer.
    if extensions_identified:
        return next(iter(extensions_identified))

    # No recognized extension found: fall back to the default.
    return 'csv'