Source code for minus80.Tools
from .Config import cf
import os
import shutil
from glob import glob
from collections import defaultdict
from pprint import pprint
__all__ = ['available', 'delete']
def get_files(dtype=None, name=None, fullpath=False):
    '''
    List the top level entries in the minus80 ``databases`` directory
    whose names match ``<dtype>.<name>``.

    Parameters
    ----------
    dtype : str, default=None
        The data type of the dataset. E.g.: Cohort.
        If None, the wildcard "*" is used so that every dtype
        matches. Glob syntax is accepted.
    name : str, default=None
        The name of the dataset. If None, the wildcard "*" is used
        so that every name matches. Glob syntax is accepted.
    fullpath : bool, default=False
        If true, the paths produced by the glob are returned,
        if false, only the final path component of each match
        is returned.

    Returns
    -------
    list of str
        The matches, ordered by their sorted full path.

    .. note:: Only top level entries are returned; these are
              sometimes directories rather than regular files.
    '''
    # A missing dtype or name matches everything
    dtype_pattern = "*" if dtype is None else dtype
    name_pattern = "*" if name is None else name
    root = os.path.expanduser(cf.options.basedir)
    pattern = os.path.join(root, 'databases', f'{dtype_pattern}.{name_pattern}')
    # Sort on the full path before (optionally) stripping the directory
    matches = sorted(glob(pattern))
    if fullpath:
        return matches
    return [os.path.basename(match) for match in matches]
def available(dtype=None, name=None):
    '''
    Reports the available datasets **Frozen** in the minus80
    database.

    Parameters
    ----------
    dtype : str, default=None
        Each dataset has a datatype associated with it. E.g.:
        `Cohort`. If no dtype is specified, all available dtypes
        will be reported.
    name : str, default=None
        The name of the dataset you want to check is available.
        If no name is specified, all available datasets with the
        specified dtype will be reported.

    Returns
    -------
    bool, None
        If both dtype and name are specified, a bool is returned
        indicating if the dataset is available. Otherwise a formatted
        table is printed and None is returned.
    '''
    files = get_files(dtype=dtype, name=name)
    bdir = os.path.expanduser(cf.options.basedir)
    print(f'Using basedir: {bdir}')
    # A bool is only returned when both params are specified
    if dtype is not None and name is not None:
        return len(files) > 0
    # Print message if nothing is here
    if len(files) == 0:  # pragma: no cover
        print("--- Nothing here yet ---")
        return None
    # Group by dtype. Split on the FIRST dot only: dataset names may
    # themselves contain dots (e.g. 'Cohort.v1.2'), which would make a
    # plain two-way unpack of split('.') raise ValueError. Distinct
    # local names are used so the function's parameters are not clobbered.
    datasets = defaultdict(list)
    for entry in files:
        entry_dtype, _, entry_name = entry.partition('.')
        datasets[entry_dtype].append(entry_name)
    # Print a formatted table
    for group, names in datasets.items():
        print(f"--- {group}: -----------------")
        for i, dataset_name in enumerate(names, 1):
            print(f'\t{i}. {dataset_name}')
    return None
def delete(dtype=None, name=None, force=False):
    '''
    Deletes files associated with Minus80 datasets.

    Parameters
    ----------
    dtype : str
        Each dataset has a datatype associated with it. E.g.:
        `Cohort`. Must be specified; glob syntax is passed through
        to the file lookup.
    name : str
        The name of the dataset you want to delete. Must be
        specified; glob syntax is passed through to the file lookup.
    force : bool, default: False
        If False, the function will list off the files it wants to delete
        and ask for confirmation.
        If True, it will do what you tell it to do and just delete things
        (not recommended).

    Returns
    -------
    int
        Returns the number of top level entries deleted. 0 is
        returned when nothing was deleted.

    .. warning:: This is damaging. Deleted datasets cannot be (easily) recovered.
    '''
    # available() only answers True/False when BOTH values are given;
    # otherwise it prints a table and returns None, which would be
    # misreported below as "does not exist". Refuse explicitly instead.
    if dtype is None or name is None:
        print('Both dtype and name must be specified -- nothing deleted')
        return 0
    if not available(dtype, name):
        print(f'{dtype}.{name} does not exist -- nothing deleted')
        return 0
    # Get the full paths of all the minus80 entries that match the
    # type and the name
    paths = get_files(name=name, dtype=dtype, fullpath=True)
    if not force:  # pragma: no cover
        print(f'Are you sure you want to delete {dtype}.{name}?:\n')
        # List what is about to be removed so the user can decide
        for path in paths:
            print(f'\t{os.path.basename(path)}')
        if input('[y/n]: ').upper() != 'Y':
            print('Nothing deleted.')
            return 0
    # delete them
    num_deleted = 0
    for path in paths:
        # Top level entries are only sometimes directories, and
        # shutil.rmtree refuses regular files and symlinks.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)
        num_deleted += 1
    return num_deleted