Source code for dbcollection.utils.os_dir

"""
This module contains methods for parsing directories
"""


from __future__ import print_function
import os
import progressbar


img_extensions = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG',
    '.ppm', '.PPM',
    '.bmp', '.BMP',
]


[docs]def dir_get_size(dir_path): """Returns the number of files and subfolders in a directory. Parameters ---------- dir_path : str Directory path. Returns ------- int Number of files in the folder. int Number of folders in the path. """ files = folders = 0 for _, dirname, filenames in os.walk(dir_path): files += len(filenames) folders += len(dirname) return files, folders
[docs]def construct_set_from_dir(dir_path, verbose=True): """Build a dataset from a directory. This method creates a dataset from a root folder. The first child folders compose the dataset's classes and all files inside correspond to the data. Parameters ---------- dir_path : str Directory path to create the set structure from. verbose : bool, optional Prints messages to the screen (if True). Returns ------- dict Set structure with keys as class names and values as image filenames. """ assert os.path.isdir(dir_path), 'Invalid path: {}'.format(dir_path) def is_image_file(filename): """Check if a filename has an extension of an image.""" return any(filename.endswith(extension) for extension in img_extensions) # init set set_data = {} if verbose: print('Fetching files + subdirs from: {}'.format(dir_path)) _, num_folders = dir_get_size(dir_path) counter = 0 prgbar = progressbar.ProgressBar(max_value=num_folders) # cycle all elems in a dir for dname in os.listdir(dir_path): subdir_path = os.path.join(dir_path, dname) # check if it is a dir if not os.path.isdir(subdir_path): continue # cycle all elems in a dir for _, _, fnames in sorted(os.walk(subdir_path)): class_name = dname set_data[class_name] = [] # cycle all files inside the dir for fname in fnames: if is_image_file(fname): set_data[class_name].append(fname) if verbose: # update progress bar counter += 1 prgbar.update(counter) return set_data
[docs]def construct_dataset_from_dir(dir_path, verbose=True): """Build a dataset from a directory. This method creates a dataset from a root folder. The first child folders compose the dataset's partition into train/val/test/etc. Then, child folders of these compose the dataset's classes and all files inside correspond to the data. Parameters ---------- dir_path : str Directory path to create the dataset structure from. verbose : bool, optional Prints messages to the screen (if True). Returns ------- dict Dataset structure. """ assert os.path.isdir(dir_path), 'Invalid path: {}'.format(dir_path) # init images list dataset = {} # cycle all elems in a dir for set_name in os.listdir(dir_path): dir_set = os.path.join(dir_path, set_name) # check if it is a dir if not os.path.isdir(dir_set): continue # fetch all data for this set folder if verbose: print('Processing dir: {}'.format(dir_set)) dataset[set_name] = construct_set_from_dir(dir_set) return dataset