# Source code for driftai.project
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Optimise approaches to solve machine learning problems.
TODO: Change random seed to be -1 by default, which (in all scripts) will pick a random one, as in eval_approach.
TODO: Put time_estimate out of the pipeline, in a different library (it already existed).
"""
from datetime import datetime
import hashlib
import os
from pathlib import Path
from driftai.exceptions import OptAppProjectDirNotExistsException, OptAppProjectFileNotExistsException, \
OptAppProjectElementNotExistsException, OptAppProjectNameExistsException, OptAppProjectLoadPathIsNotDirException, OptAppProjectWrongInfoFileStructureException
from driftai.utils import check_folder_structure, str_to_date
from driftai.approach import Approach
from driftai.data import SubDataset
from driftai.db import Database, Collections, DatabaseInjector
# Module-level metadata (authorship, licensing, version and maintenance status).
__author__ = "Pablo A. Rosado and Francesc Guitart"
__copyright__ = "Copyright 2018, GFT IT Consulting"
__credits__ = ["Pablo A. Rosado", "Francesc Guitart"]
# NOTE(review): "Firefox" is not a recognised license name -- confirm the intended license.
__license__ = "Firefox"
__version__ = "0.0.1"
__maintainer__ = "Francesc Guitart"
__email__ = "francesc.guitart@gft.com"
# NOTE(review): likely a misspelling of "Development"; left untouched because it is a runtime value.
__status__ = "Developement"
class Project(object):
    """
    A project directory on disk together with the database record describing it.

    Creating an instance without ``creation_date`` creates the directory
    layout listed in ``dir_components`` and stores the project info in the
    database. Creating one with ``creation_date`` (as :meth:`load` does)
    validates an existing directory instead, or creates it if it is missing.
    """

    # Entries expected directly under the project directory, mapped to their kind.
    dir_components = {
        "project_files": "dir",
        "approaches": "dir",
        "driftai.db": "file"
    }

    # Exception classes keyed by entry kind; handed to ``check_folder_structure``
    # together with ``dir_components`` when an existing project is validated.
    dir_exceptions = {
        "file": OptAppProjectFileNotExistsException,
        "dir": OptAppProjectDirNotExistsException,
        "default": OptAppProjectElementNotExistsException
    }

    def __init__(self, name=None, path=None, creation_date=None):
        """
        Parameters
        ----------
        name: str
            Project name. Default: untitled_driftai_project (with a numeric
            suffix if that directory already exists)
        path: str
            Directory that contains the project, or the project directory
            itself when its last component equals ``name``. Default: current
            working directory
        creation_date: datetime
            Creation date. Should not be set manually

        Raises
        ------
        OptAppProjectNameExistsException
            In case project name already exists
        """
        # Resolve the base directory first so the default-name lookup never
        # receives ``None`` (``Path(None, ...)`` raises TypeError).
        base_path = Path(path if path else ".").absolute()
        self.name = name if name else self._get_name(base_path)

        # Only skip appending the name when the caller explicitly passed a name
        # that is already the last path component. ``Path.name`` is used rather
        # than ``Path.stem`` so that names containing dots are compared in full.
        if name and base_path.name == name:
            project_path = base_path
        else:
            project_path = base_path.joinpath(self.name)
        self.path = str(project_path)

        self.creation_date = creation_date if creation_date else datetime.now()

        # A missing creation date means "new project": refuse to overwrite.
        if creation_date is None and self.exists():
            raise OptAppProjectNameExistsException(self.path)
        self._maybe_create_path()

    @property
    def id(self):
        """Project identifier; this is the project name."""
        return self.name

    def save(self):
        """
        Insert the project info (see :meth:`get_info`) into the project database.
        """
        # Workaround when project is created
        # Project is created at <current_dir>/project_name while user is at <current_dir>
        # We must force the database to create the new database in <current_dir>/project_name/driftai.db
        db = Database(self.path)
        try:
            db.insert(self.get_info())
        finally:
            # Release the database handle even if the insert fails.
            db.close()

    @classmethod
    def load(cls):
        """
        Creates a project from the first record of the injected database

        The record is read from ``DatabaseInjector.db()``; this method takes
        no arguments.

        Raises
        ------
        OptAppProjectWrongInfoFileStructureException
            In case the database holds no record, or the record is not a
            dictionary containing "path", "creation_date" and "name"

        Returns
        -------
        Project
            Returns the loaded project
        """
        required_keys = {"path", "creation_date", "name"}

        def check_project_content_structure(params):
            # Valid when ``params`` is a dict holding at least the required keys.
            return isinstance(params, dict) and required_keys.issubset(params)

        records = DatabaseInjector.db().all()
        if not records:
            # An empty database cannot describe a project.
            raise OptAppProjectWrongInfoFileStructureException
        project_content = records[0]
        if not check_project_content_structure(project_content):
            raise OptAppProjectWrongInfoFileStructureException

        # NOTE(review): get_info() stores creation_date via str(), so the value
        # forwarded here is presumably a string rather than a datetime -- confirm
        # whether it should be converted (e.g. with str_to_date) before use.
        # Forward only the validated keys so stray fields cannot break __init__.
        return cls(**{key: project_content[key] for key in required_keys})

    def exists(self):
        """
        Check if project exists

        Returns
        -------
        bool
            True if the project path is an existing directory
        """
        return Path(self.path).is_dir()

    def _create_project_structure(self):
        # Creates the project directory and every entry of ``dir_components``
        project_path = Path(self.path)
        project_path.mkdir()
        for element, kind in self.dir_components.items():
            element_path = project_path.joinpath(element)
            if kind == "dir":
                element_path.mkdir()
            elif kind == "file":
                element_path.touch()
            else:
                raise Exception("Error creating project structure")

    def _get_name(self, path):
        # Generates a default project name that is not yet a directory under
        # ``path`` (the current directory when ``path`` is empty or None)
        base_path = Path(path if path else ".")
        candidate = "untitled_driftai_project"
        suffix = 1
        while base_path.joinpath(candidate).absolute().is_dir():
            candidate = "untitled_driftai_project_{}".format(suffix)
            suffix += 1
        return candidate

    def _maybe_create_path(self):
        # When loading a project check directory tree structure else, when creating a new project
        # creates tree structure
        if self.exists():
            check_folder_structure(self.path,
                                   Project.dir_components,
                                   Project.dir_exceptions)
        else:
            self._create_project_structure()
            self.save()

    def get_info(self):
        """
        Get project info

        Returns
        -------
        dict
            Dictionary containing the project info::

                {
                    "path": <project_location>,
                    "creation_date": <project's creation date, as a string>,
                    "name": <project name>
                }
        """
        return {
            "path": self.path,
            "creation_date": str(self.creation_date),
            "name": self.name
        }

    def get_subdatasets(self):
        """
        Get all subdatasets

        Returns
        -------
        list(Subdataset)
            One SubDataset per record in the subdatasets collection
        """
        return [SubDataset.load_from_data(p) for p in Collections.subdatasets().all()]

    def get_last_subdataset(self):
        """
        Get last subdataset

        Raises
        ------
        IndexError
            In case there are no subdatasets

        Returns
        -------
        driftai.data.SubDataset
            The subdataset with the greatest ``creation_date``
        """
        subdatasets = self.get_subdatasets()
        return sorted(subdatasets, key=lambda s: s.creation_date)[-1]

    def is_running(self):
        """
        Not implemented yet; currently always returns None.
        """
        # TODO: Check if any approach is running
        # last_subdataset = self.get_last_subdataset()
        # rp = RunPool(subdataset_path=last_subdataset)
        # return rp.has_next()
        return None