Source code for driftai.approach

import hashlib
import datetime
import re
import shutil
import subprocess
from datetime import datetime
from pathlib import Path
from abc import abstractmethod, ABC, abstractproperty

from driftai.utils import maybe_make_dir, str_to_date, to_camel_case
from driftai.db import Persistent
from driftai.data import SubDataset
from driftai.run import Run
from driftai.result_report import ResultReport
from driftai.exceptions import OptAppInstanceExistsException

class Approach(Persistent):
    """
    Hold the information describing an approach and generate the approach
    directory structure (approaches directory plus the approach script).
    """

    # Skeleton written into a freshly created approach script. The single
    # ``{}`` placeholder receives the camel-cased approach id.
    _EMPTY_APPROACH = """
from driftai import RunnableApproach
from driftai.run import single_run

@single_run
class {}Approach(RunnableApproach):

    @property
    def parameters(self):
        \"\"\"
        Declare your parameters here
        \"\"\"
        return []

    def learn(self, data, parameters):
        \"\"\"
        Define, train and return your model here
        \"\"\"
        return None # Return a trained model

    def inference(self, model, data):
        \"\"\"
        Use the injected model to make predictions with the data
        \"\"\"
        return None # Return the prediction
"""

    def __init__(self, project, name, subdataset=None, path=None, creation_date=None):
        """
        Parameters
        ----------
        project : Project
            DriftAI Project
        name : str
            Approach name. Dashes are replaced by underscores so the
            generated script stays importable
        subdataset: Subdataset
            DriftAI Subdataset which contains the training instances
        path: str, optional
            Directory where the approach script is stored.
            Default value is <project.path>/approaches
        creation_date: str, optional
            Date of the approach creation. Should not be set manually

        Raises
        ------
        OptAppInstanceExistsException
            If no ``creation_date`` is given and an approach with the same
            id is already stored in the approaches collection
        """
        self.project = project
        self.name = name.replace("-", "_")  # Replace - to avoid import errors
        self.subdataset = subdataset
        self.path = path or str(Path(self.project.path, "approaches"))
        self.script_path = Path(self.path, self.name + ".py")
        self.runs = []
        self.creation_date = str_to_date(creation_date) or datetime.now()

        # ``load_from_data`` always passes the stored creation date, so a
        # missing date means a new approach is being created: refuse to
        # shadow one that is already persisted under the same id.
        if creation_date is None and Approach.collection().exists(self.id):
            raise OptAppInstanceExistsException("Approach")
        self.create_structure()

    @property
    def id(self):
        """
        Snake-case identifier derived from the approach name
        (e.g. ``RandomForest`` -> ``random_forest``)
        """
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', self.name)
        return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()

    @staticmethod
    def collection():
        """
        Get table containing approaches

        Returns
        -------
        TinyDB instance
        """
        # Imported at call time rather than at module level
        # (presumably to avoid a circular import; confirm before moving).
        from driftai.db import Collections
        return Collections.approaches()

    def create_structure(self):
        """
        Generate approach directory structure

        Creates the approach directory (including missing parents) and,
        when it does not exist yet, the approach script filled with the
        empty approach template.
        """
        # parents/exist_ok make this safe when the project directory tree is
        # incomplete or the directory shows up between a check and the call.
        Path(self.path).mkdir(parents=True, exist_ok=True)

        # Create script, never overwriting an existing one
        if not self.script_path.exists():
            with self.script_path.open("w") as f:
                f.write(Approach._EMPTY_APPROACH.format(to_camel_case(self.id)))

    @classmethod
    def load_from_data(cls, data):
        """
        Load approach from JSON coming from TinyDB

        Parameters
        ----------
        data: dict
            Dict containing the approach instance summary

        Returns
        --------
        Approach
            Approach instance generated from JSON data
        """
        from driftai.project import Project
        from driftai.db import Collections

        subdataset = Collections.subdatasets().get(data["subdataset"])
        a = Approach(
            project=Project.load(),
            name=data["name"],
            subdataset=subdataset,
            path=str(Path(data["path"]).absolute()),
            creation_date=data["creation_date"])

        a.runs = [Run.load_from_data(r, subdataset=subdataset) for r in data["runs"]]
        return a

    @property
    def status(self):
        """
        Get the status of approach runs

        An approach without any run is reported as not done, with 0 %
        progress, instead of failing with a division by zero.

        Returns
        -------
        dict
            Dictionary containing the done and left runs ... ::

                {
                    "done": <have all the runs finished?>,
                    "total_runs": <all runs>,
                    "done_runs": <done runs>,
                    "progress_bar": <string containing the progress bar>
                }
        """
        bar_width = 40
        total_runs = len(self.runs)
        done_runs = sum(1 for r in self.runs if r.done())

        if total_runs:
            percent = int(100 * (done_runs / total_runs))
            filled_length = bar_width * done_runs // total_runs
        else:
            # No runs yet: nothing to divide by, show an empty bar.
            percent = 0
            filled_length = 0

        bar = "=" * filled_length + '>' + '-' * (bar_width - filled_length)
        return {
            "done": total_runs > 0 and done_runs == total_runs,
            "total_runs": total_runs,
            "done_runs": done_runs,
            "progress_bar": " [{}] {} %".format(bar, percent)
        }

    def get_last_run_date(self):
        """
        Get the date of the last run

        Returns
        -------
        datetime or None
            Latest finish date among the done runs, None when no done run
            has a finish date
        """
        last_date = datetime.min
        for run in self.runs:
            # The finish date is normalised in place through str_to_date so
            # that it can be compared against datetime values.
            run.finish_date = str_to_date(run.finish_date)
            if run.done() and \
               run.finish_date is not None and \
               run.finish_date > last_date:
                last_date = run.finish_date

        # datetime.min is only a sentinel meaning "nothing found"
        return last_date if last_date != datetime.min else None

    def get_info(self):
        """
        Get the info to serialize an Approach instance

        Note that ``subdataset`` must be set: its ``id`` is read here.

        Returns
        -------
        dict
            Dictionary containing an Approach object summary::

                {
                    "id": <unique identifier>,
                    "project": <project containing the approach>,
                    "subdataset": <subdataset which approach will run against>,
                    "name": <approach name>,
                    "path": <approach file system location>,
                    "runs": <runs which runnable approach will execute>,
                    "creation_date": <approach creation date>
                }
        """
        return {
            "id": self.id,
            "project": self.project.id,
            "subdataset": self.subdataset.id,
            "name": self.name,
            "path": self.path,
            "runs": [r.get_info() for r in self.runs],
            "creation_date": str(self.creation_date)
        }
class RunnableApproach(ABC):
    """
    Object responsible for handling an approach execution.

    Inherit from this class in order to create your own approach and run it.
    The subclass must be named ``<CamelCaseName>Approach``: the name is used
    to look up the stored Approach.
    """

    # NOTE(review): the script template generated by Approach declares
    # ``learn(self, data, parameters)`` and ``inference(self, model, data)``,
    # which differs from the abstract signatures below - confirm against the
    # runner which one is the real contract.

    def __init__(self, **kwargs):
        """
        Parameters
        ----------
        runner :
            Required keyword argument; object whose ``run(approach, resume)``
            method executes the approach. A KeyError is raised if missing.
        """
        self.runner = kwargs["runner"]

        print("Loading approach data")
        approach_id = self._get_approach_id_from_class()
        self.approach = Approach.load(approach_id)

    def run(self, resume=False):
        """
        Run the approach by delegating to the runner

        Parameters
        ----------
        resume: bool
            If resume = True only pending runs will be executed,
            otherwise runs will be generated and executed
        """
        self.runner.run(self, resume)

    def _get_approach_id_from_class(self):
        """
        Derive the approach id from the runnable approach class name

        Example: RandomForestApproach -> random_forest

        Raises
        ------
        ValueError
            If the class name is not of the form ``<Name>Approach``
        """
        matched = re.match(r"(\w+)Approach$", type(self).__name__)
        if matched is None:
            raise ValueError("Wrong class name")

        # Prefix every capital with "_" and drop the leading one afterwards
        underscored = re.sub("([A-Z])", r"_\1", matched.group(1))
        return underscored.strip("_").lower()

    @property
    def parameters(self):
        """
        Define your parameters range here

        Returns
        -------
        list of AbstractParameter
            Parameters used to generate the runs
        """
        return []

    @abstractmethod
    def learn(self, parameters, data):
        """
        Define the train logic for the model here

        Returns
        -------
        any
            The trained model
        """

    @abstractmethod
    def inference(self, data):
        """
        Define the inference logic here

        Returns
        -------
        pandas.Series or numpy.array
            Containing the predicted labels
        """