Source code for emobpy.database

"""
This module contains data organisation classes to read, load and edit the resulting time series. 
See also the examples in the documentation https://diw-evu.gitlab.io/emobpy/emobpy

For more details see the article and cite:

.. code-block:: python

    @article{Gaete-Morales_2021,
    author={Gaete-Morales, Carlos and Kramer, Hendrik and Schill, Wolf-Peter and Zerrahn, Alexander},
    title={An open tool for creating battery-electric vehicle time series from empirical data, emobpy},
    journal={Scientific Data}, year={2021}, month={Jun}, day={11}, volume={8}, number={1}, pages={152},
    issn={2052-4463}, doi={10.1038/s41597-021-00932-9}, url={https://doi.org/10.1038/s41597-021-00932-9}}

"""

import time
import pickle
import gzip
import os
import uuid
from .tools import parallelize, check_for_new_function_name

# Per profile kind: the keys that DataBase.loadfiles_batch copies out of each
# pickled profile (any other key found in the file is left behind).
VARIABLES = {
    # keys kept for profiles whose 'kind' is 'driving'
    'driving': [
        'name',
        'kind',
        'profile',
        't',
        'totalrows',
        'hours',
        'f',
        'refdate',
        'states',
        'timeseries',
    ],
    # keys kept for profiles whose 'kind' is 'consumption'
    'consumption': [
        'name',
        'input',
        'kind',
        'profile',
        't',
        'totalrows',
        'hours',
        'f',
        'refdate',
        'states',
        'vehicle',
        'timeseries',
    ],
    # keys kept for profiles whose 'kind' is 'availability'
    'availability': [
        'name',
        'input',
        'kind',
        'profile',
        'timeseries',
        'chargingdata',
        'battery_capacity',
        'charging_eff',
        'discharging_eff',
        'soc_init',
        'soc_min',
        't',
        'totalrows',
        'refdate',
        'success',
        'notation',
        'description',
        'soc_end',
    ],
    # keys kept for profiles whose 'kind' is 'charging'
    'charging': [
        'name',
        'kind',
        'input',
        'option',
        'timeseries',
        'success',
    ],
}


class DataBase(object):
    """
    Collect many profiles, stored as gzip-compressed pickle files, in one object.

    Important attribute:

        self.db: dictionary that contains all loaded profiles. The keys are
        the profile names and every value is the dictionary read from the
        profile file. Its keys depend on the kind of profile. Common keys:

            self.db["name of the profile"]["kind"]
                the profile kind, e.g. "driving", "consumption",
                "availability" or "charging"
            self.db["name of the profile"]["input"]
                a string, only for profiles built from another profile
                (e.g. "availability" and "charging")

    Args:
        folder (str): Path of the folder where profiles are hosted.
    """

    def __init__(self, folder):
        super(DataBase, self).__init__()
        self.name = ''
        self.folder = folder
        # File names seen by the last scan that found something new.
        self.oldpath = []
        self.db = {}

    def __getattr__(self, item):
        # Python calls this only when the normal attribute lookup fails.
        # The helper comes from .tools; presumably it reports renamed
        # functions -- confirm there.
        check_for_new_function_name(item)
        # if the return value is not callable, we get TypeError:

    def _scan_repo(self, loaddir):
        # Pick the directory to read (loaddir, else self.folder), create it if
        # missing, and record the regular files it contains.
        self.repo = loaddir if loaddir else self.folder
        os.makedirs(self.repo, exist_ok=True)
        self.currentpath = [
            f for f in os.listdir(self.repo)
            if os.path.isfile(os.path.join(self.repo, f))
        ]

    def loadfiles(self, loaddir=''):
        """
        Load every not-yet-seen profile of a directory into self.db.

        Profiles can be hosted in a directory other than "folder"; that
        directory must then be indicated (loaddir). In this way profiles from
        many directories can be loaded.

        Args:
            loaddir (str, optional): Directory to load from. Defaults to '',
                which means the "folder" given at construction.
        """
        self._scan_repo(loaddir)
        # NOTE: files are tracked by bare file name, so a file with the same
        # name in another directory counts as already loaded.
        self.path_list = list(set(self.currentpath) - set(self.oldpath))
        if not self.path_list:
            return
        self.oldpath = self.currentpath
        for fname in self.path_list:
            if fname.split('.')[-1] != 'pickle':
                continue
            # NOTE: pickle can execute arbitrary code while loading; only
            # point this at trusted profile files.
            with gzip.open(os.path.join(self.repo, fname), "rb") as handle:
                profile = pickle.load(handle)
            self.db[profile['name']] = profile

    def loadfiles_batch(self, loaddir='', batch=10, nr_workers=4, kind='',
                        add_variables=None):
        """
        Load the profiles of one kind into self.db, reading files in batches.

        Only the keys listed in VARIABLES[kind] (plus add_variables) are kept
        from every profile. Each batch is handed to parallelize() together
        with nr_workers.

        Args:
            loaddir (str, optional): Directory to load from. Defaults to '',
                which means the "folder" given at construction.
            batch (int, optional): Number of files per batch. Values below 1
                produce no batch, so nothing is loaded. Defaults to 10.
            nr_workers (int, optional): Number of workers passed to
                parallelize(). Defaults to 4.
            kind (str): Data kind to load, e.g. 'consumption'. Must be a key
                of VARIABLES; the default '' is not.
            add_variables (list, optional): Extra keys to keep from every
                profile. Defaults to None (no extra keys).

        Raises:
            KeyError: If kind is not a key of VARIABLES.
        """
        try:
            wanted = VARIABLES[kind]
        except KeyError:
            raise KeyError(
                "unknown kind {!r}; expected one of {}".format(
                    kind, sorted(VARIABLES))) from None
        variables = list(set(wanted + list(add_variables or [])))

        self._scan_repo(loaddir)
        files = self.currentpath
        # Consecutive slices of at most `batch` file names.
        if batch > 0:
            paths_batch = [
                files[start:start + batch]
                for start in range(0, len(files), batch)
            ]
        else:
            paths_batch = []

        for chunk in paths_batch:
            dc = {
                k: {'f': os.path.join(self.repo, v)}
                for k, v in enumerate(chunk)
                if v.split('.')[-1] == 'pickle'
            }
            odc = parallelize(self.loadpkl, dc, nr_workers,
                              **dict(variables=variables, kind=kind))
            print(len(odc))
            for j in odc:
                # Each result is (profile_dict, matched); keep matches only.
                if odc[j][1]:
                    self.db[odc[j][0]['name']] = odc[j][0]

    @staticmethod
    def loadpkl(f, variables, kind):
        """
        Read one gzip-compressed pickle file and keep the requested keys.

        Args:
            f (str): Path to pickle file.
            variables (list): Keys which should be copied from the profile.
                Keys missing in the file are skipped.
            kind (str): Data kind to load.

        Returns:
            tuple: (dict, True) with the selected keys when the profile's
            'kind' equals kind, otherwise ({}, False).
        """
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at trusted profile files.
        with gzip.open(f, "rb") as handle:
            obj = pickle.load(handle)
        if obj['kind'] != kind:
            return {}, False
        return {nm: obj[nm] for nm in variables if nm in obj}, True

    def update(self):
        """
        Run self.loadfiles() to load files from database "folder".
        """
        self.loadfiles()

    def getdb(self):
        """
        Run self.update() and return the imported profiles.

        Returns:
            dict: self.db, the loaded profiles keyed by profile name.
        """
        self.update()
        return self.db

    def remove(self, name):
        """
        Remove a profile from self.db and delete its file from "folder".

        Args:
            name (str): Key which is to be deleted. The file removed is
                "<name>.pickle" inside "folder", if it exists.
        """
        deleted = []
        self.db.pop(name, None)
        fname = name + '.pickle'
        target = os.path.join(self.folder, fname)
        if os.path.isfile(target):
            os.remove(target)
            deleted.append(fname)
            # Forget the file so that a profile saved later under the same
            # name is picked up again by loadfiles().
            self.oldpath = [p for p in self.oldpath if p != fname]
        self.update()
        print('Files deleted:', len(deleted))
        print(deleted)
class DataManager:
    """
    Store a database object in a gzip-compressed pickle file and read it back.
    """

    def __init__(self):
        super(DataManager, self).__init__()

    def __getattr__(self, item):
        # Python calls this only when the normal attribute lookup fails.
        check_for_new_function_name(item)
        # if the return value is not callable, we get TypeError:

    def savedb(self, obj, dbdir='db_files'):
        """
        Refresh a database and write it to "<dbdir>/<obj.name>.pickle".

        A database without a name gets one made of 'db_', a timestamp and
        five random hexadecimal characters.

        Args:
            obj (object): Database to be saved. Its update() method is called
                first.
            dbdir (str, optional): Directory to write into; created if
                missing. Defaults to 'db_files'.
        """
        obj.update()
        if not obj.name:
            stamp = time.strftime("%Y%m%d_%H%M%S")
            suffix = uuid.uuid4().hex[:5]
            obj.name = 'db_' + stamp + '_' + suffix
        os.makedirs(dbdir, exist_ok=True)
        target = os.path.join(dbdir, obj.name + '.pickle')
        with gzip.open(target, 'wb') as file:
            pickle.dump(obj, file)
            # Shows the open gzip handle, whose repr includes the file name.
            print(file)
        print('=== Database saved ===')

    def loaddb(self, dbfilepath, profilesdir):
        """
        Read a database from a pickle file and refresh it from a folder.

        Args:
            dbfilepath (str): Path to the gzip-compressed pickle file.
            profilesdir (str): Path to profiles directory; assigned to the
                database's "folder" before update() is called.

        Returns:
            object: Loaded database from pickle file.
        """
        with gzip.open(dbfilepath, 'rb') as file:
            database = pickle.load(file)
        database.folder = profilesdir
        database.update()
        return database