rbmatlab 0.10.01
rbasis/basisgen/+Greedy/Checkpoint.m
classdef Checkpoint
  % Helper class used to store and restore @ref DataTree "data tree" objects at
  % specified checkpoints.
  %
  % This class can be used for checkpointing. The most prominent use case is
  % the ReducedModel.gen_detailed_data() method. When this method is called,
  % checkpoints are created after each bigger basis extension step at which the
  % generated detailed_data is stored. Afterwards, each of these checkpoints
  % can be re-read from the harddrive like this:
  % @code
  %   ids = Greedy.Checkpoint.get_ids(rmodel)
  %   [detailed_data, cp] = Greedy.Checkpoint.restore(rmodel, ids{1}, 'latest');
  % @endcode
  %
  % Instead of 'latest' one could also have used a file index. See
  % #file_rotation_size and store() for more details on this.


  properties (Hidden=true, Constant)
    % list of values ignored for hash computation by compute_hash()
    ignorelist = {'Mmax','MM','Mstrich','ei_Mmax','M','N','Nmax',...
                  'verbose','debug', ...
                  'enable_error_estimator'...
                 };
  end

  properties(Dependent)
    % flag shared by all Checkpoint instances controlling whether
    % restore_latest_if_available() actually restores a checkpoint.
    %
    % The value lives in static_flags() (initially 'true'); it is not stored
    % in the object itself.
    enable_auto_restore;

    % flag shared by all Checkpoint instances controlling whether store()
    % writes anything to the harddrive.
    %
    % The value lives in static_flags() (initially 'true'); it is not stored
    % in the object itself.
    enable_storing;
  end

  properties (Access = public)

    % maximum number of files stored in one directory in parallel.
    %
    % If this limit is reached, the oldest file is overwritten by the next
    % Checkpoint.store() call.
    file_rotation_size = 5;

    % The level indicates which level in a data tree of DataTree.INode instances
    % can be (re)stored by this Checkpoint instance.
    %
    % Level '1' corresponds to the root element of the data tree, level '2' to
    % a child of the root, level '3' to a child of a child of the root element,
    % ...
    level      = 1;

    % A cell array of indices as returned by DataTree.INode.get_index() defining
    % the index of the stored DataTree.INode instance with respect to the root
    % element of the tree.
    %
    % @todo: Why do I not store a single vector
    data_index = {[]};

    % a cell array of arbitrary userdata stored with each DataTree.INode node.
    userdata   = {[]};

    % a cell array of id strings which describes the DataTree.INode instance or the
    % algorithm which generates it.
    ids        = {''};

    % a cell array of filenames where the DataTree.INode children are stored.
    filenames  = {''};
  end

  methods

    function cp = Checkpoint(old, index)
      % function cp = Checkpoint(old, index)
      % creates a new checkpoint
      %
      % Called without arguments (or with only one), a default checkpoint of
      % level '1' is created.
      %
      % Parameters:
      %   old: an object of type .Greedy.Checkpoint whose checkpoint data is copied or
      %        included in the newly created checkpoint.
      %   index: if this is empty, argument old is copied, otherwise a new
      %          Checkpoint of level old.level+1 is created with data fields.

      if nargin == 2
        if isempty(index)
          cp.level = old.level;
        else
          cp.level = old.level + 1;
        end
        if cp.level <= length(old.userdata)
          % the requested level already exists in 'old': take it over as is
          cp.userdata   = old.userdata;
          cp.ids        = old.ids;
          cp.data_index = old.data_index;
          cp.filenames  = old.filenames;
        else
          % append an empty slot for the new level; its data index is the
          % index path of the deepest known level extended by 'index'
          cp.userdata   = [ old.userdata, {[]} ];
          cp.ids        = [ old.ids, {''} ];
          cp.filenames  = [ old.filenames, {''} ];
          cp.data_index = [ old.data_index, {[old.data_index{end}, index]} ];
        end
      end
    end

    function ec = get.enable_auto_restore(this)
      ec = Greedy.Checkpoint.static_flags('auto_restore');
    end

    function this = set.enable_auto_restore(this, nec)
      % the flag is shared by all instances, so the object itself is unchanged
      Greedy.Checkpoint.static_flags('auto_restore', nec);
    end

    function ec = get.enable_storing(this)
      ec = Greedy.Checkpoint.static_flags('storing');
    end

    function this = set.enable_storing(this, nec)
      % the flag is shared by all instances, so the object itself is unchanged
      Greedy.Checkpoint.static_flags('storing', nec);
    end

    function dd = restore_detailed_data(cp, model, cp_index)
      % function dd = restore_detailed_data(cp, model[, cp_index])
      % loads the detailed data described by this checkpoint from the harddrive
      %
      % If the checkpoint only knows a single level, the file selected by
      % 'cp_index' is loaded. Otherwise the file names recorded in #filenames
      % are loaded level by level and each loaded part is inserted into the
      % root object at the position given by #data_index.
      %
      % Parameters:
      %   model:    an object of type IReducedModel
      %   cp_index: a file index or the string 'latest'. (Default = 'latest')
      %             Only used if this checkpoint has a single level.
      %
      % Return values:
      %   dd: the 'detailed_data' variable read from the root level file,
      %       with the deeper levels inserted.

      if nargin <= 2
        cp_index = 'latest';
      end

      root_dir = Greedy.Checkpoint.filepath(model, cp.ids{1});

      if length(cp.ids) == 1
        cpfn = Greedy.Checkpoint.filename(model, cp.ids{1}, cp_index);
        tmp  = load(fullfile(root_dir, cpfn));
        dd   = tmp.detailed_data;
        return;
      end

      % multi-level checkpoint: the recorded file names are authoritative, so
      % the file selected by 'cp_index' is neither needed nor read.
      tmp = load(fullfile(root_dir, cp.filenames{1}));
      dd  = tmp.detailed_data;
      for i = 2:length(cp.ids)
        if ~isempty(cp.ids{i})
          fp  = Greedy.Checkpoint.filepath(model, cp.ids{i});
          tmp = load(fullfile(fp, cp.filenames{i}));
          % NOTE(review): no return value is used here, so 'dd' is presumably
          % a handle object that set() modifies in place -- confirm.
          set(dd, cp.data_index{i}, tmp.detailed_data);
        end
      end
    end

    function ret = get(this, field, default)
      % function ret = get(this, field[, default])
      % access function for user data stored in a Checkpoint instance
      %
      % Parameters:
      %   field: name of a field in the #userdata structure of this #level.
      %   default: This value is returned if the field does not exist in the
      %            #userdata structure. (Default = [])
      %
      % Return values:
      %   ret:  This returns the value of the userdata field.

      if nargin < 3
        default = [];
      end

      ud = this.userdata{this.level};
      if isfield(ud, field)
        ret = ud.(field);
      else
        ret = default;
      end
    end

    function ccp = child(this, index)
      % function ccp = child(this, index)
      % is called when a new Checkpoint instance for the child of a DataTree.INode
      % instance shall be generated.
      %
      % The child must be a DataTree.INode itself.
      %
      % Parameters:
      %   index: a scalar indicating the father child-relationship. child must
      %          be the 'index'-th child of father.
      %
      % Return values:
      %   ccp: the created instance of type Checkpoint
      ccp = Greedy.Checkpoint(this, index);
    end

%    function ret = subsref(this, S)
%      if isequal(S(1).type, '.') && isequal(S(1).subs, 'ud') ...
%          && length(S) == 2 && isequal(S(2).type, '.')
%        if isfield(this.userdata, S(2).subs)
%          ret = this.userdata.(S(2).subs);
%        else
%          ret = [];
%        end
%      else
%        ret = builtin('subsref', this, S);
%      end
%    end

    function cp = store(cp, model, detailed_data, id, ud)
      % function cp = store(cp, model, detailed_data, id[, ud])
      % stores a DataTree.INode tree on the harddrive and attaches a descriptive
      % text and user data.
      %
      % The directory where the data is stored is computed via
      % Greedy.Checkpoint.filepath() from the model argument.
      %
      % As long as fewer than #file_rotation_size files exist in that
      % directory, a new file 'cp<k>' is created, where 'k' is the number of
      % files already present. Afterwards the oldest file is overwritten.
      % Nothing is done if #enable_storing is false.
      %
      % @note The stored DataTree.INode knows its fathers, such that it can be
      %       embedded in a full DataTree.INode tree by restore().
      %
      % Parameters:
      %   model: an object or type IReducedModel
      %   detailed_data: an object of type DataTree.INode to be stored on the harddrive
      %   id:    id strings which describes the DataTree.INode instance or the
      %          algorithm which generates it.
      %   ud:    arbitrary user data to be stored with this checkpoint. If
      %          omitted, the user data of this level is left unchanged.
      %
      % Return values:
      %   cp:    the changed Checkpoint instance.

      if ~cp.enable_storing
        return;
      end

      fp = Greedy.Checkpoint.filepath(model, id);
      if ~exist(fp, 'dir')
        mkdir(fp);
      end

      % only regular files take part in the rotation ('.', '..' and
      % subdirectories are skipped)
      [files, stamps] = Greedy.Checkpoint.list_files(fp);

      if ~isempty(files) && numel(files) >= cp.file_rotation_size
        % rotation limit reached: reuse the name of the oldest file
        [unused, oldest] = min(stamps);
        cpfn = files(oldest).name;
      else
        cpfn = ['cp', num2str(numel(files))];
      end

      cp.ids{cp.level}       = id;
      if nargin >= 5
        cp.userdata{cp.level}  = structcpy(cp.userdata{cp.level}, ud);
      end
      cp.filenames{cp.level} = cpfn;
      % the variable names 'detailed_data' and 'cp' are what the restore
      % methods read back from the file
      save(fullfile(fp, cpfn), 'detailed_data', 'cp');
    end

  end

  methods (Static)

    function [dd, cp] = restore_latest_if_available(model, id)
      % function [dd, cp] = restore_latest_if_available(model, id)
      % restores the latest checkpoint for 'id' if auto restoring is enabled
      % and at least one checkpoint file exists.
      %
      % Parameters:
      %   model: an object of type IReducedModel
      %   id:    the descriptive id string used when storing with store().
      %
      % Return values:
      %   dd: the restored data, or '[]' if nothing was restored.
      %   cp: the restored Greedy.Checkpoint, or a default constructed one if
      %       nothing was restored.

      dd = [];
      cp = Greedy.Checkpoint;

      if ~Greedy.Checkpoint.static_flags('auto_restore')
        return;
      end

      fp = Greedy.Checkpoint.filepath(model, id);
      % a missing directory and an existing directory without files both
      % mean that there is nothing to restore
      if isempty(Greedy.Checkpoint.list_files(fp))
        return;
      end

      [dd, cp] = Greedy.Checkpoint.restore(model, id, 'latest');
    end

    function [dd, cp] = restore(model, id, cp_index)
    % function [dd, cp] = restore(model, id[, cp_index])
    % restores a check point by a descriptive text and a checkpoint index
    %
    % Parameters:
    %   id: the descriptive id string used when storing the DataTree.INode with
    %       store().
    %   cp_index: a file index between '0' and #file_rotation_size - 1 or the
    %             string 'latest'. (Default = 'latest')
    %
    % Return values:
    %   dd: restored object of type DataTree.INode . Note, that is the root of a
    %       data tree even if the 'id' argument refers to a child of it.
    %   cp: an object of type Greedy.Checkpoint restored together with the 'dd'
    %       object.

      if nargin <= 2
        cp_index = 'latest';
      end
      if nargin <= 1
        id = 'latest';
      end
      fp = Greedy.Checkpoint.filepath(model, id);
      if ~exist(fp, 'dir')
        error(['Could not find checkpoint! Directory ', fp, ' does not exist.']);
      end

      cpfn = Greedy.Checkpoint.filename(model, id, cp_index);

      tmp = load(fullfile(fp, cpfn));
      cp = tmp.cp;

      dd = restore_detailed_data(cp, model, cp_index);
      % the returned data is the root of the tree, so the checkpoint handed
      % back to the caller refers to the root level
      cp.level = 1;
    end

    function cpfn = filename(model, id, cp_index)
      % function cpfn = filename(model, id, cp_index)
      % computes the name of a checkpoint file inside the directory given by
      % filepath().
      %
      % Parameters:
      %   model:    an object of type IReducedModel
      %   id:       the descriptive id string used when storing with store().
      %   cp_index: a file index or the string 'latest'. For 'latest' the
      %             most recently modified file in the directory is selected.
      %
      % Return values:
      %   cpfn: the file name (without directory).

      if isequal(cp_index, 'latest')
        fp = Greedy.Checkpoint.filepath(model, id);
        [files, stamps] = Greedy.Checkpoint.list_files(fp);
        if isempty(files)
          error(['Could not find checkpoint! Directory ', fp, ...
                 ' contains no checkpoint files.']);
        end
        [unused, newest] = max(stamps);
        cpfn = files(newest).name;
      else
        cpfn = ['cp', num2str(cp_index)];
      end
    end

    function clear_old_checkpoints(model, num_left)
      % function clear_old_checkpoints(model [, num_left])
      % clears the oldest entries in the checkpoint directory of a model
      %
      % The directory is fullfile(basepath(), model.descr.name). Its entries
      % are ordered by modification date and all but the 'num_left' newest
      % ones are removed (directories recursively).
      %
      % Parameters:
      %   model:  an object of type IReducedModel .
      %   num_left: number of entries to be left in the directory. (Default = 0)

      if nargin == 1
        num_left = 0;
      end

      fp = Greedy.Checkpoint.basepath;
      descr = model.descr;
      base = fullfile(fp, descr.name);
      if ~exist(base, 'dir')
        return;
      end

      entries = dir(base);
      % never touch the directory itself or its parent
      entries = entries(~ismember({entries.name}, {'.', '..'}));

      stamps = zeros(numel(entries), 1);
      for i = 1:numel(entries)
        if ~isempty(entries(i).datenum)
          stamps(i) = entries(i).datenum;
        end
      end
      % newest first, such that the first 'num_left' entries are kept
      [unused, order] = sort(stamps, 1, 'descend');

      % transpose: a for loop iterates over the columns of its range
      for i = order(num_left+1:end)'
        target = fullfile(base, entries(i).name);
        if entries(i).isdir
          rmdir(target, 's');
        else
          delete(target);
        end
      end
    end

    function fp = basepath()
      % function fp = basepath()
      % returns the basepath for storage of Greedy.Checkpoint instances on the hdd.
      %
      % Return values:
      %   fp: string of file path to base directory.
      fp = fullfile(rbmatlabtemp, 'checkpoints');
    end

    function fp = filepath(rmodel, id)
      % function fp = filepath(rmodel, id)
      % computes a file path from the rmodel hash and an id
      %
      % Parameters:
      %   rmodel: an object of type IReducedModel
      %   id:     id strings which describes the DataTree.INode instance or the
      %           algorithm which generates it.
      %
      % Return values:
      %   fp: string of file path to the directory where data is stored.

      descr = rmodel.detailed_model;
      mhash = Greedy.Checkpoint.compute_hash(descr);
      fp = fullfile(Greedy.Checkpoint.basepath, descr.name, mhash, id);
    end

    function ids = get_ids(rmodel)
      % function ids = get_ids(rmodel)
      % returns a list of possible 'ids' to restore for a reduced model.
      %
      % Parameters:
      %   rmodel: an object of type IReducedModel
      %
      % Return values:
      %   ids: a cell array of possible ids to restore.

      ids = {};

      fp = Greedy.Checkpoint.filepath(rmodel, '');
      if exist(fp, 'dir')
        dirlist = dir(fp);
        names   = reshape({dirlist.name}, size(dirlist));
        ids     = setdiff(names, {'.', '..'});
      end
    end

    function mhash = compute_hash(descr)
      % function mhash = compute_hash(descr)
      % computes an md5 hash string from the model description used to identify
      % it over a file name.
      %
      % All fields of 'descr' enter the hash, except for those listed in
      % #ignorelist, in 'descr.mu_names' and (if present) in
      % 'descr.filecache_ignore_fields_in_model'.
      %
      % Parameters:
      %   descr: a structure describing the model.
      %
      % Return values:
      %   mhash: a string of the hash key computed.
      il = [Greedy.Checkpoint.ignorelist, descr.mu_names];
      if isfield(descr, 'filecache_ignore_fields_in_model') || isprop(descr, 'filecache_ignore_fields_in_model')
        il = [ il, descr.filecache_ignore_fields_in_model ];
      end
      % sorted field order keeps the hash independent of declaration order
      fns = setdiff(fieldnames(descr), il);
      fns = sort(fns);
      parts = cell(1, length(fns));
      for i = 1:length(fns)
        parts{i} = [ fns{i}, ':', Greedy.Checkpoint.stringify(descr.(fns{i})) ];
      end
      mstr = [ '', parts{:} ];
      mhash = hash(mstr, 'md5');
    end

    function str = stringify(x)
      % function str = stringify(x)
      % helper function to make strings out of function_handle, cell and other objects.
      %
      % This function is utilized by the compute_hash() method.
      %
      % Objects are represented by their class name only and all structs by
      % the same constant string, i.e. their contents do not influence the
      % result.
      %
      % Parameters:
      %   x: an arbitrary object from which a string shall be computed.
      %
      % Return values:
      %   str: the string
      if isobject(x)
        str = class(x);
      else
        switch(class(x))
          case 'function_handle'
            str = func2str(x);
          case 'cell'
            parts = cellfun(@Greedy.Checkpoint.stringify, x, 'UniformOutput', false);
            try
              joined = cell2mat(parts);
            catch
              % the element strings cannot be arranged in the shape of the
              % cell (e.g. a column cell of strings of different lengths):
              % join them one after another instead
              joined = [ parts{:} ];
            end
            str = mat2str(joined);
          case 'struct'
            str = 'structs_not_handled_yet';
          otherwise % double, logical, uint??, int??, ...
            str = mat2str(x);
        end
      end
      str = [str, ','];
    end
  end

  methods (Static, Access = public)

    function ec = static_flags(flag, nec)
      % function ec = static_flags(flag[, nec])
      % reads or changes one of the flags shared by all Checkpoint instances
      %
      % Parameters:
      %   flag: name of the flag, either 'auto_restore' or 'storing'.
      %   nec:  if given, the new value of the flag.
      %
      % Return values:
      %   ec: the (possibly just changed) value of the flag.
      persistent pec;
      if isempty(pec)
        pec = struct('auto_restore', true, 'storing', true);
      end
      if nargin == 2
        pec.(flag) = nec;
      end
      ec = pec.(flag);
    end
  end

  methods (Static, Access = private)

    function [files, stamps] = list_files(fp)
      % function [files, stamps] = list_files(fp)
      % lists the regular files of a directory together with their
      % modification dates.
      %
      % Parameters:
      %   fp: path of the directory. It need not exist.
      %
      % Return values:
      %   files:  the entries returned by 'dir(fp)' which are not directories.
      %   stamps: column vector with the 'datenum' field of each entry, '0'
      %           where that field is empty.
      entries = dir(fp);
      files   = entries(~[entries.isdir]);
      stamps  = zeros(numel(files), 1);
      for i = 1:numel(files)
        if ~isempty(files(i).datenum)
          stamps(i) = files(i).datenum;
        end
      end
    end
  end

end
 All Classes Namespaces Files Functions Variables