From 9a65bfe5d7c0802772e3a1ab33d846da5906b7f6 Mon Sep 17 00:00:00 2001 From: revesansparole Date: Wed, 20 Jul 2016 10:22:49 +0200 Subject: [PATCH 01/45] added uid in Factory --- src/openalea/core/node.py | 166 +++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 81 deletions(-) diff --git a/src/openalea/core/node.py b/src/openalea/core/node.py index 3a46c531..bdbab143 100644 --- a/src/openalea/core/node.py +++ b/src/openalea/core/node.py @@ -24,13 +24,14 @@ __license__ = "Cecill-C" __revision__ = " $Id$ " +from copy import copy, deepcopy import imp import inspect import os import sys import string import types -from copy import copy, deepcopy +from uuid import uuid1 from weakref import ref, proxy # from signature import get_parameters @@ -39,8 +40,10 @@ from actor import IActor from metadatadict import MetaDataDict, HasAdHoc from interface import TypeNameInterfaceMap + + # Exceptions -class RecursionError (Exception): +class RecursionError(Exception): """todo""" pass @@ -110,7 +113,7 @@ def set_compositenode(self, upper): def set_data(self, key, value, notify=True): """ Set internal node data """ self.internal_data[key] = value - if(notify): + if (notify): self.notify_listeners(("data_modified", key, value)) def close(self): @@ -199,7 +202,7 @@ def get_tip(self, current_value=None): desc = self.get('desc', '') value = self.get('value', None) iname = 'Any' - if(interface): + if (interface): try: iname = interface.__name__ except AttributeError: @@ -213,9 +216,9 @@ def get_tip(self, current_value=None): if len(comment) > 100: comment = comment[:100] + ' ...' 
- if current_value is None : + if current_value is None: return '%s(%s): %s [default=%s] ' % (name, iname, desc, comment) - else : + else: return '%s(%s): %s' % (name, iname, str(current_value)) @@ -236,6 +239,7 @@ def is_hidden(self): class OutputPort(AbstractPort): """The class describing the output ports """ + def __init__(self, node): AbstractPort.__init__(self, node) @@ -244,11 +248,12 @@ class Annotation(AbstractNode): def __init__(self): AbstractNode.__init__(self) - def to_script (self): + def to_script(self): """Script translation of this node. """ return "" + class Node(AbstractNode): """ It is a callable object with typed inputs and outputs. @@ -293,11 +298,11 @@ def __init__(self, inputs=(), outputs=()): self.modified = True # Internal Data - self.internal_data["caption"] = '' # str(self.__class__.__name__) + self.internal_data["caption"] = '' # str(self.__class__.__name__) self.internal_data["lazy"] = True - self.internal_data["block"] = False # Do not evaluate the node + self.internal_data["block"] = False # Do not evaluate the node self.internal_data["priority"] = 0 - self.internal_data["hide"] = True # hide in composite node widget + self.internal_data["hide"] = True # hide in composite node widget self.internal_data["port_hide_changed"] = set() # Add delay self.internal_data["delay"] = 0 @@ -400,7 +405,8 @@ def get_user_application(self): def set_user_application(self, data): """todo""" self.internal_data["user_application"] = data - self.notify_listeners(("internal_data_changed", "user_application", data)) + self.notify_listeners( + ("internal_data_changed", "user_application", data)) user_application = property(get_user_application, set_user_application) @@ -418,12 +424,12 @@ def get_caption(self): def is_port_hidden(self, index_key): """ Return the hidden state of a port """ index = self.map_index_in[index_key] - s = self.input_desc[index].is_hidden() # get('hide', False) + s = self.input_desc[index].is_hidden() # get('hide', False) changed = 
self.internal_data["port_hide_changed"] c = index in changed - if(index in changed): + if (index in changed): return not s else: return s @@ -436,7 +442,7 @@ def set_port_hidden(self, index_key, state): :param state: a boolean value. """ index = self.map_index_in[index_key] - s = self.input_desc[index].is_hidden() # get('hide', False) + s = self.input_desc[index].is_hidden() # get('hide', False) changed = self.internal_data["port_hide_changed"] @@ -444,12 +450,11 @@ def set_port_hidden(self, index_key, state): changed.add(index) self.input_desc[index].get_ad_hoc_dict().set_metadata("hide", state) self.notify_listeners(("hiddenPortChange",)) - elif(index in changed): + elif (index in changed): changed.remove(index) self.input_desc[index].get_ad_hoc_dict().set_metadata("hide", state) self.notify_listeners(("hiddenPortChange",)) - # Status def unvalidate_input(self, index_key, notify=True): """ @@ -459,7 +464,7 @@ def unvalidate_input(self, index_key, notify=True): """ self.modified = True index = self.map_index_in[index_key] - if(notify): + if (notify): self.notify_listeners(("input_modified", index)) self.continuous_eval.notify_listeners(("node_modified",)) @@ -473,13 +478,13 @@ def set_io(self, inputs, outputs): """ # # Values - if(inputs is None or len(inputs) != len(self.inputs)): + if (inputs is None or len(inputs) != len(self.inputs)): self.clear_inputs() if inputs: for d in inputs: self.add_input(**d) - if(outputs is None or len(outputs) != len(self.outputs)): + if (outputs is None or len(outputs) != len(self.outputs)): self.clear_outputs() if outputs: for d in outputs: @@ -508,7 +513,6 @@ def clear_outputs(self): self.map_index_out = {} self.notify_listeners(("cleared_output_ports",)) - def add_input(self, **kargs): """ Create an input port """ @@ -517,7 +521,7 @@ def add_input(self, **kargs): interface = kargs.get('interface', None) # default value - if(interface and not kargs.has_key('value')): + if (interface and not kargs.has_key('value')): if 
isinstance(interface, str): # Create mapping between interface name and interface class from openalea.core.interface import TypeNameInterfaceMap @@ -530,7 +534,7 @@ def add_input(self, **kargs): value = copy(value) - name = str(name) # force to have a string + name = str(name) # force to have a string self.inputs.append(None) port = InputPort(self) @@ -575,14 +579,14 @@ def set_input(self, key, val=None, notify=True): index = self.map_index_in[key] changed = True - if(self.lazy): + if (self.lazy): # Test if the inputs has changed try: changed = (cmp(self.inputs[index], val) != 0) except: pass - if(changed): + if (changed): self.inputs[index] = val self.unvalidate_input(index, notify) @@ -636,7 +640,8 @@ def eval(self): and a timed delay if the node needs a reevaluation at a later time. """ # lazy evaluation - if self.block and self.get_nb_output() != 0 and self.output(0) is not None: + if self.block and self.get_nb_output() != 0 and self.output( + 0) is not None: return False if (self.delay == 0 and self.lazy) and not self.modified: return False @@ -659,9 +664,9 @@ def eval(self): self.output_desc[0].notify_listeners(("tooltip_modified",)) - else: # multi output - if(not isinstance(outlist, tuple) and - not isinstance(outlist, list)): + else: # multi output + if (not isinstance(outlist, tuple) and + not isinstance(outlist, list)): outlist = (outlist,) for i in range(min(len(outlist), len(self.outputs))): @@ -684,7 +689,6 @@ def __getstate__(self): odict['modified'] = True - outputs = range(len(self.outputs)) for i in range(self.get_nb_output()): try: @@ -737,7 +741,7 @@ def reload(self): # if(not connected or self.input_states[i] is "connected"): self.set_input(i, self.input_desc[i].get('value', None)) - if(i > 0): + if (i > 0): self.invalidate() def reset(self): @@ -747,7 +751,7 @@ def reset(self): i = self.get_nb_input() - if(i > 0): + if (i > 0): self.invalidate() def invalidate(self): @@ -758,16 +762,16 @@ def invalidate(self): 
self.continuous_eval.notify_listeners(("node_modified", self)) -# X @property -# X def outputs(self): -# X return [self.output(i) for i in range(self.get_nb_output())] + # X @property + # X def outputs(self): + # X return [self.output(i) for i in range(self.get_nb_output())] - def to_script (self): + def to_script(self): """Script translation of this node. """ - if self._to_script_func is None : + if self._to_script_func is None: return "#node %s do not define any scripting\n" % self.factory.name - else : + else: return self._to_script_func(self.inputs, self.outputs) @@ -787,7 +791,7 @@ def __init__(self, inputs, outputs, func): def __call__(self, inputs=()): """ Call function. Must be overriden """ - if(self.func): + if (self.func): return self.func(*inputs) def get_process_obj(self): @@ -838,6 +842,7 @@ def __init__(self, Observed.__init__(self) # Factory info + self.uid = kargs.get("uid", uuid1().hex) self.name = name self.description = description self.category = category @@ -853,6 +858,7 @@ def __init__(self, self.delay = delay self.alias = alias self.authors = authors + # Package property def set_pkg(self, port): @@ -862,7 +868,7 @@ def set_pkg(self, port): The package id is the name of the package when the package is the Python object. 
""" - if(not port): + if (not port): self.__pkg__ = None self.__pkg_id = None else: @@ -873,13 +879,13 @@ def set_pkg(self, port): def get_pkg(self): """todo""" - if(self.__pkg__): + if (self.__pkg__): port = self.__pkg__() else: port = None # Test if pkg has been reloaded # In this case the weakref is not valid anymore - if(not port and self.__pkg_id__): + if (not port and self.__pkg_id__): from openalea.core.pkgmanager import PackageManager port = self.set_pkg(PackageManager()[self.__pkg_id__]) return port @@ -908,7 +914,7 @@ def get_python_name(self): name = self.name - if(not name.isalnum()): + if (not name.isalnum()): name = '_%s' % (id(self)) return name @@ -931,7 +937,6 @@ def get_tip(self, asRst=False): found in its package. """ - if not asRst: return "Name: %s
" % (self.name,) + \ "Category: %s
" % (self.category,) + \ @@ -954,7 +959,7 @@ def instantiate(self, call_stack=[]): raise NotImplementedError() def instantiate_widget(self, node=None, parent=None, edit=False, - autonomous=False): + autonomous=False): """ Return the corresponding widget initialised with node""" raise NotImplementedError() @@ -991,8 +996,8 @@ def is_composite_node(self): return False def __getstate__(self): - odict = self.__dict__.copy() # copy the dict since we change it - odict['__pkg__'] = None # remove weakref reference + odict = self.__dict__.copy() # copy the dict since we change it + odict['__pkg__'] = None # remove weakref reference return odict def __setstate__(self, dict): @@ -1002,7 +1007,7 @@ def __setstate__(self, dict): def Alias(factory, name): """ Create a alias for factory """ - if(factory.alias is None): + if (factory.alias is None): factory.alias = [name] else: factory.alias.append(name) @@ -1063,7 +1068,7 @@ def __init__(self, # Module path, value=0 self.nodemodule_path = None - if(not search_path): + if (not search_path): self.search_path = [] else: self.search_path = search_path @@ -1073,10 +1078,9 @@ def __init__(self, # Context directory # inspect.stack()[1][1] is the caller python module caller_dir = os.path.dirname(os.path.abspath(inspect.stack()[1][1])) - if(not caller_dir in self.search_path): + if (not caller_dir in self.search_path): self.search_path.append(caller_dir) - def is_node(self): return True @@ -1084,7 +1088,7 @@ def get_python_name(self): """ Return a python valid name """ module_name = self.nodemodule_name - module_name = module_name.replace('.','_') + module_name = module_name.replace('.', '_') return "%s_%s" % (self.nodemodule_name, self.nodeclass_name) def __getstate__(self): @@ -1094,7 +1098,7 @@ def __getstate__(self): odict['nodemodule'] = None odict['nodeclass'] = None odict['module_cache'] = None - odict['__pkg__'] = None # remove weakref reference + odict['__pkg__'] = None # remove weakref reference return odict @@ -1132,22 +1136,21 
@@ def instantiate(self, call_stack=[]): if classobj is None: raise Exception("Cannot instantiate '" + \ - self.nodeclass_name + "' from " + str(module)) + self.nodeclass_name + "' from " + str(module)) # If class is not a Node, embed object in a Node class - if(not hasattr(classobj, 'mro') or not AbstractNode in classobj.mro()): + if (not hasattr(classobj, 'mro') or not AbstractNode in classobj.mro()): # Check inputs and outputs - if(self.inputs is None): + if (self.inputs is None): sign = sgn.Signature(classobj) self.inputs = sign.get_parameters() - if(self.outputs is None): + if (self.outputs is None): self.outputs = (dict(name="out", interface=None),) - # Check and Instantiate if we have a functor class - if((type(classobj) == types.TypeType) - or (type(classobj) == types.ClassType)): + if ((type(classobj) == types.TypeType) + or (type(classobj) == types.ClassType)): _classobj = classobj() if callable(_classobj): @@ -1166,7 +1169,7 @@ def instantiate(self, call_stack=[]): try: node.factory = self node.lazy = self.lazy - if(not node.caption): + if (not node.caption): node.set_caption(self.name) node.delay = self.delay @@ -1174,17 +1177,18 @@ def instantiate(self, call_stack=[]): pass # to script - if self.toscriptclass_name is not None : - node._to_script_func = module.__dict__.get(self.toscriptclass_name, None) + if self.toscriptclass_name is not None: + node._to_script_func = module.__dict__.get(self.toscriptclass_name, + None) return node def instantiate_widget(self, node=None, parent=None, - edit=False, autonomous=False): + edit=False, autonomous=False): """ Return the corresponding widget initialised with node """ # Code Editor - if(edit): + if (edit): from openalea.visualea.code_editor import get_editor w = get_editor()(parent) try: @@ -1199,15 +1203,15 @@ def instantiate_widget(self, node=None, parent=None, return w # Node Widget - if(node == None): + if (node == None): node = self.instantiate() modulename = self.widgetmodule_name - if(not modulename): + 
if (not modulename): modulename = self.nodemodule_name # if no widget declared, we create a default one - if(not modulename or not self.widgetclass_name): + if (not modulename or not self.widgetclass_name): from openalea.visualea.node_widget import DefaultNodeWidget return DefaultNodeWidget(node, parent, autonomous) @@ -1215,13 +1219,13 @@ def instantiate_widget(self, node=None, parent=None, else: # load module (file, pathname, desc) = imp.find_module(modulename, - self.search_path + sys.path) + self.search_path + sys.path) sys.path.append(os.path.dirname(pathname)) module = imp.load_module(modulename, file, pathname, desc) sys.path.pop() - if(file): + if (file): file.close() widgetclass = module.__dict__[self.widgetclass_name] @@ -1244,8 +1248,8 @@ def get_node_module(self): # Test if the module is already in sys.modules if (self.nodemodule_path and - self.module_cache and - not hasattr(self.module_cache, 'oa_invalidate')): + self.module_cache and + not hasattr(self.module_cache, 'oa_invalidate')): return self.module_cache sav_path = sys.path @@ -1290,7 +1294,6 @@ def get_node_file(self): self.get_node_module() return self.nodemodule_path - def get_node_src(self, cache=True): """ Return a string containing the node src @@ -1299,7 +1302,7 @@ def get_node_src(self, cache=True): """ # Return cached source if any - if(self.src_cache and cache): + if (self.src_cache and cache): return self.src_cache module = self.get_node_module() @@ -1338,20 +1341,19 @@ def save_new_src(self, newsrc): modulesrc = inspect.getsource(module) # Pass if no modications - if(nodesrc == newsrc): + if (nodesrc == newsrc): return # replace old code with new one modulesrc = modulesrc.replace(nodesrc, newsrc) - # write file myfile = open(self.nodemodule_path, 'w') myfile.write(modulesrc) myfile.close() # reload module - if(self.module_cache): + if (self.module_cache): self.module_cache.invalidate_oa = True self.src_cache = None @@ -1361,6 +1363,7 @@ def save_new_src(self, newsrc): # import 
py_compile # py_compile.compile(self.nodemodule_path) + # Class Factory: Factory = NodeFactory @@ -1391,7 +1394,7 @@ def __repr__(self): """ Return the python string representation """ f = self.factory fstr = string.Template(self.nodefactory_template) - + name = f.get_python_name() name = name.replace('.', '_') result = fstr.safe_substitute(NAME=name, @@ -1404,9 +1407,10 @@ def __repr__(self): LISTIN=repr(f.inputs), LISTOUT=repr(f.outputs), WIDGETMODULE=repr(f.widgetmodule_name), - WIDGETCLASS=repr(f.widgetclass_name),) + WIDGETCLASS=repr(f.widgetclass_name), ) return result + # Utility functions def gen_port_list(size): """ Generate a list of port description """ @@ -1421,15 +1425,15 @@ def initialise_standard_metadata(): # we declare what are the node model ad hoc data we require: AbstractNode.extend_ad_hoc_slots("position", list, [0, 0], "posx", "posy") Node.extend_ad_hoc_slots("userColor", list, None, "user_color") - Node.extend_ad_hoc_slots("useUserColor", bool, True, "use_user_color",) + Node.extend_ad_hoc_slots("useUserColor", bool, True, "use_user_color", ) Annotation.extend_ad_hoc_slots("text", str, "", "txt") -# Annotation.extend_ad_hoc_slots("htmlText", str, None) + # Annotation.extend_ad_hoc_slots("htmlText", str, None) Annotation.extend_ad_hoc_slots("textColor", list, None) Annotation.extend_ad_hoc_slots("rectP2", tuple, (-1, -1)) Annotation.extend_ad_hoc_slots("color", list, None) Annotation.extend_ad_hoc_slots("visualStyle", int, None) # we declare what are the node model ad hoc data we require: - AbstractPort.extend_ad_hoc_slots("hide" , bool, False) + AbstractPort.extend_ad_hoc_slots("hide", bool, False) AbstractPort.extend_ad_hoc_slots("connectorPosition", list, [0, 0]) From 2aa8f76650108571fe9a8b7c1be4bc0ad3cdd144 Mon Sep 17 00:00:00 2001 From: revesansparole Date: Wed, 20 Jul 2016 15:31:51 +0200 Subject: [PATCH 02/45] added uid to factories and cleaned wraleas --- src/openalea/core/system/__wralea__.py | 417 ++++++++++++------------- 1 file 
changed, 198 insertions(+), 219 deletions(-) diff --git a/src/openalea/core/system/__wralea__.py b/src/openalea/core/system/__wralea__.py index dd3c02ff..bda144ec 100644 --- a/src/openalea/core/system/__wralea__.py +++ b/src/openalea/core/system/__wralea__.py @@ -15,12 +15,12 @@ # ############################################################################### """Wralea for System nodes""" -__revision__ = " $Id$ " - -from openalea.core.external import * #IGNORE:W0614 +from openalea.core import Factory as Fa +from openalea.core import IBool, IFunction, IInt, ISequence, IStr from openalea.core.pkgdict import protected +__revision__ = " $Id$ " __name__ = "openalea.flow control" __alias__ = ["system"] @@ -33,268 +33,247 @@ __all__ = [] -annotation = Factory(name="annotation", - description="Annotation", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="AnnotationNode", - ) +annotation = Fa(uid="3b4eb8dc4e7d11e6bff6d4bed973e64a", + name="annotation", + description="Annotation", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="AnnotationNode", + ) __all__.append('annotation') -iter = Factory(name="iter", - description="Iteration", - category="System", - nodemodule="openalea.core.system.systemnodes", - nodeclass="IterNode", - inputs = (dict(name="generator", interface=None, value=None), - ), - outputs = ( dict(name="value", interface=None), ), - - ) -__all__.append('iter') - -iter_with_delay = Factory(name="iter with delay", - description="Iteration ", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="IterWithDelayNode", - inputs = (dict(name="generator", interface=None, value=None), - dict(name="delay", interface=IInt, value=1), - ), - outputs = ( dict(name="value", interface=None), ), - - ) +iter_ = Fa(uid="3b4eb8dd4e7d11e6bff6d4bed973e64a", + name="iter", + description="Iteration", + category="System", + nodemodule="openalea.core.system.systemnodes", + 
nodeclass="IterNode", + inputs=(dict(name="generator", interface=None, value=None), + ), + outputs=(dict(name="value", interface=None),), + + ) +__all__.append('iter_') + +iter_with_delay = Fa(uid="3b4eb8de4e7d11e6bff6d4bed973e64a", + name="iter with delay", + description="Iteration ", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="IterWithDelayNode", + inputs=(dict(name="generator", interface=None, value=None), + dict(name="delay", interface=IInt, value=1), + ), + outputs=(dict(name="value", interface=None),), + + ) __all__.append('iter_with_delay') -counter = Factory(name="counter", - description="Count from start to stop, step by step ", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="Counter", - inputs = (dict(name="start", interface=IInt, value=0), - dict(name="stop", interface=IInt, value=10), - dict(name="step", interface=IInt, value=1), - dict(name="dummy", interface=None), - ), - outputs = ( dict(name="value", interface=IInt), ), - delay = 1, - ) +counter = Fa(uid="3b4eb8df4e7d11e6bff6d4bed973e64a", + name="counter", + description="Count from start to stop, step by step ", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="Counter", + inputs=(dict(name="start", interface=IInt, value=0), + dict(name="stop", interface=IInt, value=10), + dict(name="step", interface=IInt, value=1), + dict(name="dummy", interface=None), + ), + outputs=(dict(name="value", interface=IInt),), + delay=1, + ) __all__.append('counter') -stop_simulation = Factory(name="stop simulation", - description="Iteration ", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="StopSimulation", - inputs = (dict(name="any object"), - dict(name="max nb cycles", interface=IInt, value=10), - ), - outputs = ( dict(name="any"), ), - - ) +stop_simulation = Fa(uid="3b4eb8e04e7d11e6bff6d4bed973e64a", + name="stop simulation", + description="Iteration 
", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="StopSimulation", + inputs=(dict(name="any object"), + dict(name="max nb cycles", interface=IInt, + value=10), + ), + outputs=(dict(name="any"),), + + ) __all__.append('stop_simulation') -rdv = Factory(name="rendez vous", - description="Synchronize 2 inputs", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="RDVNode", - inputs = (dict(name="value", interface=None, value=None), - dict(name="control_flow", interface=None, value=None), - ), - outputs = ( dict(name="value", interface=None), - dict(name="flow result", interface=None),), +rdv = Fa(uid="3b4eb8e14e7d11e6bff6d4bed973e64a", + name="rendez vous", + description="Synchronize 2 inputs", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="RDVNode", + inputs=(dict(name="value", interface=None, value=None), + dict(name="control_flow", interface=None, value=None), + ), + outputs=(dict(name="value", interface=None), + dict(name="flow result", interface=None),), - ) + ) __all__.append('rdv') -poolreader = Factory( name="pool reader", - description="Read data from the data pool.", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="PoolReader", - inputs = (dict(name='Key', interface=IStr),), - outputs = (dict(name='Obj', interface=None),), - lazy = False, - - ) - +poolreader = Fa(uid="3b4eb8e24e7d11e6bff6d4bed973e64a", + name="pool reader", + description="Read data from the data pool.", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="PoolReader", + inputs=(dict(name='Key', interface=IStr),), + outputs=(dict(name='Obj', interface=None),), + lazy=False, + + ) + __all__.append('poolreader') -poolwriter = Factory(name="pool writer", - description="Write data to the data pool.", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="PoolWriter", - 
inputs = (dict(name='Key', interface=IStr), - dict(name='Obj', interface=None),), - outputs = (dict(name='Obj', interface=None),), - lazy = False, - ) +poolwriter = Fa(uid="3b4eb8e34e7d11e6bff6d4bed973e64a", + name="pool writer", + description="Write data to the data pool.", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="PoolWriter", + inputs=(dict(name='Key', interface=IStr), + dict(name='Obj', interface=None),), + outputs=(dict(name='Obj', interface=None),), + lazy=False, + ) __all__.append('poolwriter') -pool_rw = Factory(name="pool setdefault", +pool_rw = Fa(uid="3b4eb8e44e7d11e6bff6d4bed973e64a", + name="pool setdefault", description="pool.setdefault(key,value).", category="flow control", nodemodule="openalea.core.system.systemnodes", nodeclass="PoolDefault", - inputs = (dict(name='Key', interface=IStr), - dict(name='Value', interface=None),), - outputs = (dict(name='Obj', interface=None),), - lazy = False, + inputs=(dict(name='Key', interface=IStr), + dict(name='Value', interface=None),), + outputs=(dict(name='Obj', interface=None),), + lazy=False, ) __all__.append('pool_rw') +init = Fa(uid="3b4eb8e54e7d11e6bff6d4bed973e64a", + name="init", + description="Value selector for graph initialisation", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="InitNode", + inputs=(dict(name="val_init", interface=None, value=0.), + dict(name="value", interface=None, value=None), + dict(name="state", interface=IBool, value=True), + ), + outputs=(dict(name="value", interface=None),), -# nf = Factory(name="list accumulator", -# description="List accumulator", -# category="System", -# nodemodule="openalea.core.system.systemnodes", -# nodeclass="AccuList", -# inputs = (dict(name="value", interface=None, value=None), -# dict(name="varname", interface=IStr, value=None), -# ), -# outputs = ( dict(name="list", interface=ISequence), ), - -# ) - -# package.add_factory(nf) - - -# nf = Factory(name="float 
accumulator", -# description="Float accumulator", -# category="System", -# nodemodule="openalea.core.system.systemnodes", -# nodeclass="AccuFloat", -# inputs = (dict(name="value", interface=IFloat, value=0.), -# dict(name="varname", interface=IStr, value=None), -# ), -# outputs = ( dict(name="float", interface=IFloat), ), - -# ) - -# package.add_factory(nf) - - -init = Factory(name="init", - description="Value selector for graph initialisation", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="InitNode", - inputs = (dict(name="val_init", interface=None, value=0.), - dict(name="value", interface=None, value=None), - dict(name="state", interface=IBool, value=True), - ), - outputs = ( dict(name="value", interface=None), ), - - ) + ) __all__.append('init') - - -X = Factory(name="X", - description="Function variable", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="LambdaVar", - inputs = (dict(name="name", interface=IStr, value='x'), ), - outputs = ( dict(name="lambda", interface=None), ), - ) + +X = Fa(uid="3b4eb8e64e7d11e6bff6d4bed973e64a", + name="X", + description="Function variable", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="LambdaVar", + inputs=(dict(name="name", interface=IStr, value='x'),), + outputs=(dict(name="lambda", interface=None),), + ) __all__.append('X') - -whileuni = Factory(name="while univariate", - description="While Loop (Univariate)", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="WhileUniVar", - inputs = (dict(name="InitValue", interface=None, value=None), - dict(name="Test", interface=IFunction, value=None), - dict(name="Function", interface=IFunction, value=None), - ), - outputs = ( dict(name="Result", interface=None), ), - ) +whileuni = Fa(uid="3b4eb8e74e7d11e6bff6d4bed973e64a", + name="while univariate", + description="While Loop (Univariate)", + category="flow control", + 
nodemodule="openalea.core.system.systemnodes", + nodeclass="WhileUniVar", + inputs=(dict(name="InitValue", interface=None, value=None), + dict(name="Test", interface=IFunction, value=None), + dict(name="Function", interface=IFunction, value=None), + ), + outputs=(dict(name="Result", interface=None),), + ) __all__.append('whileuni') - -whilemulti = Factory(name="while multivariate", - description="While Loop (Multivariate)", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="WhileMultiVar", - inputs = (dict(name="InitValues", interface=ISequence, value=[]), - dict(name="Test", interface=IFunction, value=None), - dict(name="Functions", interface=IFunction, value=None), - ), - outputs = ( dict(name="Results", interface=ISequence), ), - ) - +whilemulti = Fa(uid="3b4eb8e84e7d11e6bff6d4bed973e64a", + name="while multivariate", + description="While Loop (Multivariate)", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="WhileMultiVar", + inputs=(dict(name="InitValues", interface=ISequence, value=[]), + dict(name="Test", interface=IFunction, value=None), + dict(name="Functions", interface=IFunction, value=None), + ), + outputs=(dict(name="Results", interface=ISequence),), + ) __all__.append('whilemulti') -whilemulti2 = Factory(name="while multivariate2", - description="While Loop (Multivariate)", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="while_multi2", - inputs = (dict(name="InitValues", interface=ISequence, value=[]), - dict(name="Test", interface=IFunction, value=None), - dict(name="Functions", interface=IFunction, value=None), - ), - outputs = ( dict(name="Results", interface=ISequence), ), - ) - +whilemulti2 = Fa(uid="3b4eb8e94e7d11e6bff6d4bed973e64a", + name="while multivariate2", + description="While Loop (Multivariate)", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="while_multi2", + 
inputs=(dict(name="InitValues", interface=ISequence, value=[]), + dict(name="Test", interface=IFunction, value=None), + dict(name="Functions", interface=IFunction, + value=None), + ), + outputs=(dict(name="Results", interface=ISequence),), + ) __all__.append('whilemulti2') -cmd = Factory(name=protected("command"), - description="Call a system command", - category="System", - nodemodule="openalea.core.system.systemnodes", - nodeclass="system_cmd", - inputs = (dict(name="commands", interface=ISequence, value=[], - desc='List of command strings'), - ), - outputs = ( dict(name="stdout", interface=None, desc='result'), - dict(name="stderr", interface=None, desc='result'), ), - ) - - +cmd = Fa(uid="3b4eb8ea4e7d11e6bff6d4bed973e64a", + name=protected("command"), + description="Call a system command", + category="System", + nodemodule="openalea.core.system.systemnodes", + nodeclass="system_cmd", + inputs=(dict(name="commands", interface=ISequence, value=[], + desc='List of command strings'), + ), + outputs=(dict(name="stdout", interface=None, desc='result'), + dict(name="stderr", interface=None, desc='result'),), + ) __all__.append('cmd') -_delay = Factory(name="delay", - description="Delay return the previous or an init value.", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="Delay", - inputs = (dict(name="init", interface=None), - dict(name="x", interface=None), - dict(name="reset", interface=IBool )), - outputs = ( dict(name="previous", interface=None), ), - lazy = False, - ) +_delay = Fa(uid="3b4eb8eb4e7d11e6bff6d4bed973e64a", + name="delay", + description="Delay return the previous or an init value.", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="Delay", + inputs=(dict(name="init", interface=None), + dict(name="x", interface=None), + dict(name="reset", interface=IBool)), + outputs=(dict(name="previous", interface=None),), + lazy=False, + ) __all__.append('_delay') - -_for = 
Factory(name="for", - description="for Loop (Univariate)", - category="flow control", - nodemodule="openalea.core.system.systemnodes", - nodeclass="For", - inputs = (dict(name="InitValue", interface=None, value=None), - dict(name="list", interface=ISequence, value=None), - dict(name="Function", interface=IFunction, value=None), - ), - outputs = ( dict(name="Result", interface=None), ), - ) +_for = Fa(uid="3c43d57e4e7d11e6bff6d4bed973e64a", + name="for", + description="for Loop (Univariate)", + category="flow control", + nodemodule="openalea.core.system.systemnodes", + nodeclass="For", + inputs=(dict(name="InitValue", interface=None, value=None), + dict(name="list", interface=ISequence, value=None), + dict(name="Function", interface=IFunction, value=None), + ), + outputs=(dict(name="Result", interface=None),), + ) __all__.append('_for') - From 7d9d4565006e3402083ea0b24cd242b013be7ace Mon Sep 17 00:00:00 2001 From: revesansparole Date: Tue, 26 Jul 2016 19:24:30 +0200 Subject: [PATCH 03/45] put provenance back into the system --- src/openalea/core/algo/dataflow_evaluation.py | 386 ++++++++++-------- src/openalea/core/compositenode.py | 290 +++++++------ 2 files changed, 369 insertions(+), 307 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index f3cf618b..630505f0 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -22,124 +22,133 @@ import sys from time import clock import traceback as tb -from openalea.core import ScriptLibrary +from openalea.provenance.simple_dict import Provenance as RVProvenance +from openalea.core import ScriptLibrary from openalea.core.dataflow import SubDataflow from openalea.core.interface import IFunction - -PROVENANCE = False - -# Implement provenance in OpenAlea -db_conn = None - -import sqlite3 -from openalea.core.path import path -from openalea.core import settings - -def db_create(cursor): - cur = cursor 
- #-prospective provenance-# - #User table creation - cur.execute("CREATE TABLE IF NOT EXISTS User (userid INTEGER,createtime DATETIME,name varchar (25), firstname varchar (25), email varchar (25), password varchar (25),PRIMARY KEY(userid))") - - # CompositeNode table creation - cur.execute("CREATE TABLE IF NOT EXISTS CompositeNode (CompositeNodeid INTEGER, creatime DATETIME, name varchar (25), description varchar (25),userid INTEGER,PRIMARY KEY(CompositeNodeid),FOREIGN KEY(userid) references User)") - #Cr?ation de la table Node - cur.execute("CREATE TABLE IF NOT EXISTS Node (Nodeid INTEGER, createtime DATETIME, name varchar (25), NodeFactory varchar (25),CompositeNodeid INTEGER,PRIMARY KEY(Nodeid),FOREIGN KEY(CompositeNodeid) references CompsiteNode)") - #Cr?ation de la table Input - cur.execute("CREATE TABLE IF NOT EXISTS Input (Inputid INTEGER, createtime DATETIME, name varchar (25), typedata varchar (25), InputPort INTEGER,PRIMARY KEY (Inputid))") - #Cr?ation de la table Output - cur.execute("CREATE TABLE IF NOT EXISTS Output (Outputid INTEGER, createtime DATETIME, name varchar (25), typedata varchar (25), OutputPort INTEGER,PRIMARY KEY (Outputid))") - #Cr?ation de la table elt_connection - cur.execute("CREATE TABLE IF NOT EXISTS elt_connection (elt_connectionid INTEGER, createtime DATETIME,srcNodeid INTEGER, srcNodeOutputPortid INTEGER, targetNodeid INTEGER, targetNodeInputPortid INTEGER ,PRIMARY KEY (elt_connectionid))") - - #- retrospective provenance -# - #- CompositeNodeExec table creation - cur.execute("CREATE TABLE IF NOT EXISTS CompositeNodeExec (CompositeNodeExecid INTEGER, createtime DATETIME, endtime DATETIME,userid INTEGER,CompositeNodeid INTEGER,PRIMARY KEY(CompositeNodeExecid),FOREIGN KEY(CompositeNodeid) references CompositeNode,FOREIGN KEY(userid) references User)") - #- NodeExec - cur.execute("CREATE TABLE IF NOT EXISTS NodeExec (NodeExecid INTEGER, createtime DATETIME, endtime DATETIME,Nodeid INTEGER,CompositeNodeExecid INTEGER,dataid 
INTEGER,PRIMARY KEY(NodeExecid),FOREIGN KEY(Nodeid) references Node, FOREIGN KEY (CompositeNodeExecid) references CompositeNodeExec, FOREIGN KEY (dataid) references Data)") - #- History - cur.execute("CREATE TABLE IF NOT EXISTS Histoire (Histoireid INTEGER, createtime DATETIME, name varchar (25), description varchar (25),userid INTEGER,CompositeNodeExecid INTEGER,PRIMARY KEY (Histoireid), FOREIGN KEY(Userid) references User, FOREIGN KEY(CompositeNodeExecid) references CompositeNodeExec)") - #- Data - cur.execute("CREATE TABLE IF NOT EXISTS Data (dataid INTEGER, createtime DATETIME,NodeExecid INTEGER, PRIMARY KEY(dataid),FOREIGN KEY(NodeExecid) references NodeExec)") - #- Tag - cur.execute("CREATE TABLE IF NOT EXISTS Tag (CompositeNodeExecid INTEGER, createtime DATETIME, name varchar(25),userid INTEGER,PRIMARY KEY(CompositeNodeExecid),FOREIGN KEY(userid) references User)") - return cur - -def get_database_name(): - db_fn = path(settings.get_openalea_home_dir())/'provenance.sq3' - return db_fn - -def db_connexion(): - """ Return a curso on the database. - - If the database does not exists, create it. 
- """ - global db_conn - if db_conn is None: - db_fn = get_database_name() - if not db_fn.exists(): - db_conn=sqlite3.connect(db_fn) - cur = db_conn.cursor() - cur = db_create(cur) - return cur - else: - cur = db_conn.cursor() - return cur - -class Provenance(object): - def __init__(self, workflow): - self.clear() - self.workflow = workflow - - def edges(self): - cn = self.workflow - edges= list(cn.edges()) - sources=map(cn.source,edges) - targets = map(cn.target,edges) - source_ports=[cn.local_id(cn.source_port(eid)) for eid in edges] - target_ports=[cn.local_id(cn.target_port(eid)) for eid in edges] - _edges = dict(zip(edges,zip(sources,source_ports,targets, target_ports))) - return _edges - - def clear(self): - self.nodes = [] - - def start_time(self): - pass - def end_time(self): - pass - def workflow_exec(self, *args): - pass - def node_exec(self, vid, node, start_time, end_time, *args): - pass - def write(self): - """ Write the provenance in db """ - -class PrintProvenance(Provenance): - def workflow_exec(self, *args): - print 'Workflow execution ', self.workflow.factory.name - def node_exec(self, vid, node, start_time, end_time, *args): - provenance(vid, node, start_time, end_time) +# PROVENANCE = False -def provenance(vid, node, start_time, end_time): - #from service import db - #conn = db.connect() - +# # Implement provenance in OpenAlea +# db_conn = None +# +# import sqlite3 +# from openalea.core.path import path +# from openalea.core import settings +# +# def db_create(cursor): +# cur = cursor +# #-prospective provenance-# +# #User table creation +# cur.execute("CREATE TABLE IF NOT EXISTS User (userid INTEGER,createtime DATETIME,name varchar (25), firstname varchar (25), email varchar (25), password varchar (25),PRIMARY KEY(userid))") +# +# # CompositeNode table creation +# cur.execute("CREATE TABLE IF NOT EXISTS CompositeNode (CompositeNodeid INTEGER, creatime DATETIME, name varchar (25), description varchar (25),userid INTEGER,PRIMARY 
KEY(CompositeNodeid),FOREIGN KEY(userid) references User)") +# #Cr?ation de la table Node +# cur.execute("CREATE TABLE IF NOT EXISTS Node (Nodeid INTEGER, createtime DATETIME, name varchar (25), NodeFactory varchar (25),CompositeNodeid INTEGER,PRIMARY KEY(Nodeid),FOREIGN KEY(CompositeNodeid) references CompsiteNode)") +# #Cr?ation de la table Input +# cur.execute("CREATE TABLE IF NOT EXISTS Input (Inputid INTEGER, createtime DATETIME, name varchar (25), typedata varchar (25), InputPort INTEGER,PRIMARY KEY (Inputid))") +# #Cr?ation de la table Output +# cur.execute("CREATE TABLE IF NOT EXISTS Output (Outputid INTEGER, createtime DATETIME, name varchar (25), typedata varchar (25), OutputPort INTEGER,PRIMARY KEY (Outputid))") +# #Cr?ation de la table elt_connection +# cur.execute("CREATE TABLE IF NOT EXISTS elt_connection (elt_connectionid INTEGER, createtime DATETIME,srcNodeid INTEGER, srcNodeOutputPortid INTEGER, targetNodeid INTEGER, targetNodeInputPortid INTEGER ,PRIMARY KEY (elt_connectionid))") +# +# #- retrospective provenance -# +# #- CompositeNodeExec table creation +# cur.execute("CREATE TABLE IF NOT EXISTS CompositeNodeExec (CompositeNodeExecid INTEGER, createtime DATETIME, endtime DATETIME,userid INTEGER,CompositeNodeid INTEGER,PRIMARY KEY(CompositeNodeExecid),FOREIGN KEY(CompositeNodeid) references CompositeNode,FOREIGN KEY(userid) references User)") +# #- NodeExec +# cur.execute("CREATE TABLE IF NOT EXISTS NodeExec (NodeExecid INTEGER, createtime DATETIME, endtime DATETIME,Nodeid INTEGER,CompositeNodeExecid INTEGER,dataid INTEGER,PRIMARY KEY(NodeExecid),FOREIGN KEY(Nodeid) references Node, FOREIGN KEY (CompositeNodeExecid) references CompositeNodeExec, FOREIGN KEY (dataid) references Data)") +# #- History +# cur.execute("CREATE TABLE IF NOT EXISTS Histoire (Histoireid INTEGER, createtime DATETIME, name varchar (25), description varchar (25),userid INTEGER,CompositeNodeExecid INTEGER,PRIMARY KEY (Histoireid), FOREIGN KEY(Userid) references User, FOREIGN 
KEY(CompositeNodeExecid) references CompositeNodeExec)") +# #- Data +# cur.execute("CREATE TABLE IF NOT EXISTS Data (dataid INTEGER, createtime DATETIME,NodeExecid INTEGER, PRIMARY KEY(dataid),FOREIGN KEY(NodeExecid) references NodeExec)") +# #- Tag +# cur.execute("CREATE TABLE IF NOT EXISTS Tag (CompositeNodeExecid INTEGER, createtime DATETIME, name varchar(25),userid INTEGER,PRIMARY KEY(CompositeNodeExecid),FOREIGN KEY(userid) references User)") +# return cur +# +# def get_database_name(): +# db_fn = path(settings.get_openalea_home_dir())/'provenance.sq3' +# return db_fn +# +# def db_connexion(): +# """ Return a curso on the database. +# +# If the database does not exists, create it. +# """ +# global db_conn +# if db_conn is None: +# db_fn = get_database_name() +# if not db_fn.exists(): +# db_conn=sqlite3.connect(db_fn) +# cur = db_conn.cursor() +# cur = db_create(cur) +# return cur +# else: +# cur = db_conn.cursor() +# return cur + +# class Provenance(object): +# def __init__(self, workflow): +# self.clear() +# self.workflow = workflow +# +# def edges(self): +# cn = self.workflow +# edges = list(cn.edges()) +# sources = map(cn.source, edges) +# targets = map(cn.target, edges) +# source_ports = [cn.local_id(cn.source_port(eid)) for eid in edges] +# target_ports = [cn.local_id(cn.target_port(eid)) for eid in edges] +# _edges = dict( +# zip(edges, zip(sources, source_ports, targets, target_ports))) +# return _edges +# +# def clear(self): +# self.nodes = [] +# +# def start_time(self): +# pass +# +# def end_time(self): +# pass +# +# def workflow_exec(self, *args): +# pass +# +# def node_exec(self, vid, node, start_time, end_time, *args): +# pass +# +# def write(self): +# """ Write the provenance in db """ - if PROVENANCE: - cur = db_connexion() - pname = node.factory.package.name - name = node.factory.name +# class PrintProvenance(Provenance): +# def workflow_exec(self, *args): +# print 'Workflow execution ', self.workflow.factory.name +# +# def node_exec(self, vid, 
node, start_time, end_time, *args): +# provenance(vid, node, start_time, end_time) +# +# +# def provenance(vid, node, start_time, end_time): +# # from service import db +# # conn = db.connect() +# +# +# if PROVENANCE: +# cur = db_connexion() +# +# pname = node.factory.package.name +# name = node.factory.name +# +# print "Provenance Process:" +# print "instance ID ", vid, "Package Name: ", pname, "Name: ", name +# print "start time :", start_time, "end_time: ", end_time, "duration : ", end_time - start_time +# print 'Inputs : ', node.inputs +# print 'outputs : ', node.outputs - print "Provenance Process:" - print "instance ID ", vid, "Package Name: ",pname, "Name: ", name - print "start time :", start_time, "end_time: ", end_time, "duration : ", end_time-start_time - print 'Inputs : ', node.inputs - print 'outputs : ', node.outputs # print the evaluation time # This variable has to be retrieve by the settings @@ -147,8 +156,8 @@ def provenance(vid, node, start_time, end_time): __evaluators__ = [] -class EvaluationException(Exception): +class EvaluationException(Exception): def __init__(self, vid, node, exception, exc_info): Exception.__init__(self) self.vid = vid @@ -180,8 +189,8 @@ def cmp_posx(x, y): """todo""" (xpid, xvid, xactor) = x (ypid, yvid, yactor) = y - #px = xactor.internal_data.get('posx', 0) - #py = yactor.internal_data.get('posx', 0) + # px = xactor.internal_data.get('posx', 0) + # py = yactor.internal_data.get('posx', 0) px = xactor.get_ad_hoc_dict().get_metadata('position')[0] py = yactor.get_ad_hoc_dict().get_metadata('position')[0] @@ -196,15 +205,20 @@ def cmp_posx(x, y): """ Abstract evaluation algorithm """ -class AbstractEvaluation(object): - def __init__(self, dataflow): +class AbstractEvaluation(object): + def __init__(self, dataflow, record_provenance=False): """ :param dataflow: to be done """ self._dataflow = dataflow - if PROVENANCE: - self.provenance = PrintProvenance(dataflow) + # if PROVENANCE: + # self.provenance = 
PrintProvenance(dataflow) + + if record_provenance: + self._prov = RVProvenance() + else: + self._prov = None def eval(self, *args): """todo""" @@ -223,14 +237,25 @@ def eval_vertex_code(self, vid): node = self._dataflow.actor(vid) try: + # prov before + # print "prov", node.get_caption() + + if self._prov is not None: + self._prov.before_eval(self._dataflow, vid) + t0 = clock() ret = node.eval() t1 = clock() + # prov before + # print "prov", node.get_caption() + + if self._prov is not None: + self._prov.before_eval(self._dataflow, vid) + + # if PROVENANCE: + # self.provenance.node_exec(vid, node, t0, t1) + # # provenance(vid, node, t0,t1) - if PROVENANCE: - self.provenance.node_exec(vid, node, t0,t1) - #provenance(vid, node, t0,t1) - # When an exception is raised, a flag is set. # So we remove it when evaluation is ok. node.raise_exception = False @@ -252,8 +277,7 @@ def eval_vertex_code(self, vid): node.raise_exception = True node.notify_listeners(('data_modified', None, None)) raise EvaluationException(vid, node, e, \ - tb.format_tb(sys.exc_info()[2])) - + tb.format_tb(sys.exc_info()[2])) def get_parent_nodes(self, pid): """ @@ -266,7 +290,7 @@ def get_parent_nodes(self, pid): # For each connected node npids = [(npid, df.vertex(npid), df.actor(df.vertex(npid))) \ - for npid in df.connected_ports(pid)] + for npid in df.connected_ports(pid)] npids.sort(cmp=cmp_posx) return npids @@ -274,13 +298,14 @@ def get_parent_nodes(self, pid): def set_provenance(self, provenance): self.provenance = provenance + class BrutEvaluation(AbstractEvaluation): """ Basic evaluation algorithm """ __evaluators__.append("BrutEvaluation") - def __init__(self, dataflow): + def __init__(self, dataflow, record_provenance=False): - AbstractEvaluation.__init__(self, dataflow) + AbstractEvaluation.__init__(self, dataflow, record_provenance) # a property to specify if the node has already been evaluated self._evaluated = set() @@ -294,7 +319,8 @@ def is_stopped(self, vid, actor): if 
actor.block: status = True n = actor.get_nb_output() - outputs = [i for i in range(n) if actor.get_output(i) is not None ] + outputs = [i for i in range(n) if + actor.get_output(i) is not None] if not outputs: status = False return status @@ -341,12 +367,12 @@ def eval(self, *args): self._evaluated.clear() # Eval from the leaf - for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid)==0): + for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): self.eval_vertex(vid) t1 = clock() if quantify: - print "Evaluation time: %s"%(t1-t0) + print "Evaluation time: %s" % (t1 - t0) class PriorityEvaluation(BrutEvaluation): @@ -357,7 +383,8 @@ def eval(self, vtx_id=None, *args, **kwds): """todo""" t0 = clock() - is_subdataflow = False if not kwds else kwds.get('is_subdataflow', False) + is_subdataflow = False if not kwds else kwds.get('is_subdataflow', + False) df = self._dataflow # Unvalidate all the nodes if is_subdataflow: @@ -370,7 +397,7 @@ def eval(self, vtx_id=None, *args, **kwds): # Select the leaves (list of (vid, actor)) leaves = [(vid, df.actor(vid)) - for vid in df.vertices() if df.nb_out_edges(vid)==0] + for vid in df.vertices() if df.nb_out_edges(vid) == 0] leaves.sort(cmp_priority) @@ -380,19 +407,19 @@ def eval(self, vtx_id=None, *args, **kwds): t1 = clock() if quantify: - print "Evaluation time: %s"%(t1-t0) + print "Evaluation time: %s" % (t1 - t0) class GeneratorEvaluation(AbstractEvaluation): """ Evaluation algorithm with generator / priority and selection""" __evaluators__.append("GeneratorEvaluation") - def __init__(self, dataflow): + def __init__(self, dataflow, record_provenance=False): - AbstractEvaluation.__init__(self, dataflow) + AbstractEvaluation.__init__(self, dataflow, record_provenance) # a property to specify if the node has already been evaluated self._evaluated = set() - self.reeval = False # Flag to force reevaluation (for generator) + self.reeval = False # Flag to force reevaluation (for generator) def 
is_stopped(self, vid, actor): """ Return True if evaluation must be stop at this vertex """ @@ -454,7 +481,7 @@ def eval(self, vtx_id=None, step=False): else: # Select the leafs (list of (vid, actor)) leafs = [(vid, df.actor(vid)) - for vid in df.vertices() if df.nb_out_edges(vid)==0] + for vid in df.vertices() if df.nb_out_edges(vid) == 0] leafs.sort(cmp_priority) @@ -462,25 +489,24 @@ def eval(self, vtx_id=None, step=False): for vid, actor in leafs: if not self.is_stopped(vid, actor): self.reeval = True - while(self.reeval): + while (self.reeval): self.clear() self.eval_vertex(vid) t1 = clock() if quantify: - print "Evaluation time: %s"%(t1-t0) + print "Evaluation time: %s" % (t1 - t0) return False - class LambdaEvaluation(PriorityEvaluation): """ Evaluation algorithm with support of lambda / priority and selection""" __evaluators__.append("LambdaEvaluation") - def __init__(self, dataflow): - PriorityEvaluation.__init__(self, dataflow) + def __init__(self, dataflow, record_provenance=False): + PriorityEvaluation.__init__(self, dataflow, record_provenance) - self.lambda_value = {} # lambda resolution dictionary + self.lambda_value = {} # lambda resolution dictionary self._resolution_node = set() def eval_vertex(self, vid, context, lambda_value, *args): @@ -528,7 +554,7 @@ def eval_vertex(self, vid, context, lambda_value, *args): transmit_cxt = context transmit_lambda = lambda_value - cpt = 0 # parent counter + cpt = 0 # parent counter # For each connected node for npid, nvid, nactor in self.get_parent_nodes(pid): @@ -548,7 +574,7 @@ def eval_vertex(self, vid, context, lambda_value, *args): # replace the lambda with value. 
if (isinstance(outval, SubDataflow) - and interface is not IFunction): + and interface is not IFunction): if (not context and not lambda_value): # we are not in resolution mode @@ -566,7 +592,8 @@ def eval_vertex(self, vid, context, lambda_value, *args): try: lambda_value[outval] = context.pop() except Exception: - raise Exception("The number of lambda variables is insuffisant") + raise Exception( + "The number of lambda variables is insuffisant") # We replace the value with a context value outval = lambda_value[outval] @@ -597,9 +624,14 @@ def eval(self, vtx_id=None, context=None, is_subdataflow=False, step=False): :param context: list a value to assign to lambda variables """ t0 = clock() - if PROVENANCE and (not is_subdataflow): - self.provenance.workflow_exec() - self.provenance.start_time() + if self._prov is not None: + self._prov.workflow = id(self._dataflow) + self._prov.init(self._dataflow) + self._prov.time_init = t0 + + # if PROVENANCE and (not is_subdataflow): + # self.provenance.workflow_exec() + # self.provenance.start_time() self.lambda_value.clear() @@ -608,22 +640,28 @@ def eval(self, vtx_id=None, context=None, is_subdataflow=False, step=False): # thus, we have to reverse the arguments to evaluate the function (FIFO). 
context.reverse() - PriorityEvaluation.eval(self, vtx_id, context, self.lambda_value, is_subdataflow=is_subdataflow) - self.lambda_value.clear() # do not keep context in memory - - if PROVENANCE: - self.provenance.end_time() + PriorityEvaluation.eval(self, vtx_id, context, self.lambda_value, + is_subdataflow=is_subdataflow) + self.lambda_value.clear() # do not keep context in memory + + # if PROVENANCE: + # self.provenance.end_time() t1 = clock() + if self._prov is not None: + self._prov.time_end = t1 + if quantify: - print "Evaluation time: %s"%(t1-t0) + print "Evaluation time: %s" % (t1 - t0) if not is_subdataflow: self._resolution_node.clear() DefaultEvaluation = LambdaEvaluation -#DefaultEvaluation = GeneratorEvaluation + + +# DefaultEvaluation = GeneratorEvaluation # from collections import deque @@ -896,11 +934,12 @@ def eval(self, *args, **kwds): # Eval from the leaf script = "" - for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid)==0): + for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): script += self.eval_vertex(vid) return script + ############################################################################ # Evaluation with scheduling @@ -915,7 +954,7 @@ def __init__(self, dataflow): AbstractEvaluation.__init__(self, dataflow) # a property to specify if the node has already been evaluated self._evaluated = set() - self.reeval = False # Flag to force reevaluation (for generator) + self.reeval = False # Flag to force reevaluation (for generator) # CPL # At each evaluation of the dataflow, increase the current cycle of @@ -932,7 +971,7 @@ def is_stopped(self, vid, actor): """ Return True if evaluation must be stop at this vertex """ stopped = False try: - if hasattr(actor,'block'): + if hasattr(actor, 'block'): stopped = actor.block stopped = stopped or vid in self._evaluated except: @@ -956,7 +995,7 @@ def next_step(self): def eval_vertex(self, vid): """ Evaluate the vertex vid """ - #print "Step ", self._current_cycle + 
# print "Step ", self._current_cycle df = self._dataflow actor = df.actor(vid) @@ -998,7 +1037,6 @@ def eval_vertex(self, vid): if delay == 0: delay = self.eval_vertex_code(vid) - # Reevaluation flag # TODO: Add the node to the scheduler rather to execute if (delay): @@ -1023,7 +1061,7 @@ def eval(self, vtx_id=None, step=False): else: # Select the leafs (list of (vid, actor)) leafs = [(vid, df.actor(vid)) - for vid in df.vertices() if df.nb_out_edges(vid)==0] + for vid in df.vertices() if df.nb_out_edges(vid) == 0] leafs.sort(cmp_priority) @@ -1032,7 +1070,7 @@ def eval(self, vtx_id=None, step=False): if not self.is_stopped(vid, actor): self.reeval = True if not step: - while(self.reeval and not self._stop): + while (self.reeval and not self._stop): self.clear() self.eval_vertex(vid) self.next_step() @@ -1046,7 +1084,7 @@ def eval(self, vtx_id=None, step=False): for vid in self._nodes_to_reset: df.actor(vid).reset() - #print 'Run %d times the dataflow'%(self._current_cycle,) + # print 'Run %d times the dataflow'%(self._current_cycle,) # Reset the state if not step: @@ -1055,7 +1093,7 @@ def eval(self, vtx_id=None, step=False): t1 = clock() if quantify: - print "Evaluation time: %s"%(t1-t0) + print "Evaluation time: %s" % (t1 - t0) return False @@ -1083,17 +1121,16 @@ def is_operator(actor): if 'SciFloware' not in factory.package.name: return False elif factory.name in algebra: - return True + return True else: return False - + def scifloware_actors(self): """ Compute the scifloware actors. Only those actors will be evaluated. 
""" - df = self._dataflow self._scifloware_actors.clear() for vid in df.vertices(): @@ -1101,7 +1138,6 @@ def scifloware_actors(self): if self.is_operator(actor): self._scifloware_actors.add(vid) - def eval_vertex(self, vid): """ Evaluate the vertex vid @@ -1117,7 +1153,7 @@ def eval_vertex(self, vid): """ - #print "Step ", self._current_cycle + # print "Step ", self._current_cycle df = self._dataflow actor = df.actor(vid) @@ -1125,7 +1161,7 @@ def eval_vertex(self, vid): is_op = vid in self._scifloware_actors self._evaluated.add(vid) - #assert self.is_operator(actor) + # assert self.is_operator(actor) # For each inputs # Compute the nodes @@ -1142,17 +1178,18 @@ def eval_vertex(self, vid): out_ports = list(df.connected_ports(pid)) nb_out = len(out_ports) if nb_out > 1: - raise Exception('Too many nodes connected to the SciFloware operator.') + raise Exception( + 'Too many nodes connected to the SciFloware operator.') elif nb_out == 1: out_actor = df.actor(df.vertex(out_ports[0])) - dataflow_name = out_actor.factory.package.name+':'+out_actor.factory.name + dataflow_name = out_actor.factory.package.name + ':' + out_actor.factory.name actor.set_input(df.local_id(pid), dataflow_name) else: cpt = 0 # For each connected node for npid, nvid, nactor in self.get_parent_nodes(pid): # Do no reevaluate the same node - + if not self.is_stopped(nvid, nactor): self.eval_vertex(nvid) @@ -1168,7 +1205,6 @@ def eval_vertex(self, vid): self.eval_vertex_code(vid) - def eval(self, vtx_id=None, **kwds): t0 = clock() @@ -1180,7 +1216,7 @@ def eval(self, vtx_id=None, **kwds): else: # Select the leafs (list of (vid, actor)) leafs = [(vid, df.actor(vid)) - for vid in df.vertices() if df.nb_out_edges(vid)==0] + for vid in df.vertices() if df.nb_out_edges(vid) == 0] leafs.sort(cmp_priority) @@ -1190,6 +1226,6 @@ def eval(self, vtx_id=None, **kwds): t1 = clock() if quantify: - print "Evaluation time: %s"%(t1-t0) + print "Evaluation time: %s" % (t1 - t0) return False diff --git 
a/src/openalea/core/compositenode.py b/src/openalea/core/compositenode.py index 0cd7109f..e350b5de 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -38,13 +38,13 @@ quantify = False + class IncompatibleNodeError(Exception): """todo""" pass class CompositeNodeFactory(AbstractFactory): - mimetype = "openalea/compositenodefactory" """ @@ -120,7 +120,7 @@ def copy(self, **args): for k, v in ret.elt_factory.iteritems(): pkg_id, factory_id = v - if(pkg_id == old_pkg.get_id()): + if (pkg_id == old_pkg.get_id()): pkg_id = new_pkg.get_id() ret.elt_factory[k] = pkg_id, factory_id @@ -153,7 +153,7 @@ def instantiate(self, call_stack=None): new_df.set_caption(self.get_id()) new_df.eval_algo = self.eval_algo - cont_eval = set() # continuous evaluated nodes + cont_eval = set() # continuous evaluated nodes # Instantiate the node with each factory for vid in self.elt_factory: @@ -161,7 +161,7 @@ def instantiate(self, call_stack=None): node = self.instantiate_node(vid, call_stack) # Manage continuous eval - if(node.user_application): + if (node.user_application): cont_eval.add(vid) except (UnknownNodeError, UnknownPackageError): @@ -171,7 +171,7 @@ def instantiate(self, call_stack=None): print "-> Cannot find '%s:%s'" % (pkg, fact) node = self.create_fake_node(vid) node.raise_exception = True - node.notify_listeners(('data_modified', None, None )) + node.notify_listeners(('data_modified', None, None)) new_df.add_node(node, vid, False) @@ -179,10 +179,12 @@ def instantiate(self, call_stack=None): try: self.load_ad_hoc_data(new_df.node(new_df.id_in), copy.deepcopy(self.elt_data["__in__"]), - copy.deepcopy(self.elt_ad_hoc.get("__in__", None))) + copy.deepcopy( + self.elt_ad_hoc.get("__in__", None))) self.load_ad_hoc_data(new_df.node(new_df.id_out), copy.deepcopy(self.elt_data["__out__"]), - copy.deepcopy(self.elt_ad_hoc.get("__out__", None))) + copy.deepcopy( + self.elt_ad_hoc.get("__out__", None))) except: pass @@ -191,23 +193,23 @@ def 
instantiate(self, call_stack=None): (source_vid, source_port, target_vid, target_port) = link # Replace id for in and out nodes - if(source_vid == '__in__'): + if (source_vid == '__in__'): source_vid = new_df.id_in - if(target_vid == '__out__'): + if (target_vid == '__out__'): target_vid = new_df.id_out new_df.connect(source_vid, source_port, target_vid, target_port) # Set continuous evaluation for vid in cont_eval: - new_df.set_continuous_eval(vid, True) + new_df.set_continuous_eval(vid, True) # Set call stack to its original state call_stack.pop() # Properties new_df.lazy = self.lazy - new_df.graph_modified = False # Graph is not modifyied + new_df.graph_modified = False # Graph is not modifyied return new_df @@ -222,41 +224,40 @@ def create_fake_node(self, vid): for eid, link in self.connections.iteritems(): (source_vid, source_port, target_vid, target_port) = link - if(source_vid == vid): + if (source_vid == vid): outs = max(outs, source_port) - elif(target_vid == vid): + elif (target_vid == vid): ins = max(ins, target_port) node = Node() attributes = copy.deepcopy(self.elt_data[vid]) - ad_hoc = copy.deepcopy(self.elt_ad_hoc.get(vid, None)) + ad_hoc = copy.deepcopy(self.elt_ad_hoc.get(vid, None)) self.load_ad_hoc_data(node, attributes, ad_hoc) # copy node input data if any values = copy.deepcopy(self.elt_value.get(vid, ())) - for p in range(ins+1): - port = node.add_input(name="In"+str(p)) + for p in range(ins + 1): + port = node.add_input(name="In" + str(p)) - for p in range(outs+1): - port = node.add_output(name="Out"+str(p)) + for p in range(outs + 1): + port = node.add_output(name="Out" + str(p)) for vs in values: try: - #the two first elements are the historical - #values : port Id and port value - #beyond that are extensions added by gengraph: - #the ad_hoc_dict representation is third. 
+ # the two first elements are the historical + # values : port Id and port value + # beyond that are extensions added by gengraph: + # the ad_hoc_dict representation is third. port, v = vs[:2] node.set_input(port, eval(v)) - if(len(vs)>2): + if (len(vs) > 2): d = MetaDataDict(vs[2]) node.input_desc[port].get_ad_hoc_dict().update(d) except: continue - return node def paste(self, cnode, data_modifiers=[], call_stack=None, meta=False): @@ -282,8 +283,8 @@ def paste(self, cnode, data_modifiers=[], call_stack=None, meta=False): # Apply modifiers (if callable) for (key, func) in data_modifiers: try: - if(callable(func)): - if(meta): + if (callable(func)): + if (meta): func(n) else: n.internal_data[key] = func(n.internal_data[key]) @@ -309,31 +310,33 @@ def paste(self, cnode, data_modifiers=[], call_stack=None, meta=False): def load_ad_hoc_data(self, node, elt_data, elt_ad_hoc=None): if elt_ad_hoc and len(elt_ad_hoc): - #reading 0.8+ files. + # reading 0.8+ files. d = MetaDataDict(dict=elt_ad_hoc) node.get_ad_hoc_dict().update(d) else: - #extracting ad hoc data from old files. - #we parse the Node class' __ad_hoc_from_old_map__ - #which defines conversions between new ad_hoc_dict keywords - #and old internal_data keywords. - #These dictionnaries are used to extend ad_hoc_dict of a node with the - #data that views expect. See node.initialise_standard_metadata() for an example. + # extracting ad hoc data from old files. + # we parse the Node class' __ad_hoc_from_old_map__ + # which defines conversions between new ad_hoc_dict keywords + # and old internal_data keywords. + # These dictionnaries are used to extend ad_hoc_dict of a node with the + # data that views expect. See node.initialise_standard_metadata() for an example. 
if hasattr(node, "__ad_hoc_from_old_map__"): for newKey, oldKeys in node.__ad_hoc_from_old_map__.iteritems(): - data = [] #list that stores the new values + data = [] # list that stores the new values _type, default = node.__ad_hoc_slots__.get(newKey) for key in oldKeys: data.append(elt_data.pop(key, None)) - if len(data) == 1 : data = data[0] - if data is None or (isinstance(data, list) and None in data): #? + if len(data) == 1: data = data[0] + if data is None or ( + isinstance(data, list) and None in data): # ? data = default - if data is None : continue + if data is None: continue node.get_ad_hoc_dict().set_metadata(newKey, _type(data)) - #finally put the internal data (elt_data) where it has always been expected. + # finally put the internal data (elt_data) where it has always been expected. node._init_internal_data(elt_data) -# node.internal_data.update(elt_data) + + # node.internal_data.update(elt_data) def instantiate_node(self, vid, call_stack=None): """ Partial instantiation @@ -349,7 +352,7 @@ def instantiate_node(self, vid, call_stack=None): node = factory.instantiate(call_stack) attributes = copy.deepcopy(self.elt_data[vid]) - ad_hoc = copy.deepcopy(self.elt_ad_hoc.get(vid, None)) + ad_hoc = copy.deepcopy(self.elt_ad_hoc.get(vid, None)) self.load_ad_hoc_data(node, attributes, ad_hoc) # copy node input data if any @@ -357,13 +360,14 @@ def instantiate_node(self, vid, call_stack=None): for vs in values: try: - #the two first elements are the historical - #values : port Id and port value - #the values beyond are not used. + # the two first elements are the historical + # values : port Id and port value + # the values beyond are not used. 
port, v = vs[:2] node.set_input(port, eval(v)) node.input_desc[port].get_ad_hoc_dict().set_metadata("hide", - node.is_port_hidden(port)) + node.is_port_hidden( + port)) except: continue @@ -373,7 +377,7 @@ def instantiate_node(self, vid, call_stack=None): # This shouldn't be here, it is related to visual stuff # ######################################################### def instantiate_widget(self, node=None, parent=None, \ - edit=False, autonomous=False): + edit=False, autonomous=False): """ Return the corresponding widget initialised with node @@ -381,11 +385,11 @@ def instantiate_widget(self, node=None, parent=None, \ widget composed with the node sub widget is returned. """ - if(edit): + if (edit): from openalea.visualea.dataflowview import GraphicalGraph return GraphicalGraph(node).create_view(parent) - if(node == None): + if (node == None): node = self.instantiate() from openalea.visualea.compositenode_widget import DisplayGraphWidget @@ -450,23 +454,23 @@ def set_io(self, inputs, outputs): # I/O ports # Remove node if nb of input has changed - if(self.id_in is not None - and len(inputs) != self.node(self.id_in).get_nb_output()): + if (self.id_in is not None + and len(inputs) != self.node(self.id_in).get_nb_output()): self.remove_node(self.id_in) self.id_in = None - if(self.id_out is not None - and len(outputs) != self.node(self.id_out).get_nb_input()): + if (self.id_out is not None + and len(outputs) != self.node(self.id_out).get_nb_input()): self.remove_node(self.id_out) self.id_out = None # Create new io node if necessary - if(self.id_in is None): + if (self.id_in is None): self.id_in = self.add_node(CompositeNodeInput(inputs)) else: self.node(self.id_in).set_io((), inputs) - if(self.id_out is None): + if (self.id_out is None): self.id_out = self.add_node(CompositeNodeOutput(outputs)) else: self.node(self.id_out).set_io(outputs, ()) @@ -491,28 +495,29 @@ def set_output(self, index_key, val): return self.node(self.id_out).set_output(index_key, val) - def 
get_eval_algo(self): + def get_eval_algo(self, record_provenance=False): """ Return the evaluation algo instance """ try: algo_str = self.eval_algo - algo_str = algo_str.strip('"'); + algo_str = algo_str.strip('"') algo_str = algo_str.strip("'") - # import module baseimp = "algo.dataflow_evaluation" module = __import__(baseimp, globals(), locals(), [algo_str]) classobj = module.__dict__[algo_str] - return classobj(self) + return classobj(self, record_provenance=record_provenance) except Exception, e: - from openalea.core.algo.dataflow_evaluation import DefaultEvaluation + from openalea.core.algo.dataflow_evaluation import \ + DefaultEvaluation return DefaultEvaluation(self) return self.eval_algo - def eval_as_expression(self, vtx_id=None, step=False): + def eval_as_expression(self, vtx_id=None, step=False, + record_provenance=False): """ Evaluate a vtx_id @@ -520,21 +525,25 @@ def eval_as_expression(self, vtx_id=None, step=False): """ import time t0 = time.time() - if(self.evaluating): + if self.evaluating: return - if(vtx_id != None): + if vtx_id is not None: self.node(vtx_id).modified = True - algo = self.get_eval_algo() + algo = self.get_eval_algo(record_provenance) try: self.evaluating = True - algo.eval(vtx_id,step=step) + algo.eval(vtx_id, step=step) finally: self.evaluating = False t1 = time.time() if quantify: - logger.info('Evaluation time: %s'%(t1-t0)) - print 'Evaluation time: %s'%(t1-t0) + logger.info('Evaluation time: %s' % (t1 - t0)) + print 'Evaluation time: %s' % (t1 - t0) + + if record_provenance: + return algo._prov + # Functions used by the node evaluator def eval(self, *args, **kwds): @@ -555,14 +564,14 @@ def __call__(self, inputs=()): Evaluate the graph """ - if(self.id_out and self.get_nb_output()>0): + if (self.id_out and self.get_nb_output() > 0): self.eval_as_expression(self.id_out) else: self.eval_as_expression(None) return () - def to_script (self) : + def to_script(self): """Translate the dataflow into a python script. 
""" from algo.dataflow_evaluation import ToScriptEvaluation @@ -585,16 +594,16 @@ def compute_external_io(self, vertex_selection, new_vid): self._compute_inout_connection(vertex_selection, is_input=False) in_edges = \ - self._compute_outside_connection(vertex_selection, in_edges, - new_vid, is_input=True) + self._compute_outside_connection(vertex_selection, in_edges, + new_vid, is_input=True) out_edges = \ self._compute_outside_connection(vertex_selection, out_edges, - new_vid, is_input=False) + new_vid, is_input=False) return in_edges + out_edges def _compute_outside_connection(self, vertex_selection, new_connections, - new_vid, is_input = True): + new_vid, is_input=True): """ Return external connections of a composite node with input and output ports. @@ -625,7 +634,7 @@ def _compute_outside_connection(self, vertex_selection, new_connections, for edge in new_connections: if is_input: - if(edge[0] != '__in__'): + if (edge[0] != '__in__'): continue target_id, target_port = edge[2:] if (target_id, target_port) in selected_port: @@ -635,7 +644,7 @@ def _compute_outside_connection(self, vertex_selection, new_connections, port_id = self.local_id(self.source_port(e)) connections.append((vid, port_id, new_vid, edge[1])) else: - if(edge[2] != '__out__'): + if (edge[2] != '__out__'): continue source_id, source_port = edge[0:2] @@ -690,7 +699,7 @@ def _compute_inout_connection(self, vertex_selection, is_input=True): n = self.node(vid) desc = dict(io_desc(n)[pname]) - caption= '(%s)' % (n.get_caption()) + caption = '(%s)' % (n.get_caption()) count = '' name = desc['name'] @@ -719,7 +728,7 @@ def _compute_inout_connection(self, vertex_selection, is_input=True): desc['value'] = desc['interface'].default() connections.append(('__in__', len(nodes), vid, pname)) - else: # output + else: # output connections.append((vid, pname, '__out__', len(nodes))) nodes.append(desc) @@ -734,7 +743,6 @@ def compute_io(self, v_list=None): v_list is a vertex id list """ - ins, in_edges = 
self._compute_inout_connection(v_list, is_input=True) outs, out_edges = \ self._compute_inout_connection(v_list, is_input=False) @@ -742,7 +750,7 @@ def compute_io(self, v_list=None): return (ins, outs, connections) - def to_factory(self, sgfactory, listid = None, auto_io=False): + def to_factory(self, sgfactory, listid=None, auto_io=False): """ Update CompositeNodeFactory to fit with the graph @@ -757,9 +765,9 @@ def to_factory(self, sgfactory, listid = None, auto_io=False): # Properties sgfactory.lazy = self.lazy sgfactory.eval_algo = self.eval_algo - #print self.eval_algo + # print self.eval_algo # I / O - if(auto_io): + if (auto_io): (ins, outs, sup_connect) = self.compute_io(listid) sgfactory.inputs = ins sgfactory.outputs = outs @@ -777,11 +785,11 @@ def to_factory(self, sgfactory, listid = None, auto_io=False): src = self.source(eid) tgt = self.target(eid) - if((src not in listid) or (tgt not in listid)): + if ((src not in listid) or (tgt not in listid)): continue - if(src == self.id_in): + if (src == self.id_in): src = '__in__' - if(tgt == self.id_out): + if (tgt == self.id_out): tgt = '__out__' source_port = self.local_id(self.source_port(eid)) @@ -800,9 +808,9 @@ def to_factory(self, sgfactory, listid = None, auto_io=False): kdata = node.internal_data # Do not copy In and Out - if(vid == self.id_in): + if (vid == self.id_in): vid = "__in__" - elif(vid == self.id_out): + elif (vid == self.id_out): vid = "__out__" else: pkg_id = node.factory.package.get_id() @@ -815,10 +823,11 @@ def to_factory(self, sgfactory, listid = None, auto_io=False): # We do the exact opposite than in load_ad_hoc_data, have a look there. 
if hasattr(node, "__ad_hoc_from_old_map__"): for newKey, oldKeys in node.__ad_hoc_from_old_map__.iteritems(): - if len(oldKeys)==0: continue + if len(oldKeys) == 0: continue data = node.get_ad_hoc_dict().get_metadata(newKey) for pos, newKey in enumerate(oldKeys): - sgfactory.elt_data[vid][newKey] = data[pos] if isinstance(data, list) else data + sgfactory.elt_data[vid][newKey] = data[ + pos] if isinstance(data, list) else data # Copy ad_hoc data sgfactory.elt_ad_hoc[vid] = copy.deepcopy(node.get_ad_hoc_dict()) @@ -835,10 +844,10 @@ def to_factory(self, sgfactory, listid = None, auto_io=False): self.graph_modified = False # Set node factory if all node have been exported - if(listid is None): + if (listid is None): self.factory = sgfactory - def add_node(self, node, vid = None, modify=True): + def add_node(self, node, vid=None, modify=True): """ Add a node in the Graph with a particular id if id is None, autogenrate one @@ -863,9 +872,9 @@ def add_node(self, node, vid = None, modify=True): self.set_actor(vid, node) self.notify_vertex_addition(node, vid) - #self.id_cpt += 1 - if(modify): - self.notify_listeners(("graph_modified", )) + # self.id_cpt += 1 + if (modify): + self.notify_listeners(("graph_modified",)) self.graph_modified = True return vid @@ -873,24 +882,30 @@ def add_node(self, node, vid = None, modify=True): def notify_vertex_addition(self, vertex, vid=None): vtype = "vertex" doNotify = True - if(vertex.__class__.__dict__.has_key("__graphitem__")): vtype = "annotation" + if (vertex.__class__.__dict__.has_key("__graphitem__")): + vtype = "annotation" elif isinstance(vertex, CompositeNodeOutput): vtype = "outNode" doNotify = True if len(vertex.input_desc) else False - elif isinstance(vertex, CompositeNodeInput) : + elif isinstance(vertex, CompositeNodeInput): vtype = "inNode" doNotify = True if len(vertex.output_desc) else False - else: pass + else: + pass if doNotify: self.notify_listeners(("vertex_added", (vtype, vertex))) def notify_vertex_removal(self, 
vertex): vtype = "vertex" doNotify = True - if(vertex.__class__.__dict__.has_key("__graphitem__")): vtype = "annotation" - elif isinstance(vertex, CompositeNodeOutput): vtype = "outNode" - elif isinstance(vertex, CompositeNodeInput) : vtype = "inNode" - else: pass + if (vertex.__class__.__dict__.has_key("__graphitem__")): + vtype = "annotation" + elif isinstance(vertex, CompositeNodeOutput): + vtype = "outNode" + elif isinstance(vertex, CompositeNodeInput): + vtype = "inNode" + else: + pass self.notify_listeners(("vertex_removed", (vtype, vertex))) def remove_node(self, vtx_id): @@ -899,12 +914,14 @@ def remove_node(self, vtx_id): :param vtx_id: element id """ node = self.node(vtx_id) - if vtx_id == self.id_in : self.id_in = None - elif vtx_id == self.id_out : self.id_out = None + if vtx_id == self.id_in: + self.id_in = None + elif vtx_id == self.id_out: + self.id_out = None self.remove_vertex(vtx_id) node.close() self.notify_vertex_removal(node) - self.notify_listeners(("graph_modified", )) + self.notify_listeners(("graph_modified",)) self.graph_modified = True def remove_edge(self, eid): @@ -915,9 +932,9 @@ def remove_edge(self, eid): port = None DataFlow.remove_edge(self, eid) if port: - self.actor(port._vid).set_input_state(port._local_pid, "disconnected") - self.notify_listeners(("edge_removed", ("default",eid) )) - + self.actor(port._vid).set_input_state(port._local_pid, + "disconnected") + self.notify_listeners(("edge_removed", ("default", eid))) def simulate_destruction_notifications(self): """emits messages as if we were adding elements to @@ -931,7 +948,7 @@ def simulate_destruction_notifications(self): for eid in self.edges(): (src_id, dst_id) = self.source(eid), self.target(eid) - etype=None + etype = None src_port_id = self.local_id(self.source_port(eid)) dst_port_id = self.local_id(self.target_port(eid)) @@ -940,10 +957,10 @@ def simulate_destruction_notifications(self): src_port = nodeSrc.output_desc[src_port_id] dst_port = 
nodeDst.input_desc[dst_port_id] - #don't notify if the edge is connected to the input or - #output nodes. + # don't notify if the edge is connected to the input or + # output nodes. # if(src_id == self.id_in or dst_id == self.id_out): - # continue + # continue edgedata = "default", eid self.notify_listeners(("edge_removed", edgedata)) @@ -962,11 +979,12 @@ def connect(self, src_id, port_src, dst_id, port_dst): target_pid = self.in_port(dst_id, port_dst) eid = DataFlow.connect(self, source_pid, target_pid) except: - logger.error("Enable to create the edge %s %d %d %d %d"%( self.factory.name, src_id, port_src, dst_id, port_dst)) + logger.error("Enable to create the edge %s %d %d %d %d" % ( + self.factory.name, src_id, port_src, dst_id, port_dst)) return self.actor(dst_id).set_input_state(port_dst, "connected") - self.notify_listeners(("connection_modified", )) + self.notify_listeners(("connection_modified",)) self.graph_modified = True self.update_eval_listeners(src_id) @@ -976,8 +994,8 @@ def connect(self, src_id, port_src, dst_id, port_dst): dst_port = nodeDst.input_desc[port_dst] edgedata = "default", eid, src_port, dst_port - #connected ports cannot be hidden: - #nodeSrc.set_port_hidden(port_src, False) + # connected ports cannot be hidden: + # nodeSrc.set_port_hidden(port_src, False) nodeDst.set_port_hidden(port_dst, False) self.notify_listeners(("edge_added", edgedata)) @@ -996,10 +1014,10 @@ def disconnect(self, src_id, port_src, dst_id, port_dst): for eid in self.connected_edges(source_pid): if self.target_port(eid) == target_pid: - self.notify_listeners(("edge_removed", ("default",eid))) + self.notify_listeners(("edge_removed", ("default", eid))) self.remove_edge(eid) self.actor(dst_id).set_input_state(port_dst, "disconnected") - self.notify_listeners(("connection_modified", )) + self.notify_listeners(("connection_modified",)) self.graph_modified = True self.update_eval_listeners(src_id) @@ -1016,8 +1034,8 @@ def replace_node(self, vid, newnode): # 
newnode.internal_data.update(oldnode.internal_data) newnode.caption = caption - if(oldnode.get_nb_input() != newnode.get_nb_input() or - oldnode.get_nb_output() != newnode.get_nb_output()): + if (oldnode.get_nb_input() != newnode.get_nb_input() or + oldnode.get_nb_output() != newnode.get_nb_output()): raise IncompatibleNodeError() self.set_actor(vid, newnode) @@ -1029,11 +1047,11 @@ def set_continuous_eval(self, vid, state=True): node = self.actor(vid) - if(not node.user_application and not state): + if (not node.user_application and not state): return # Remove previous listener - if(node.user_application and hasattr(node, 'continuous_listener')): + if (node.user_application and hasattr(node, 'continuous_listener')): listener = node.continuous_listener node.continuous_listener = None if listener: @@ -1041,7 +1059,7 @@ def set_continuous_eval(self, vid, state=True): node.user_application = state - if(state): + if (state): listener = ContinuousEvalListener(self, vid) node.continuous_listener = listener @@ -1067,6 +1085,7 @@ def update_eval_listeners(self, vid): listeners = dst_node.continuous_eval.listeners src_node.continuous_eval.listeners.update(listeners) + from openalea.core.observer import AbstractListener @@ -1109,7 +1128,7 @@ def get_input(self, input_pid): def eval(self): return False - def to_script (self): + def to_script(self): return "" @@ -1138,7 +1157,7 @@ def set_output(self, output_pid, val): def eval(self): return False - def to_script (self): + def to_script(self): return "" @@ -1181,13 +1200,16 @@ def __repr__(self): name = name.replace('.', '_') result = fstr.safe_substitute(NAME=name, PNAME=self.pprint_repr(f.name), - DESCRIPTION=self.pprint_repr(f.description), + DESCRIPTION=self.pprint_repr( + f.description), CATEGORY=self.pprint_repr(f.category), DOC=self.pprint_repr(f.doc), INPUTS=self.pprint_repr(f.inputs), OUTPUTS=self.pprint_repr(f.outputs), - ELT_FACTORY=self.pprint_repr(f.elt_factory), - ELT_CONNECTIONS=self.pprint_repr(f.connections), 
+ ELT_FACTORY=self.pprint_repr( + f.elt_factory), + ELT_CONNECTIONS=self.pprint_repr( + f.connections), ELT_DATA=self.pprint_repr(f.elt_data), ELT_VALUE=self.pprint_repr(f.elt_value), ELT_AD_HOC=self.pprint_repr(f.elt_ad_hoc), @@ -1196,14 +1218,18 @@ def __repr__(self): ) return result + import json -class JSONCNFactoryWriter(PyCNFactoryWriter): + +class JSONCNFactoryWriter(PyCNFactoryWriter): def __repr__(self): f = self.factory - minx = min(f.elt_ad_hoc.itervalues(), key=lambda x: x["position"][0])["position"][0] - miny = min(f.elt_ad_hoc.itervalues(), key=lambda x: x["position"][1])["position"][1] + minx = min(f.elt_ad_hoc.itervalues(), key=lambda x: x["position"][0])[ + "position"][0] + miny = min(f.elt_ad_hoc.itervalues(), key=lambda x: x["position"][1])[ + "position"][1] print minx, miny @@ -1216,12 +1242,12 @@ def __repr__(self): description=f.description, category=f.category, doc=f.doc, - #inputs=f.inputs, - #outputs=f.outputs, - #elt_factory=f.elt_factory, + # inputs=f.inputs, + # outputs=f.outputs, + # elt_factory=f.elt_factory, elt_connections=list(f.connections.itervalues()), - #elt_data=f.elt_data, - #elt_value=f.elt_value, + # elt_data=f.elt_data, + # elt_value=f.elt_value, elt_ad_hoc=f.elt_ad_hoc, lazy=f.lazy, eval_algo=f.eval_algo, From e076241495c862973a6757d8e6d16be76608c1f5 Mon Sep 17 00:00:00 2001 From: revesansparole Date: Wed, 27 Jul 2016 10:03:42 +0200 Subject: [PATCH 04/45] added get method in metadatadict --- src/openalea/core/metadatadict.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/openalea/core/metadatadict.py b/src/openalea/core/metadatadict.py index 41e9d5a9..6a641e0d 100644 --- a/src/openalea/core/metadatadict.py +++ b/src/openalea/core/metadatadict.py @@ -84,6 +84,18 @@ def __repr__(self): def __len__(self): return len(self._metaTypes) + def get(self, key, default=None): + """Subclass of dict.get method + + Args: + key (any): + default (any): + + Returns: + (any) + """ + return self._metaValues.get(key, 
default) + def add_metadata(self, key, valType, notify=True): """Creates a new entry in the meta data registry. The data to set will be of the given 'valType' type.""" From 3ebaa0cc8e70e866b3f0a3068b2d7446226f38e6 Mon Sep 17 00:00:00 2001 From: revesansparole Date: Wed, 27 Jul 2016 10:17:59 +0200 Subject: [PATCH 05/45] removed bad init of provenance --- src/openalea/core/algo/dataflow_evaluation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 630505f0..f7e42da0 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -625,7 +625,6 @@ def eval(self, vtx_id=None, context=None, is_subdataflow=False, step=False): """ t0 = clock() if self._prov is not None: - self._prov.workflow = id(self._dataflow) self._prov.init(self._dataflow) self._prov.time_init = t0 From ef89307a8b19338b1e5ef31f450f92f545ac82ca Mon Sep 17 00:00:00 2001 From: revesansparole Date: Wed, 27 Jul 2016 10:40:36 +0200 Subject: [PATCH 06/45] bug correction call to after_eval --- src/openalea/core/algo/dataflow_evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index f7e42da0..4e2170c6 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -250,7 +250,7 @@ def eval_vertex_code(self, vid): # print "prov", node.get_caption() if self._prov is not None: - self._prov.before_eval(self._dataflow, vid) + self._prov.after_eval(self._dataflow, vid) # if PROVENANCE: # self.provenance.node_exec(vid, node, t0, t1) From 2ee25604a24d9b029b9e4fbde629d0b3ceebb52b Mon Sep 17 00:00:00 2001 From: revesansparole Date: Wed, 27 Jul 2016 11:11:08 +0200 Subject: [PATCH 07/45] preserve uid over rewriting of cnf --- src/openalea/core/compositenode.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 
deletions(-) diff --git a/src/openalea/core/compositenode.py b/src/openalea/core/compositenode.py index e350b5de..5f02e40d 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -1166,7 +1166,8 @@ class PyCNFactoryWriter(object): sgfactory_template = """ -$NAME = CompositeNodeFactory(name=$PNAME, +$NAME = CompositeNodeFactory(uid=$UID, + name=$PNAME, description=$DESCRIPTION, category=$CATEGORY, doc=$DOC, @@ -1198,7 +1199,8 @@ def __repr__(self): name = f.get_python_name() name = name.replace('.', '_') - result = fstr.safe_substitute(NAME=name, + result = fstr.safe_substitute(UID=self.pprint_repr(f.uid), + NAME=name, PNAME=self.pprint_repr(f.name), DESCRIPTION=self.pprint_repr( f.description), From 513c8272b5d8424e4d77457c9bddbe2d8b6fb4b1 Mon Sep 17 00:00:00 2001 From: revesansparole Date: Wed, 27 Jul 2016 11:22:39 +0200 Subject: [PATCH 08/45] preserve uid over rewriting, bug correction when writing node factories --- src/openalea/core/node.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/openalea/core/node.py b/src/openalea/core/node.py index bdbab143..b2ec01db 100644 --- a/src/openalea/core/node.py +++ b/src/openalea/core/node.py @@ -1087,9 +1087,8 @@ def is_node(self): def get_python_name(self): """ Return a python valid name """ - module_name = self.nodemodule_name - module_name = module_name.replace('.', '_') - return "%s_%s" % (self.nodemodule_name, self.nodeclass_name) + module_name = self.nodemodule_name.replace('.', '_') + return "%s_%s" % (module_name, self.nodeclass_name) def __getstate__(self): """ Pickle function """ @@ -1373,7 +1372,8 @@ class PyNodeFactoryWriter(object): nodefactory_template = """ -$NAME = Factory(name=$PNAME, +$NAME = Factory(uid=$UID, + name=$PNAME, authors=$AUTHORS, description=$DESCRIPTION, category=$CATEGORY, @@ -1397,7 +1397,8 @@ def __repr__(self): name = f.get_python_name() name = name.replace('.', '_') - result = fstr.safe_substitute(NAME=name, + result 
= fstr.safe_substitute(UID=repr(f.uid), + NAME=name, AUTHORS=repr(f.get_authors()), PNAME=repr(f.name), DESCRIPTION=repr(f.description), From a4aaf02c40d17e5e96cbc0969be7c591cc6a2a30 Mon Sep 17 00:00:00 2001 From: revesansparole Date: Thu, 4 Aug 2016 11:27:49 +0200 Subject: [PATCH 09/45] added IRef interface --- src/openalea/core/interface.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/openalea/core/interface.py b/src/openalea/core/interface.py index fa085ad8..915efed3 100644 --- a/src/openalea/core/interface.py +++ b/src/openalea/core/interface.py @@ -193,6 +193,13 @@ class ICodeStr(IStr): pass +class IRef(IStr): + """Interface for uids + """ + __label__ = u'ref' + pass + + class IFloat(IInterface): """ Float interface """ From 475e8175646fd179d7e9c9c017b3c80b5c2255f2 Mon Sep 17 00:00:00 2001 From: revesansparole Date: Thu, 4 Aug 2016 14:08:42 +0200 Subject: [PATCH 10/45] added openalea.provenance as requirement --- .pkglts/pkg_cfg.json | 8 ++------ .pkglts/pkg_hash.json | 10 +++++----- .travis.yml | 1 + AUTHORS.rst | 1 - doc/conf.py | 6 +++--- requirements.txt | 2 ++ setup.py | 4 ++-- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/.pkglts/pkg_cfg.json b/.pkglts/pkg_cfg.json index 07bc35cc..e7cbdcee 100644 --- a/.pkglts/pkg_cfg.json +++ b/.pkglts/pkg_cfg.json @@ -7,10 +7,6 @@ }, "base": { "authors": [ - [ - "openalea", - "openalea@inria.fr" - ], [ "Christophe Pradal", "christophe dot pradal at cirad dot fr" @@ -36,7 +32,7 @@ ] }, "github": { - "owner": "{{ base.authors[0][0] }}", + "owner": "openalea", "project": "{{ base.pkgname }}", "url": "https://github.com/{{ github.owner }}/{{ github.project }}" }, @@ -50,7 +46,7 @@ "intended_versions": [ "27" ], - "require": [["git", "openalea/deploy"]] + "require": [["git", "openalea/deploy"], ["git", "openalea/provenance"]] }, "readthedocs": { "project": "openalea-core" diff --git a/.pkglts/pkg_hash.json b/.pkglts/pkg_hash.json index be7a8594..2c5f3c21 100644 --- a/.pkglts/pkg_hash.json 
+++ b/.pkglts/pkg_hash.json @@ -9,11 +9,11 @@ "travis": "p39laCS5PHN7gbwAtgU28Z+UXHRifGIORlali3ZkK7OQKjjRhn1Dbwscp76SZeocjKUEBtl3fI1857Ri37wPWQ==", "travis.addons": "z4PhNX7vuL3xVChQ1m2AB9Yg5AULVxXcg/SpIdNs6c5H0NE8XYXysP+DGNKHfuwvY7kxvUdBeoGlODJ6+SfaPg==", "travis.after": "z4PhNX7vuL3xVChQ1m2AB9Yg5AULVxXcg/SpIdNs6c5H0NE8XYXysP+DGNKHfuwvY7kxvUdBeoGlODJ6+SfaPg==", - "travis.install": "CwigKKORJdg9WEl3DxAU095HFtme5Hwv7S2mH5Lgo0sUuwWuoSyhs+pBYWCzDXGyxZIcTPSIEY5/0PuaYqGR6Q==", + "travis.install": "Ay/BrD6JoeZi+JszHb0sHpFfzVEp9qaeFsWBBBJG0t2xwgF3tinQxifkNFwuJdlyCjy6U9k901dZnqt6yrzzxQ==", "travis.script": "STjpu3u3YoOeTz4mBIflZ58OodPdFRPFtLt6c8CUciNwe/fYhCTxlKTMB/S7VL1ippUsQVIfrFGl+OQ/ALWmPQ==" }, "AUTHORS.rst": { - "doc": "mr4dDvb808KCdcLSRWe9brxcIBamJp3DMpFzoEhHCeSLmr7Q0QfrNRZ79dnneEyWHgcs2srtibLIZlGOq1S0rQ==" + "doc": "HvXVe1vS1t/xzXuerP756vvyf3I4V8xEOqvNIEGejKpuzVRZr6wR4C24rm8O9OnBsYedYHvDxQRF25urtQCDMQ==" }, "CONTRIBUTING.rst": {}, "HISTORY.rst": {}, @@ -32,7 +32,7 @@ "doc/_static/nonempty.txt": {}, "doc/authors.rst": {}, "doc/conf.py": { - "sphinx": "tTFNmgl+NIWu4UMwBw2aJyhQQDpiLrp/aYkMscpqFQWjeHO7YBU9ErwQX5XnRLMGZHCtBsweFcnlbzN2b0NsBg==" + "sphinx": "vfmn2Bw5s7XSpWirr5eXEjxifmpqNg4YCiNCojKEmRaCHDw3F/yzd6ghqanWEdjEkbs7Dn9Z9U3jdHBM6/j3oA==" }, "doc/contributing.rst": {}, "doc/history.rst": {}, @@ -47,14 +47,14 @@ "pysetup": "XWPKeIfT2RoNPeDhPDgWWOssKAjkq/OXoEfIpSs8vJrYkPahRrEs1qXFvqxzqoV09MjmnVg0zrG9m/wRXfrRxw==" }, "requirements.txt": { - "pysetup": "/5djJXLbrvuFk7LIgtfgYR8RnQrY5BEmOB4zEd0PrKeswR/8sQkzj4yKy7Zl3LAdRzYjPDhWiRkS1oMCG4FX7Q==" + "pysetup": "ROpjsUQMjMbn87UPzWB8XD7JnQAUNptjTBj99UwCmDUKNGQNqmlZD0gN+ukVdk4E87brwVEgqOZ47nid1EGpww==" }, "setup.cfg": { "pysetup": "QBedkKDNLW8WmznOFdyIyCL2sTDHYcmudSzjx+7QUcRI1vVxIHnNm82aiMvaMeEAYpOCr+8+Nj4ZTr9P0dSxSg==" }, "setup.py": { "pysetup.call": "2wHuP94SpWKZpqOzExkQGTxnhGPgBHK3qOdPn0Ncstj5FkZ7FBGGM/inqAXaI4QLKQtTNlOUYRGSkYUWAUcf1w==", - "pysetup.kwds": 
"2Wo8eNut4J6R97mY1RTBwstqt7yVHd/9KVgNH17IV7oOc5Wb1g8Vcpgm7H9sabVWjkN9V7QQYNNZEneBkTNddw==" + "pysetup.kwds": "dlgrvck1HbacSgA60B/51tvoA65q1tPJ/9ZWW0mrNh0uCq4ceY5GW2McnmDVal/PjP19xCfhoyUBuElz9HvuSA==" }, "src/openalea/core/__init__.py": { "base": "gR33dW0qqYmsV9NSNB+DD8XmuxnC2t0mKjnMoU5728qh97fSER6MbX+3QKxpZDLByZToaAay4xhx8acxketJmA==" diff --git a/.travis.yml b/.travis.yml index e5ce3cc2..ef39fc18 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,6 +37,7 @@ install: - pip install git+https://github.com/openalea/deploy + - pip install git+https://github.com/openalea/provenance - conda install coverage - conda install mock - conda install nose diff --git a/AUTHORS.rst b/AUTHORS.rst index 598449c8..e8409b7c 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -8,7 +8,6 @@ Development Lead .. {# pkglts, doc -* openalea, * Christophe Pradal, * Samuel Dufour-Kowalski, * revesansparole, diff --git a/doc/conf.py b/doc/conf.py index 2f4b2211..13cac438 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -247,7 +247,7 @@ latex_documents = [ ('index', 'core.tex', u'core Documentation', - u'openalea', 'manual'), + u'Christophe Pradal', 'manual'), ] # The name of an image file (relative to this directory) to place at @@ -278,7 +278,7 @@ man_pages = [ ('index', 'core', u'core Documentation', - [u'openalea'], 1) + [u'Christophe Pradal'], 1) ] # If true, show URL addresses after external links. 
@@ -293,7 +293,7 @@ texinfo_documents = [ ('index', 'core', u'core Documentation', - u'openalea', + u'Christophe Pradal', 'core', 'OpenAlea.Core is able to discover and manage packages and logical components, build and evaluate dataflows and Generate final applications', 'Miscellaneous'), diff --git a/requirements.txt b/requirements.txt index 6c4c1dd4..7657a191 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,8 @@ # edit .pkglts/pkg_cfg instead # section pysetup +# openalea/deploy +# openalea/provenance # #} diff --git a/setup.py b/setup.py index 5261e790..2197bafc 100644 --- a/setup.py +++ b/setup.py @@ -23,8 +23,8 @@ version=version["__version__"], description=short_descr, long_description=readme + '\n\n' + history, - author="openalea, Christophe Pradal, Samuel Dufour-Kowalski, revesansparole, ", - author_email="openalea@inria.fr, christophe dot pradal at cirad dot fr, dufourko at cirad dot fr, revesansparole@gmail.com, ", + author="Christophe Pradal, Samuel Dufour-Kowalski, revesansparole, ", + author_email="christophe dot pradal at cirad dot fr, dufourko at cirad dot fr, revesansparole@gmail.com, ", url='https://github.com/openalea/core', license='cecill-c', zip_safe=False, From 0b885b5cc1e0051f1797831884a724eb2fd40e5c Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Fri, 4 Oct 2019 16:21:23 +0200 Subject: [PATCH 11/45] add *args to BrutEval so it doesnt fail with unexpected None arguments --- src/openalea/core/algo/dataflow_evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 4e2170c6..89fcd99f 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -305,7 +305,7 @@ class BrutEvaluation(AbstractEvaluation): def __init__(self, dataflow, record_provenance=False): - AbstractEvaluation.__init__(self, dataflow, record_provenance) + 
AbstractEvaluation.__init__(self, dataflow, record_provenance, *args) # a property to specify if the node has already been evaluated self._evaluated = set() From 810e51604e7ec220d91dd38309887eea5e3c406a Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Fri, 4 Oct 2019 16:22:48 +0200 Subject: [PATCH 12/45] add *args to BrutEval so it doesnt fail with unexpected None arguments --- src/openalea/core/algo/dataflow_evaluation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 89fcd99f..400ca632 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -303,9 +303,9 @@ class BrutEvaluation(AbstractEvaluation): """ Basic evaluation algorithm """ __evaluators__.append("BrutEvaluation") - def __init__(self, dataflow, record_provenance=False): + def __init__(self, dataflow, record_provenance=False, *args): - AbstractEvaluation.__init__(self, dataflow, record_provenance, *args) + AbstractEvaluation.__init__(self, dataflow, record_provenance) # a property to specify if the node has already been evaluated self._evaluated = set() From c76e34e86fe0d9b1f167f2ec0396da98d463145c Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Fri, 4 Oct 2019 16:28:32 +0200 Subject: [PATCH 13/45] add *args to BrutEval so it doesnt fail with unexpected None arguments --- src/openalea/core/algo/dataflow_evaluation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 400ca632..d0a81701 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -303,7 +303,7 @@ class BrutEvaluation(AbstractEvaluation): """ Basic evaluation algorithm """ __evaluators__.append("BrutEvaluation") - def __init__(self, dataflow, record_provenance=False, *args): + def 
__init__(self, dataflow, record_provenance=False, *args, **kwargs): AbstractEvaluation.__init__(self, dataflow, record_provenance) # a property to specify if the node has already been evaluated @@ -328,7 +328,7 @@ def is_stopped(self, vid, actor): pass return False - def eval_vertex(self, vid, *args): + def eval_vertex(self, vid, *args, **kwargs): """ Evaluate the vertex vid """ df = self._dataflow @@ -358,7 +358,7 @@ def eval_vertex(self, vid, *args): # Eval the node self.eval_vertex_code(vid) - def eval(self, *args): + def eval(self, *args, **kwargs): """ Evaluate the whole dataflow starting from leaves""" t0 = clock() df = self._dataflow From d3d5c80ff2cbec85ebcc1ece10a7744982a39c17 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Fri, 4 Oct 2019 17:26:45 +0200 Subject: [PATCH 14/45] add record_provenance as parameter for cn.eval so it can get the prov while evaluating --- src/openalea/core/algo/dataflow_evaluation.py | 17 +++++++++++++++++ src/openalea/core/compositenode.py | 8 ++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index d0a81701..0c21400e 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -252,6 +252,13 @@ def eval_vertex_code(self, vid): if self._prov is not None: self._prov.after_eval(self._dataflow, vid) + if self._prov is not None: + # print self._prov.time_init + # print self._prov.time_end + + print self._prov.as_wlformat() + # provenance(vid, node, t0,t1) + # if PROVENANCE: # self.provenance.node_exec(vid, node, t0, t1) # # provenance(vid, node, t0,t1) @@ -360,9 +367,15 @@ def eval_vertex(self, vid, *args, **kwargs): def eval(self, *args, **kwargs): """ Evaluate the whole dataflow starting from leaves""" + t0 = clock() df = self._dataflow + if self._prov is not None: + self._prov.init(df) + self._prov.time_init = t0 + + # Unvalidate all the nodes 
self._evaluated.clear() @@ -371,6 +384,10 @@ def eval(self, *args, **kwargs): self.eval_vertex(vid) t1 = clock() + + if self._prov is not None: + self._prov.time_end = t1 + if quantify: print "Evaluation time: %s" % (t1 - t0) diff --git a/src/openalea/core/compositenode.py b/src/openalea/core/compositenode.py index 5f02e40d..e63af2cb 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -552,22 +552,22 @@ def eval(self, *args, **kwds): Return True if the node need a reevaluation (like generator) """ - self.__call__() + self.__call__(*args, **kwds) self.modified = False self.notify_listeners(("status_modified", self.modified)) return False - def __call__(self, inputs=()): + def __call__(self, inputs=(), *args, **kwds): """ Evaluate the graph """ if (self.id_out and self.get_nb_output() > 0): - self.eval_as_expression(self.id_out) + self.eval_as_expression(self.id_out, *args, **kwds) else: - self.eval_as_expression(None) + self.eval_as_expression(None, *args, **kwds) return () From b5530d896fb437614c6a64563aff7266be34775d Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Mon, 7 Oct 2019 12:00:27 +0200 Subject: [PATCH 15/45] change uid def to be the name - same at each construction --- src/openalea/core/compositenode.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/openalea/core/compositenode.py b/src/openalea/core/compositenode.py index e63af2cb..b0848b2d 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -88,6 +88,10 @@ def __init__(self, *args, **kargs): self.doc = kargs.get('doc', "") self.__doc__ = self.doc + # Unique ID for the factory - TODO: for now, only built from name + self.uid = str(self.name) + + def is_composite_node(self): return True From 5aea54ac892d976a4760b39a53a4fdb2c36c40f9 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 5 Dec 2019 16:03:40 +0100 Subject: [PATCH 16/45] enable linting banit --- .vscode/settings.json | 6 ++++++ 1 file changed, 6 
insertions(+) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..88d21055 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "python.pythonPath": "/home/gaetan/miniconda2/envs/visualea/bin/python", + "python.linting.pylintEnabled": false, + "python.linting.banditEnabled": true, + "python.linting.enabled": true +} \ No newline at end of file From 52b3e204d6ae202010191a055b21b38ea6db5189 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 5 Dec 2019 16:31:10 +0100 Subject: [PATCH 17/45] add misc functions use to capture provenance --- src/openalea/core/metadata/__init__.py | 0 src/openalea/core/metadata/costs.py | 45 +++++++++++++++++++++++ src/openalea/core/metadata/data_size.py | 48 +++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100644 src/openalea/core/metadata/__init__.py create mode 100644 src/openalea/core/metadata/costs.py create mode 100644 src/openalea/core/metadata/data_size.py diff --git a/src/openalea/core/metadata/__init__.py b/src/openalea/core/metadata/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/openalea/core/metadata/costs.py b/src/openalea/core/metadata/costs.py new file mode 100644 index 00000000..ffe691c2 --- /dev/null +++ b/src/openalea/core/metadata/costs.py @@ -0,0 +1,45 @@ +# function to get the site and minimal cost for a task, with prov + sites +def minimum_cost_site(vid, provenance, multisites): + vid = str(vid) + # if no provenance data for task vid -> NOT TAKEN INTO ACCOUNT + # TODO: manage if no prov data -> average? best site? random? .... 
+ if vid not in provenance.prov: + print("Not enough provenance information on task: " + str(vid)) + return None, None + + # find the sites where the input data exist: + input_site = [] + for s in multisites.list_sites: + if multisites.list_sites[s].check_input(vid): + input_site.append(s) + # IF No site has the input data -> cannot be computed + if not input_site: + print('Input data not found') + return None, None + + possible_cost = dict() + # for each site get the cost + for s1 in multisites.list_sites: + # if the Input data is not on s1 -> get the minimal cost to transfer the intput data to the site: + min_transfer_cost = [] + for s2 in input_site: + transfer_cost = multisites.list_sites[s1].transfer_cost[s2] + # get the input data size | here from provenance, in real cases the scheduler knows it + input_data_size = provenance.prov[vid].input_size + + get_input_cost_tmp = transfer_cost * input_data_size + min_transfer_cost.append(get_input_cost_tmp) + get_input_cost = min(min_transfer_cost) + + # the cost to compute the task on this site: + compute_cost = multisites.list_sites[s1].compute_cost * provenance.prov[vid].exec_time + + total_cost = get_input_cost + compute_cost + # print s1, "total_cost : " ,total_cost, 'get_input_cost : ', get_input_cost, 'compute_cost', compute_cost + + possible_cost[s1] = total_cost + + best_site = min(possible_cost) + best_cost = possible_cost[best_site] + + return best_site, best_cost \ No newline at end of file diff --git a/src/openalea/core/metadata/data_size.py b/src/openalea/core/metadata/data_size.py new file mode 100644 index 00000000..441df453 --- /dev/null +++ b/src/openalea/core/metadata/data_size.py @@ -0,0 +1,48 @@ +from __future__ import print_function +from sys import getsizeof, stderr +from itertools import chain +from collections import deque +try: + from reprlib import repr +except ImportError: + pass + +def total_size(o, handlers={}, verbose=False): + """ Returns the approximate memory footprint an object and 
all of its contents. + + Automatically finds the contents of the following builtin containers and + their subclasses: tuple, list, deque, dict, set and frozenset. + To search other containers, add handlers to iterate over their contents: + + handlers = {SomeContainerClass: iter, + OtherContainerClass: OtherContainerClass.get_elements} + + """ + dict_handler = lambda d: chain.from_iterable(d.items()) + all_handlers = {tuple: iter, + list: iter, + deque: iter, + dict: dict_handler, + set: iter, + frozenset: iter, + } + all_handlers.update(handlers) # user handlers take precedence + seen = set() # track which object id's have already been seen + default_size = getsizeof(0) # estimate sizeof object without __sizeof__ + + def sizeof(o): + if id(o) in seen: # do not double count the same object + return 0 + seen.add(id(o)) + s = getsizeof(o, default_size) + + if verbose: + print(s, type(o), repr(o), file=stderr) + + for typ, handler in all_handlers.items(): + if isinstance(o, typ): + s += sum(map(sizeof, handler(o))) + break + return s + + return sizeof(o) \ No newline at end of file From 1994b71b3e8c6eeedbc93f1470c239ca967439a2 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 5 Dec 2019 16:32:06 +0100 Subject: [PATCH 18/45] Add CacheIndex & Prov class --- src/openalea/core/metadata/cache_index.py | 38 ++++++++++++++ src/openalea/core/metadata/provenance_data.py | 49 +++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 src/openalea/core/metadata/cache_index.py create mode 100644 src/openalea/core/metadata/provenance_data.py diff --git a/src/openalea/core/metadata/cache_index.py b/src/openalea/core/metadata/cache_index.py new file mode 100644 index 00000000..a508efc6 --- /dev/null +++ b/src/openalea/core/metadata/cache_index.py @@ -0,0 +1,38 @@ +# Cache index : { vid: time/ cost to get} - if vid in index -> cache exist + +class Cache_item(): + def __init__(self, vid=None, size=0, site=None): + self.vid = vid + self.size = size + self.site = site 
+ + def __repr__(self): + return "Cache item - vid : " + str(self.vid) + " | size : " + str(self.size) + " | site : " + str(self.site) + + +class Cache_index(): + def __init__(self): + self.cache_index = dict() + + def add_cache_index_item(self, item): + self.cache_index[item.vid] = item + + def add_cache_index_itemlist(self, items): + for item in items: + self.cache_index[item.vid] = item + + def generate_fake(self): + list_items = [] + item1 = Cache_item(vid="2", size=100, site="s2") + item2 = Cache_item(vid="5", size=50000, site="s1") + + list_items.append(item1) + list_items.append(item2) + + self.add_cache_index_itemlist(list_items) + + def check_index(self, vid): + if vid in self.cache_index: + return self.cache_index[vid] + else: + return None \ No newline at end of file diff --git a/src/openalea/core/metadata/provenance_data.py b/src/openalea/core/metadata/provenance_data.py new file mode 100644 index 00000000..0b73f444 --- /dev/null +++ b/src/openalea/core/metadata/provenance_data.py @@ -0,0 +1,49 @@ +# fake provenance for WF test "add_test" +# 6 vertices +# 3 edges +# input vid = 2 et 4 +# output vid = 5 +# 0 et 1 = auto input et output "node" - not used +# 4 = add node + + +class Prov_item(): + def __init__(self, vid=None, exec_time=0, input_size=0, output_size=0): + self.vid = vid + self.exec_time = exec_time + self.input_size = input_size + self.output_size = output_size + + def __repr__(self): + return "Provenance item - vid : " + str(self.vid) + " | exec_time : " + str(self.exec_time) + \ + " | input_size : " + str(self.input_size) + " | output_size : " + str(self.output_size) + + +class Prov(): + def __init__(self): + self.prov = dict() + + def add_prov_item(self, item): + self.prov[item.vid] = item + + def add_prov_itemlist(self, items): + for item in items: + self.prov[item.vid] = item + + def check_prov(self, vid): + if vid in self.prov: + return self.prov[vid] + else: + return None + + def generate_fake(self): + list_items = [] + item1 = 
Prov_item("2", 10, 5000, 300) + item2 = Prov_item("4", 3, 200, 1000) + item3 = Prov_item("5", 5, 1300, 100) + + list_items.append(item1) + list_items.append(item2) + list_items.append(item3) + + self.add_prov_itemlist(list_items) \ No newline at end of file From dd53673fc40484529487408fd4153b62c4379edc Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 5 Dec 2019 16:32:31 +0100 Subject: [PATCH 19/45] some function in tests --- src/openalea/core/metadata/cloud_sites.py | 76 +++++++++++++++++++ src/openalea/core/metadata/scheduling_plan.py | 13 ++++ 2 files changed, 89 insertions(+) create mode 100644 src/openalea/core/metadata/cloud_sites.py create mode 100644 src/openalea/core/metadata/scheduling_plan.py diff --git a/src/openalea/core/metadata/cloud_sites.py b/src/openalea/core/metadata/cloud_sites.py new file mode 100644 index 00000000..847101d4 --- /dev/null +++ b/src/openalea/core/metadata/cloud_sites.py @@ -0,0 +1,76 @@ +from random import choice +from string import ascii_letters + + +# sites: +class Site(): + def __init__(self, sid="", compute_cost=0., storage_cost=0., compute_power=0., buzyness=0.): + if not sid: + sid = ''.join(choice(ascii_letters) for i in range(12)) + self.sid = sid + self.compute_cost = compute_cost + self.compute_power = compute_power + self.storage_cost = storage_cost + self.buzyness = buzyness + + self.transfer_cost = dict() + self.transfer_cost[self.sid] = 0. 
+ # list task id, whose input data is on the site + self.input_storage = set() + + def __repr__(self): + return "Site - sid : " + str(self.sid) + " | compute_cost : " + str(self.compute_cost) \ + + " | storage_cost : " + str(self.storage_cost) + " | buzyness : " + str(self.buzyness) \ + + " | input data : " + str(self.input_storage) + " | transfer : " + str(self.transfer_cost) + + def free(self): + self.buzyness = 0 + + def increase_workload(self, inc=0): + self.buzyness += inc + + def add_transfer_site(self, nsite_sid, t_cost): + self.transfer_cost[nsite_sid] = t_cost + + def add_input_data(self, vid): + self.input_storage.add(vid) + + def check_input(self, vid): + return vid in self.input_storage + + +def link_two_sites(site1, site2, transfer_cost): + site1.add_transfer_site(site2.sid, transfer_cost) + site2.add_transfer_site(site1.sid, transfer_cost) + + +class MultiSiteCloud(): + def __init__(self, list_sites=dict()): + self.list_sites = list_sites + + def add_site(self, site): + self.list_sites[site.sid] = site + + def add_sitelist(self, sites): + for site in sites: + self.list_sites[site.sid] = site + + def generate_fake(self): + # start 3 sites + list_items = [] + site1 = Site(sid='s0', compute_cost=10., storage_cost=10., compute_power=10.) + site2 = Site(sid='s1', compute_cost=100., storage_cost=1., compute_power=10.) + site3 = Site(sid='s2', compute_cost=1., storage_cost=100., compute_power=10.) + + list_items.append(site1) + list_items.append(site2) + list_items.append(site3) + + self.add_sitelist(list_items) + + # generate transfer cost + link_two_sites(site1, site2, 2) + link_two_sites(site1, site3, 4) + link_two_sites(site3, site2, 6) + + diff --git a/src/openalea/core/metadata/scheduling_plan.py b/src/openalea/core/metadata/scheduling_plan.py new file mode 100644 index 00000000..ef59d19a --- /dev/null +++ b/src/openalea/core/metadata/scheduling_plan.py @@ -0,0 +1,13 @@ +class SchedulingPlan(): + def __init__(self): + self.plan = [] + self.cost = 0. 
+ + def add_to_plan(self, task, site): + self.plan.append((task, site)) + + def add_to_cost(self, new_cost): + if isinstance(new_cost, float): + self.cost+=new_cost + else: + pass \ No newline at end of file From 75953a574373a17d90f2553843830bb7420269c8 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 5 Dec 2019 16:40:15 +0100 Subject: [PATCH 20/45] add an eval that use costs --- src/openalea/core/algo/dataflow_evaluation.py | 255 +++++++++--------- 1 file changed, 135 insertions(+), 120 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 0c21400e..3f61cb84 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -28,131 +28,20 @@ from openalea.core.dataflow import SubDataflow from openalea.core.interface import IFunction -# PROVENANCE = False +# test for distributed executions +from openalea.core.metadata.provenance_data import Prov +from openalea.core.metadata.cache_index import Cache_index +from openalea.core.metadata.cloud_sites import Site, MultiSiteCloud, link_two_sites - -# # Implement provenance in OpenAlea -# db_conn = None -# -# import sqlite3 -# from openalea.core.path import path -# from openalea.core import settings -# -# def db_create(cursor): -# cur = cursor -# #-prospective provenance-# -# #User table creation -# cur.execute("CREATE TABLE IF NOT EXISTS User (userid INTEGER,createtime DATETIME,name varchar (25), firstname varchar (25), email varchar (25), password varchar (25),PRIMARY KEY(userid))") -# -# # CompositeNode table creation -# cur.execute("CREATE TABLE IF NOT EXISTS CompositeNode (CompositeNodeid INTEGER, creatime DATETIME, name varchar (25), description varchar (25),userid INTEGER,PRIMARY KEY(CompositeNodeid),FOREIGN KEY(userid) references User)") -# #Cr?ation de la table Node -# cur.execute("CREATE TABLE IF NOT EXISTS Node (Nodeid INTEGER, createtime DATETIME, name varchar (25), NodeFactory varchar 
(25),CompositeNodeid INTEGER,PRIMARY KEY(Nodeid),FOREIGN KEY(CompositeNodeid) references CompsiteNode)") -# #Cr?ation de la table Input -# cur.execute("CREATE TABLE IF NOT EXISTS Input (Inputid INTEGER, createtime DATETIME, name varchar (25), typedata varchar (25), InputPort INTEGER,PRIMARY KEY (Inputid))") -# #Cr?ation de la table Output -# cur.execute("CREATE TABLE IF NOT EXISTS Output (Outputid INTEGER, createtime DATETIME, name varchar (25), typedata varchar (25), OutputPort INTEGER,PRIMARY KEY (Outputid))") -# #Cr?ation de la table elt_connection -# cur.execute("CREATE TABLE IF NOT EXISTS elt_connection (elt_connectionid INTEGER, createtime DATETIME,srcNodeid INTEGER, srcNodeOutputPortid INTEGER, targetNodeid INTEGER, targetNodeInputPortid INTEGER ,PRIMARY KEY (elt_connectionid))") -# -# #- retrospective provenance -# -# #- CompositeNodeExec table creation -# cur.execute("CREATE TABLE IF NOT EXISTS CompositeNodeExec (CompositeNodeExecid INTEGER, createtime DATETIME, endtime DATETIME,userid INTEGER,CompositeNodeid INTEGER,PRIMARY KEY(CompositeNodeExecid),FOREIGN KEY(CompositeNodeid) references CompositeNode,FOREIGN KEY(userid) references User)") -# #- NodeExec -# cur.execute("CREATE TABLE IF NOT EXISTS NodeExec (NodeExecid INTEGER, createtime DATETIME, endtime DATETIME,Nodeid INTEGER,CompositeNodeExecid INTEGER,dataid INTEGER,PRIMARY KEY(NodeExecid),FOREIGN KEY(Nodeid) references Node, FOREIGN KEY (CompositeNodeExecid) references CompositeNodeExec, FOREIGN KEY (dataid) references Data)") -# #- History -# cur.execute("CREATE TABLE IF NOT EXISTS Histoire (Histoireid INTEGER, createtime DATETIME, name varchar (25), description varchar (25),userid INTEGER,CompositeNodeExecid INTEGER,PRIMARY KEY (Histoireid), FOREIGN KEY(Userid) references User, FOREIGN KEY(CompositeNodeExecid) references CompositeNodeExec)") -# #- Data -# cur.execute("CREATE TABLE IF NOT EXISTS Data (dataid INTEGER, createtime DATETIME,NodeExecid INTEGER, PRIMARY KEY(dataid),FOREIGN KEY(NodeExecid) 
references NodeExec)") -# #- Tag -# cur.execute("CREATE TABLE IF NOT EXISTS Tag (CompositeNodeExecid INTEGER, createtime DATETIME, name varchar(25),userid INTEGER,PRIMARY KEY(CompositeNodeExecid),FOREIGN KEY(userid) references User)") -# return cur -# -# def get_database_name(): -# db_fn = path(settings.get_openalea_home_dir())/'provenance.sq3' -# return db_fn -# -# def db_connexion(): -# """ Return a curso on the database. -# -# If the database does not exists, create it. -# """ -# global db_conn -# if db_conn is None: -# db_fn = get_database_name() -# if not db_fn.exists(): -# db_conn=sqlite3.connect(db_fn) -# cur = db_conn.cursor() -# cur = db_create(cur) -# return cur -# else: -# cur = db_conn.cursor() -# return cur - -# class Provenance(object): -# def __init__(self, workflow): -# self.clear() -# self.workflow = workflow -# -# def edges(self): -# cn = self.workflow -# edges = list(cn.edges()) -# sources = map(cn.source, edges) -# targets = map(cn.target, edges) -# source_ports = [cn.local_id(cn.source_port(eid)) for eid in edges] -# target_ports = [cn.local_id(cn.target_port(eid)) for eid in edges] -# _edges = dict( -# zip(edges, zip(sources, source_ports, targets, target_ports))) -# return _edges -# -# def clear(self): -# self.nodes = [] -# -# def start_time(self): -# pass -# -# def end_time(self): -# pass -# -# def workflow_exec(self, *args): -# pass -# -# def node_exec(self, vid, node, start_time, end_time, *args): -# pass -# -# def write(self): -# """ Write the provenance in db """ +from openalea.core.metadata.costs import minimum_cost_site +from openalea.core.metadata.scheduling_plan import SchedulingPlan +from openalea.core.metadata.data_size import total_size -# class PrintProvenance(Provenance): -# def workflow_exec(self, *args): -# print 'Workflow execution ', self.workflow.factory.name -# -# def node_exec(self, vid, node, start_time, end_time, *args): -# provenance(vid, node, start_time, end_time) -# -# -# def provenance(vid, node, start_time, 
end_time): -# # from service import db -# # conn = db.connect() -# -# -# if PROVENANCE: -# cur = db_connexion() -# -# pname = node.factory.package.name -# name = node.factory.name -# -# print "Provenance Process:" -# print "instance ID ", vid, "Package Name: ", pname, "Name: ", name -# print "start time :", start_time, "end_time: ", end_time, "duration : ", end_time - start_time -# print 'Inputs : ', node.inputs -# print 'outputs : ', node.outputs - - -# print the evaluation time # This variable has to be retrieve by the settings quantify = False +# get the prov when evaluating +provenance = False __evaluators__ = [] @@ -1245,3 +1134,129 @@ def eval(self, vtx_id=None, **kwds): print "Evaluation time: %s" % (t1 - t0) return False + +############################################################ +class TestEval(AbstractEvaluation): + """ Basic evaluation algorithm """ + __evaluators__.append("TestEval") + + def __init__(self, dataflow): + + AbstractEvaluation.__init__(self, dataflow) + # a property to specify if the node has already been evaluated + self._evaluated = set() + + # GENERATE FAKE INFO + # provenance + p = Prov() + p.generate_fake() + self.p = p + # cache + c = Cache_index() + c.generate_fake() + self.c = c + # site cloud + m = MultiSiteCloud() + m.generate_fake() + self.m = m + # set input data on site s1: + m.list_sites["s1"].add_input_data("2") + # scheduling plan + self.SP = SchedulingPlan() + + + def is_stopped(self, vid, actor): + """ Return True if evaluation must be stop at this vertex """ + + if vid in self._evaluated: + return True + + try: + if actor.block: + status = True + n = actor.get_nb_output() + outputs = [i for i in range(n) if actor.get_output(i) is not None ] + if not outputs: + status = False + return status + except: + pass + return False + + def eval_vertex(self, vid, *args): + """ Evaluate the vertex vid """ + print "start the evaluation of node : " + str(vid) + df = self._dataflow + actor = df.actor(vid) + + 
self._evaluated.add(vid) + + # For each inputs + for pid in df.in_ports(vid): + inputs = [] + + cpt = 0 + # For each connected node + for npid, nvid, nactor in self.get_parent_nodes(pid): + if not self.is_stopped(nvid, nactor): + self.eval_vertex(nvid) + + inputs.append(nactor.get_output(df.local_id(npid))) + cpt += 1 + + # set input as a list or a simple value + if (cpt == 1): + inputs = inputs[0] + if (cpt > 0): + actor.set_input(df.local_id(pid), inputs) + + # Eval the node + print "start the execution of node : " + str(vid) + t0 = clock() + + best_site, cost = minimum_cost_site(vid=vid, provenance=self.p, multisites=self.m) + self.SP.add_to_plan(vid, best_site) + self.SP.add_to_cost(cost) + + + self.eval_vertex_code(vid) + + t1 = clock() + if provenance: + # print id task + print vid + # print actor.inputs + + # data info + # if inputs: + # for i in inputs: + print "size", total_size(actor.inputs) + # for o in range(actor.get_nb_output()): + # print "out", actor.get_output(o) + print "size out", total_size(actor.outputs) + + # execution info + print "Execution time: %s" % (t1 - t0) + + # vm info + + + def eval(self, *args, **kwgs): + """ Evaluate the whole dataflow starting from leaves""" + t0 = clock() + df = self._dataflow + + # Unvalidate all the nodes + self._evaluated.clear() + + # Eval from the leaf + for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid)==0): + + self.eval_vertex(vid) + + print "end evaluation - Scheduling plan : ", str(self.SP.plan) + print "total cost : ", str(self.SP.cost) + t1 = clock() + if quantify: + print "Evaluation time: %s"%(t1-t0) + \ No newline at end of file From 78010a63b3ed92070b8f6d3816f720ab93791fd5 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 5 Dec 2019 18:22:29 +0100 Subject: [PATCH 21/45] BrutEval can save provenance into file --- src/openalea/core/algo/dataflow_evaluation.py | 127 ++++++------------ 1 file changed, 43 insertions(+), 84 deletions(-) diff --git 
a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 3f61cb84..5fb61c82 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -28,14 +28,18 @@ from openalea.core.dataflow import SubDataflow from openalea.core.interface import IFunction +import os +import time +import json + # test for distributed executions -from openalea.core.metadata.provenance_data import Prov -from openalea.core.metadata.cache_index import Cache_index -from openalea.core.metadata.cloud_sites import Site, MultiSiteCloud, link_two_sites +# from openalea.core.metadata.provenance_data import Prov +# from openalea.core.metadata.cache_index import Cache_index +# from openalea.core.metadata.cloud_sites import Site, MultiSiteCloud, link_two_sites -from openalea.core.metadata.costs import minimum_cost_site -from openalea.core.metadata.scheduling_plan import SchedulingPlan -from openalea.core.metadata.data_size import total_size +# from openalea.core.metadata.costs import minimum_cost_site +# from openalea.core.metadata.scheduling_plan import SchedulingPlan +# from openalea.core.metadata.data_size import total_size # This variable has to be retrieve by the settings @@ -101,8 +105,6 @@ def __init__(self, dataflow, record_provenance=False): :param dataflow: to be done """ self._dataflow = dataflow - # if PROVENANCE: - # self.provenance = PrintProvenance(dataflow) if record_provenance: self._prov = RVProvenance() @@ -126,8 +128,6 @@ def eval_vertex_code(self, vid): node = self._dataflow.actor(vid) try: - # prov before - # print "prov", node.get_caption() if self._prov is not None: self._prov.before_eval(self._dataflow, vid) @@ -135,22 +135,11 @@ def eval_vertex_code(self, vid): t0 = clock() ret = node.eval() t1 = clock() - # prov before - # print "prov", node.get_caption() if self._prov is not None: self._prov.after_eval(self._dataflow, vid) + # print self._prov.as_wlformat() - if self._prov is not 
None: - # print self._prov.time_init - # print self._prov.time_end - - print self._prov.as_wlformat() - # provenance(vid, node, t0,t1) - - # if PROVENANCE: - # self.provenance.node_exec(vid, node, t0, t1) - # # provenance(vid, node, t0,t1) # When an exception is raised, a flag is set. # So we remove it when evaluation is ok. @@ -257,7 +246,7 @@ def eval_vertex(self, vid, *args, **kwargs): def eval(self, *args, **kwargs): """ Evaluate the whole dataflow starting from leaves""" - t0 = clock() + t0 = time.time() df = self._dataflow if self._prov is not None: @@ -272,10 +261,19 @@ def eval(self, *args, **kwargs): for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): self.eval_vertex(vid) - t1 = clock() + t1 = time.time() if self._prov is not None: self._prov.time_end = t1 + # Save the provenance in a file + wf_id = str(df.factory.uid) + ".json" + home = os.path.expanduser("~") + provenance_path = os.path.join(home, ".openalea/provenance", wf_id) + if not os.path.exists(os.path.dirname(provenance_path)): + os.makedirs(provenance_path) + provenance = self._prov.as_wlformat() + with open(provenance_path, "a+") as f: + json.dump(provenance, f, indent=4) if quantify: print "Evaluation time: %s" % (t1 - t0) @@ -1135,36 +1133,18 @@ def eval(self, vtx_id=None, **kwds): return False + ############################################################ class TestEval(AbstractEvaluation): - """ Basic evaluation algorithm """ + """ Basic evaluation algorithm + provenance """ __evaluators__.append("TestEval") - def __init__(self, dataflow): + def __init__(self, dataflow, record_provenance=False, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow) + AbstractEvaluation.__init__(self, dataflow, record_provenance) # a property to specify if the node has already been evaluated self._evaluated = set() - # GENERATE FAKE INFO - # provenance - p = Prov() - p.generate_fake() - self.p = p - # cache - c = Cache_index() - c.generate_fake() - self.c = c - # site cloud - m 
= MultiSiteCloud() - m.generate_fake() - self.m = m - # set input data on site s1: - m.list_sites["s1"].add_input_data("2") - # scheduling plan - self.SP = SchedulingPlan() - - def is_stopped(self, vid, actor): """ Return True if evaluation must be stop at this vertex """ @@ -1175,7 +1155,8 @@ def is_stopped(self, vid, actor): if actor.block: status = True n = actor.get_nb_output() - outputs = [i for i in range(n) if actor.get_output(i) is not None ] + outputs = [i for i in range(n) if + actor.get_output(i) is not None] if not outputs: status = False return status @@ -1183,9 +1164,9 @@ def is_stopped(self, vid, actor): pass return False - def eval_vertex(self, vid, *args): + def eval_vertex(self, vid, *args, **kwargs): """ Evaluate the vertex vid """ - print "start the evaluation of node : " + str(vid) + df = self._dataflow actor = df.actor(vid) @@ -1211,52 +1192,30 @@ def eval_vertex(self, vid, *args): actor.set_input(df.local_id(pid), inputs) # Eval the node - print "start the execution of node : " + str(vid) - t0 = clock() - - best_site, cost = minimum_cost_site(vid=vid, provenance=self.p, multisites=self.m) - self.SP.add_to_plan(vid, best_site) - self.SP.add_to_cost(cost) - - self.eval_vertex_code(vid) - t1 = clock() - if provenance: - # print id task - print vid - # print actor.inputs - - # data info - # if inputs: - # for i in inputs: - print "size", total_size(actor.inputs) - # for o in range(actor.get_nb_output()): - # print "out", actor.get_output(o) - print "size out", total_size(actor.outputs) - - # execution info - print "Execution time: %s" % (t1 - t0) - - # vm info - - - def eval(self, *args, **kwgs): + def eval(self, *args, **kwargs): """ Evaluate the whole dataflow starting from leaves""" + t0 = clock() df = self._dataflow + if self._prov is not None: + self._prov.init(df) + self._prov.time_init = t0 + + # Unvalidate all the nodes self._evaluated.clear() # Eval from the leaf - for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid)==0): - + 
for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): self.eval_vertex(vid) - print "end evaluation - Scheduling plan : ", str(self.SP.plan) - print "total cost : ", str(self.SP.cost) t1 = clock() + + if self._prov is not None: + self._prov.time_end = t1 + if quantify: - print "Evaluation time: %s"%(t1-t0) - \ No newline at end of file + print "Evaluation time: %s" % (t1 - t0) \ No newline at end of file From e15b563e6410db35fa89b76c690584c34f0e790c Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 5 Dec 2019 18:25:40 +0100 Subject: [PATCH 22/45] ignore visual studio param --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3003886d..9ffbc614 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ lib64/ # editors .idea/ +.vscode/ # Vim files *.swp From 8a0b95fd92a2db5ca7654ba7565afd74caadfe66 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 5 Dec 2019 19:30:30 +0100 Subject: [PATCH 23/45] start using TetsEval for provenacne tracking tests --- src/openalea/core/algo/dataflow_evaluation.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 5fb61c82..c737b2be 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -1135,9 +1135,9 @@ def eval(self, vtx_id=None, **kwds): ############################################################ -class TestEval(AbstractEvaluation): - """ Basic evaluation algorithm + provenance """ - __evaluators__.append("TestEval") +class TestEvaluation(AbstractEvaluation): + """ Basic evaluation algorithm with provenance capture in file """ + __evaluators__.append("TestEvaluation") def __init__(self, dataflow, record_provenance=False, *args, **kwargs): @@ -1197,7 +1197,7 @@ def eval_vertex(self, vid, *args, **kwargs): def eval(self, *args, **kwargs): """ Evaluate the whole dataflow 
starting from leaves""" - t0 = clock() + t0 = time.time() df = self._dataflow if self._prov is not None: @@ -1212,10 +1212,19 @@ def eval(self, *args, **kwargs): for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): self.eval_vertex(vid) - t1 = clock() + t1 = time.time() if self._prov is not None: self._prov.time_end = t1 + # Save the provenance in a file + wf_id = str(df.factory.uid) + ".json" + home = os.path.expanduser("~") + provenance_path = os.path.join(home, ".openalea/provenance", wf_id) + if not os.path.exists(os.path.dirname(provenance_path)): + os.makedirs(provenance_path) + provenance = self._prov.as_wlformat() + with open(provenance_path, "a+") as f: + json.dump(provenance, f, indent=4) if quantify: print "Evaluation time: %s" % (t1 - t0) \ No newline at end of file From d98286ee33104c1488dd7e313beac09efd6980c5 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Wed, 11 Dec 2019 19:00:41 +0100 Subject: [PATCH 24/45] Add evaluator that evaluate wf fragments --- src/openalea/core/algo/dataflow_evaluation.py | 219 ++++++++++++++++++ src/openalea/core/compositenode.py | 8 +- 2 files changed, 223 insertions(+), 4 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index c737b2be..964a90a3 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -32,6 +32,8 @@ import time import json +from openalea.core.data_manager import load_data, check_data_to_load, write_outputs + # test for distributed executions # from openalea.core.metadata.provenance_data import Prov # from openalea.core.metadata.cache_index import Cache_index @@ -1207,13 +1209,230 @@ def eval(self, *args, **kwargs): # Unvalidate all the nodes self._evaluated.clear() + + # Eval from the leaf + for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): + self.eval_vertex(vid) + + t1 = time.time() + + if self._prov is not None: + self._prov.time_end 
= t1 + # Save the provenance in a file + wf_id = str(df.factory.uid) + ".json" + home = os.path.expanduser("~") + provenance_path = os.path.join(home, ".openalea/provenance", wf_id) + if not os.path.exists(os.path.dirname(provenance_path)): + os.makedirs(provenance_path) + provenance = self._prov.as_wlformat() + with open(provenance_path, "a+") as f: + json.dump(provenance, f, indent=4) + + if quantify: + print "Evaluation time: %s" % (t1 - t0) + + +# class ZMQEvaluation(AbstractEvaluation): +# """ Evaluation with ZMQ """ +# __evaluators__.append("ZMQEvaluation") + +# def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + +# AbstractEvaluation.__init__(self, dataflow, record_provenance) +# # a property to specify if the node has already been evaluated +# self._evaluated = set() + +# def is_stopped(self, vid, actor): +# """ Return True if evaluation must be stop at this vertex """ + +# if vid in self._evaluated: +# return True + +# try: +# if actor.block: +# status = True +# n = actor.get_nb_output() +# outputs = [i for i in range(n) if +# actor.get_output(i) is not None] +# if not outputs: +# status = False +# return status +# except: +# pass +# return False + +# def eval_vertex(self, vid, *args, **kwargs): +# """ Evaluate the vertex vid """ + +# df = self._dataflow +# actor = df.actor(vid) +# self._evaluated.add(vid) + +# # For each inputs +# for pid in df.in_ports(vid): +# inputs = [] + +# cpt = 0 +# # For each connected node +# for npid, nvid, nactor in self.get_parent_nodes(pid): +# if not self.is_stopped(nvid, nactor): +# self.eval_vertex(nvid) + +# inputs.append(nactor.get_output(df.local_id(npid))) +# cpt += 1 + +# # set input as a list or a simple value +# if (cpt == 1): +# inputs = inputs[0] +# if (cpt > 0): +# actor.set_input(df.local_id(pid), inputs) + +# # Eval the node +# self.eval_vertex_code(vid) + +# def eval(self, *args, **kwargs): +# """ Evaluate the whole dataflow starting from leaves""" + +# t0 = time.time() +# df = 
self._dataflow + +# if self._prov is not None: +# self._prov.init(df) +# self._prov.time_init = t0 + + +# # Unvalidate all the nodes +# self._evaluated.clear() + +# # Eval from the leaf +# for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): +# self.eval_vertex(vid) + +# t1 = time.time() + +# if self._prov is not None: +# self._prov.time_end = t1 +# # Save the provenance in a file +# wf_id = str(df.factory.uid) + ".json" +# home = os.path.expanduser("~") +# provenance_path = os.path.join(home, ".openalea/provenance", wf_id) +# if not os.path.exists(os.path.dirname(provenance_path)): +# os.makedirs(provenance_path) +# provenance = self._prov.as_wlformat() +# with open(provenance_path, "a+") as f: +# json.dump(provenance, f, indent=4) + +# if quantify: +# print "Evaluation time: %s" % (t1 - t0) + + +class FragmentEvaluation(AbstractEvaluation): + """ Evaluation with By fragments """ + __evaluators__.append("FragmentEvaluation") + + # TODO: It doesn't work with provenance + def __init__(self, dataflow, record_provenance=False, fragment_infos=None, *args, **kwargs): + + AbstractEvaluation.__init__(self, dataflow, record_provenance) + # a property to specify if the node has already been evaluated + self._evaluated = set() + self._fragment_infos = fragment_infos + + def is_stopped(self, vid, actor): + """ Return True if evaluation must be stop at this vertex """ + + if vid in self._evaluated: + return True + + try: + if actor.block: + status = True + n = actor.get_nb_output() + outputs = [i for i in range(n) if + actor.get_output(i) is not None] + if not outputs: + status = False + return status + except: + pass + return False + + def eval_vertex(self, vid, *args, **kwargs): + """ Evaluate the vertex vid """ + + df = self._dataflow + actor = df.actor(vid) + + self._evaluated.add(vid) + + # For each inputs + for pid in df.in_ports(vid): + inputs = [] + + # check if the data has to be loaded + tmp_path = check_data_to_load(vid, pid, self._fragment_infos) 
+ if tmp_path: + cpt = 1 + inputs.append(load_data(tmp_path)) + else: + cpt = 0 + # For each connected node + for npid, nvid, nactor in self.get_parent_nodes(pid): + if not self.is_stopped(nvid, nactor): + self.eval_vertex(nvid) + + + inputs.append(nactor.get_output(df.local_id(npid))) + cpt += 1 + + # set input as a list or a simple value + if (cpt == 1): + inputs = inputs[0] + if (cpt > 0): + actor.set_input(df.local_id(pid), inputs) + + # Eval the node + self.eval_vertex_code(vid) + + def eval(self, *args, **kwargs): + """ Evaluate the whole dataflow starting from leaves""" + + t0 = time.time() + + df = self._dataflow + + if self._prov is not None: + self._prov.init(df) + self._prov.time_init = t0 + + + # Unvalidate all the nodes + self._evaluated.clear() + # Start by stoping all parents node of input fragments nodes + if self._fragment_infos: + for ivid, ipid in self._fragment_infos['inputs_vid']: + for p in df.in_ports(ivid): + for npid, nvid, nactor in self.get_parent_nodes(p): + self._evaluated.add(nvid) + print "Set : ", nvid, " as EVALUATED" + + # Eval from the leaf for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): self.eval_vertex(vid) t1 = time.time() + # Save the outputs of the fragment into file + for i, vid in enumerate([v[0] for v in self._fragment_infos['outputs_vid']]): + home = os.path.expanduser("~") + cache_path = os.path.join(home, ".openalea", "fragments_tmp_data") + if not os.path.exists(os.path.dirname(cache_path)): + os.makedirs(cache_path) + write_outputs(self._dataflow, vid, cache_path) + + if self._prov is not None: self._prov.time_end = t1 # Save the provenance in a file diff --git a/src/openalea/core/compositenode.py b/src/openalea/core/compositenode.py index b0848b2d..9afebb03 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -499,7 +499,7 @@ def set_output(self, index_key, val): return self.node(self.id_out).set_output(index_key, val) - def get_eval_algo(self, 
record_provenance=False): + def get_eval_algo(self, record_provenance=False, fragment_infos=None): """ Return the evaluation algo instance """ try: algo_str = self.eval_algo @@ -511,7 +511,7 @@ def get_eval_algo(self, record_provenance=False): baseimp = "algo.dataflow_evaluation" module = __import__(baseimp, globals(), locals(), [algo_str]) classobj = module.__dict__[algo_str] - return classobj(self, record_provenance=record_provenance) + return classobj(self, record_provenance=record_provenance, fragment_infos=fragment_infos) except Exception, e: from openalea.core.algo.dataflow_evaluation import \ @@ -521,7 +521,7 @@ def get_eval_algo(self, record_provenance=False): return self.eval_algo def eval_as_expression(self, vtx_id=None, step=False, - record_provenance=False): + record_provenance=False, fragment_infos=None): """ Evaluate a vtx_id @@ -533,7 +533,7 @@ def eval_as_expression(self, vtx_id=None, step=False, return if vtx_id is not None: self.node(vtx_id).modified = True - algo = self.get_eval_algo(record_provenance) + algo = self.get_eval_algo(record_provenance, fragment_infos) try: self.evaluating = True From 05668d0d6356b7ae7ec7c3f68a818b1d4f063c30 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Wed, 11 Dec 2019 19:00:59 +0100 Subject: [PATCH 25/45] Write and load data with dill --- src/openalea/core/data_manager.py | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 src/openalea/core/data_manager.py diff --git a/src/openalea/core/data_manager.py b/src/openalea/core/data_manager.py new file mode 100644 index 00000000..c7f82f6e --- /dev/null +++ b/src/openalea/core/data_manager.py @@ -0,0 +1,42 @@ +import dill +import os + + +def write_data(vid, path): + """ + vid: vid of the node whose output will be stored + path: path where the data will be stored + """ + + return +def write_outputs(df, vid, cache_path): + for port in range(df.node(vid).get_nb_output()): + data_id = str(vid) + "_" + str(port) + with 
open(os.path.join(cache_path,data_id), "w") as f: + dill.dump(df.node(vid).get_output(port), f) + +def load_data(path): + """ + vid: vid of the node whose input will be fetched + path: path of the data to get + """ + with open(path, "r") as f: + data = dill.load(f) + return data + + +def check_data_to_load(vid, pid, fragment_infos): + """ + Return the path if the data has to be loaded + Return None otherwise + + """ + if not fragment_infos: + return None + if (vid, pid) in fragment_infos['input_data'].keys(): + # the data is computed by other fragments + return fragment_infos['input_data'][(vid, pid)] + if (vid, pid) in fragment_infos['cached_data'].keys(): + # the data is get from cache + return fragment_infos['cached_data'][(vid, pid)] + return None \ No newline at end of file From 6f82a0b8297972722ce99fade02140152e27fe67 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 12 Dec 2019 12:59:46 +0100 Subject: [PATCH 26/45] remove old functions --- src/openalea/core/data_manager.py | 10 ++- src/openalea/core/metadata/cache_index.py | 38 ---------- src/openalea/core/metadata/cloud_sites.py | 76 ------------------- src/openalea/core/metadata/provenance_data.py | 49 ------------ 4 files changed, 6 insertions(+), 167 deletions(-) delete mode 100644 src/openalea/core/metadata/cache_index.py delete mode 100644 src/openalea/core/metadata/cloud_sites.py delete mode 100644 src/openalea/core/metadata/provenance_data.py diff --git a/src/openalea/core/data_manager.py b/src/openalea/core/data_manager.py index c7f82f6e..203a6dad 100644 --- a/src/openalea/core/data_manager.py +++ b/src/openalea/core/data_manager.py @@ -7,14 +7,16 @@ def write_data(vid, path): vid: vid of the node whose output will be stored path: path where the data will be stored """ - return + + def write_outputs(df, vid, cache_path): for port in range(df.node(vid).get_nb_output()): data_id = str(vid) + "_" + str(port) with open(os.path.join(cache_path,data_id), "w") as f: 
dill.dump(df.node(vid).get_output(port), f) + def load_data(path): """ vid: vid of the node whose input will be fetched @@ -33,10 +35,10 @@ def check_data_to_load(vid, pid, fragment_infos): """ if not fragment_infos: return None - if (vid, pid) in fragment_infos['input_data'].keys(): - # the data is computed by other fragments - return fragment_infos['input_data'][(vid, pid)] if (vid, pid) in fragment_infos['cached_data'].keys(): # the data is get from cache return fragment_infos['cached_data'][(vid, pid)] + if (vid, pid) in fragment_infos['input_data'].keys(): + # the data is computed by other fragments + return fragment_infos['input_data'][(vid, pid)] return None \ No newline at end of file diff --git a/src/openalea/core/metadata/cache_index.py b/src/openalea/core/metadata/cache_index.py deleted file mode 100644 index a508efc6..00000000 --- a/src/openalea/core/metadata/cache_index.py +++ /dev/null @@ -1,38 +0,0 @@ -# Cache index : { vid: time/ cost to get} - if vid in index -> cache exist - -class Cache_item(): - def __init__(self, vid=None, size=0, site=None): - self.vid = vid - self.size = size - self.site = site - - def __repr__(self): - return "Cache item - vid : " + str(self.vid) + " | size : " + str(self.size) + " | site : " + str(self.site) - - -class Cache_index(): - def __init__(self): - self.cache_index = dict() - - def add_cache_index_item(self, item): - self.cache_index[item.vid] = item - - def add_cache_index_itemlist(self, items): - for item in items: - self.cache_index[item.vid] = item - - def generate_fake(self): - list_items = [] - item1 = Cache_item(vid="2", size=100, site="s2") - item2 = Cache_item(vid="5", size=50000, site="s1") - - list_items.append(item1) - list_items.append(item2) - - self.add_cache_index_itemlist(list_items) - - def check_index(self, vid): - if vid in self.cache_index: - return self.cache_index[vid] - else: - return None \ No newline at end of file diff --git a/src/openalea/core/metadata/cloud_sites.py 
b/src/openalea/core/metadata/cloud_sites.py deleted file mode 100644 index 847101d4..00000000 --- a/src/openalea/core/metadata/cloud_sites.py +++ /dev/null @@ -1,76 +0,0 @@ -from random import choice -from string import ascii_letters - - -# sites: -class Site(): - def __init__(self, sid="", compute_cost=0., storage_cost=0., compute_power=0., buzyness=0.): - if not sid: - sid = ''.join(choice(ascii_letters) for i in range(12)) - self.sid = sid - self.compute_cost = compute_cost - self.compute_power = compute_power - self.storage_cost = storage_cost - self.buzyness = buzyness - - self.transfer_cost = dict() - self.transfer_cost[self.sid] = 0. - # list task id, whose input data is on the site - self.input_storage = set() - - def __repr__(self): - return "Site - sid : " + str(self.sid) + " | compute_cost : " + str(self.compute_cost) \ - + " | storage_cost : " + str(self.storage_cost) + " | buzyness : " + str(self.buzyness) \ - + " | input data : " + str(self.input_storage) + " | transfer : " + str(self.transfer_cost) - - def free(self): - self.buzyness = 0 - - def increase_workload(self, inc=0): - self.buzyness += inc - - def add_transfer_site(self, nsite_sid, t_cost): - self.transfer_cost[nsite_sid] = t_cost - - def add_input_data(self, vid): - self.input_storage.add(vid) - - def check_input(self, vid): - return vid in self.input_storage - - -def link_two_sites(site1, site2, transfer_cost): - site1.add_transfer_site(site2.sid, transfer_cost) - site2.add_transfer_site(site1.sid, transfer_cost) - - -class MultiSiteCloud(): - def __init__(self, list_sites=dict()): - self.list_sites = list_sites - - def add_site(self, site): - self.list_sites[site.sid] = site - - def add_sitelist(self, sites): - for site in sites: - self.list_sites[site.sid] = site - - def generate_fake(self): - # start 3 sites - list_items = [] - site1 = Site(sid='s0', compute_cost=10., storage_cost=10., compute_power=10.) - site2 = Site(sid='s1', compute_cost=100., storage_cost=1., compute_power=10.) 
- site3 = Site(sid='s2', compute_cost=1., storage_cost=100., compute_power=10.) - - list_items.append(site1) - list_items.append(site2) - list_items.append(site3) - - self.add_sitelist(list_items) - - # generate transfer cost - link_two_sites(site1, site2, 2) - link_two_sites(site1, site3, 4) - link_two_sites(site3, site2, 6) - - diff --git a/src/openalea/core/metadata/provenance_data.py b/src/openalea/core/metadata/provenance_data.py deleted file mode 100644 index 0b73f444..00000000 --- a/src/openalea/core/metadata/provenance_data.py +++ /dev/null @@ -1,49 +0,0 @@ -# fake provenance for WF test "add_test" -# 6 vertices -# 3 edges -# input vid = 2 et 4 -# output vid = 5 -# 0 et 1 = auto input et output "node" - not used -# 4 = add node - - -class Prov_item(): - def __init__(self, vid=None, exec_time=0, input_size=0, output_size=0): - self.vid = vid - self.exec_time = exec_time - self.input_size = input_size - self.output_size = output_size - - def __repr__(self): - return "Provenance item - vid : " + str(self.vid) + " | exec_time : " + str(self.exec_time) + \ - " | input_size : " + str(self.input_size) + " | output_size : " + str(self.output_size) - - -class Prov(): - def __init__(self): - self.prov = dict() - - def add_prov_item(self, item): - self.prov[item.vid] = item - - def add_prov_itemlist(self, items): - for item in items: - self.prov[item.vid] = item - - def check_prov(self, vid): - if vid in self.prov: - return self.prov[vid] - else: - return None - - def generate_fake(self): - list_items = [] - item1 = Prov_item("2", 10, 5000, 300) - item2 = Prov_item("4", 3, 200, 1000) - item3 = Prov_item("5", 5, 1300, 100) - - list_items.append(item1) - list_items.append(item2) - list_items.append(item3) - - self.add_prov_itemlist(list_items) \ No newline at end of file From 27e23b307b5f3126ebbe88ddb0139107a833d389 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Thu, 12 Dec 2019 15:41:43 +0100 Subject: [PATCH 27/45] add cassandra workbench --- .gitignore | 2 ++ 1 
file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 9ffbc614..7091eacb 100644 --- a/.gitignore +++ b/.gitignore @@ -93,4 +93,6 @@ doc/_dvlpt/ # #} # user custom filters +# ignore cassandra files +.cassandraWorkbench* From fedb427d6feebb4ff44a81edc6fea7c869266fa0 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Mon, 16 Dec 2019 11:42:17 +0100 Subject: [PATCH 28/45] add vscode parameters --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7091eacb..182e1300 100644 --- a/.gitignore +++ b/.gitignore @@ -95,4 +95,4 @@ doc/_dvlpt/ # user custom filters # ignore cassandra files .cassandraWorkbench* - +.vscode/ From 79c87546dbf85859e5460f9ae1ca71df8eb7dea0 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Mon, 16 Dec 2019 14:23:56 +0100 Subject: [PATCH 29/45] . --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 182e1300..f974705e 100644 --- a/.gitignore +++ b/.gitignore @@ -95,4 +95,4 @@ doc/_dvlpt/ # user custom filters # ignore cassandra files .cassandraWorkbench* -.vscode/ +.vscode/* \ No newline at end of file From 91765eb217b73a3385aaad02b748ef43164db655 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Mon, 16 Dec 2019 14:26:25 +0100 Subject: [PATCH 30/45] use global paths --- src/openalea/core/algo/dataflow_evaluation.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 964a90a3..36184ce9 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -34,6 +34,8 @@ from openalea.core.data_manager import load_data, check_data_to_load, write_outputs +from distributed.cloud_infos.tmp_path import TMP_PATH, PROVENANCE_PATH + # test for distributed executions # from openalea.core.metadata.provenance_data import Prov # from
openalea.core.metadata.cache_index import Cache_index @@ -1372,9 +1374,12 @@ def eval_vertex(self, vid, *args, **kwargs): # check if the data has to be loaded tmp_path = check_data_to_load(vid, pid, self._fragment_infos) + print "le noeud à récupérer : ", tmp_path if tmp_path: cpt = 1 - inputs.append(load_data(tmp_path)) + for npid, nvid, nactor in self.get_parent_nodes(pid): + print npid, nvid, nactor + # inputs.append(load_data(tmp_path)) else: cpt = 0 # For each connected node @@ -1426,19 +1431,16 @@ def eval(self, *args, **kwargs): # Save the outputs of the fragment into file for i, vid in enumerate([v[0] for v in self._fragment_infos['outputs_vid']]): - home = os.path.expanduser("~") - cache_path = os.path.join(home, ".openalea", "fragments_tmp_data") - if not os.path.exists(os.path.dirname(cache_path)): - os.makedirs(cache_path) - write_outputs(self._dataflow, vid, cache_path) + if not os.path.exists(os.path.dirname(TMP_PATH)): + os.makedirs(TMP_PATH) + write_outputs(self._dataflow, vid, TMP_PATH) if self._prov is not None: self._prov.time_end = t1 # Save the provenance in a file wf_id = str(df.factory.uid) + ".json" - home = os.path.expanduser("~") - provenance_path = os.path.join(home, ".openalea/provenance", wf_id) + provenance_path = os.path.join(PROVENANCE_PATH, wf_id) if not os.path.exists(os.path.dirname(provenance_path)): os.makedirs(provenance_path) provenance = self._prov.as_wlformat() From 9992069a3707110e066e36a5e0ca210e9811535b Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Mon, 16 Dec 2019 15:11:04 +0100 Subject: [PATCH 31/45] add zmq evaluation --- src/openalea/core/algo/dataflow_evaluation.py | 250 ++++++++++++------ 1 file changed, 174 insertions(+), 76 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 36184ce9..bf1c5306 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -112,8 +112,11 @@ def 
__init__(self, dataflow, record_provenance=False): if record_provenance: self._prov = RVProvenance() + self._provdb = ProvMongo() else: self._prov = None + self._provdb = None + def eval(self, *args): """todo""" @@ -123,7 +126,7 @@ def is_stopped(self, vid, actor): """ Return True if evaluation must be stop at this vertex. """ return actor.block - def eval_vertex_code(self, vid): + def eval_vertex_code(self, vid, *args, **kwargs): """ Evaluate the vertex vid. Can raise an exception if evaluation failed. @@ -1234,100 +1237,195 @@ def eval(self, *args, **kwargs): print "Evaluation time: %s" % (t1 - t0) -# class ZMQEvaluation(AbstractEvaluation): -# """ Evaluation with ZMQ """ -# __evaluators__.append("ZMQEvaluation") +class ZMQEvaluation(AbstractEvaluation): + """ Basic evaluation algorithm """ + __evaluators__.append("ZMQEvaluation") + + def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + + AbstractEvaluation.__init__(self, dataflow, record_provenance) + # a property to specify if the node has already been evaluated + self._evaluated = set() + self.socket=None + + def is_stopped(self, vid, actor): + """ Return True if evaluation must be stop at this vertex """ + + if vid in self._evaluated: + return True + + try: + if actor.block: + status = True + n = actor.get_nb_output() + outputs = [i for i in range(n) if + actor.get_output(i) is not None] + if not outputs: + status = False + return status + except: + pass + return False + + def eval_vertex(self, vid, *args, **kwargs): + """ Evaluate the vertex vid """ + + df = self._dataflow + actor = df.actor(vid) + + self._evaluated.add(vid) -# def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + # For each inputs + for pid in df.in_ports(vid): + inputs = [] + + cpt = 0 + # For each connected node + for npid, nvid, nactor in self.get_parent_nodes(pid): + if not self.is_stopped(nvid, nactor): + self.eval_vertex(nvid) + + inputs.append(nactor.get_output(df.local_id(npid))) + cpt += 1 + 
+ # set input as a list or a simple value + if (cpt == 1): + inputs = inputs[0] + if (cpt > 0): + actor.set_input(df.local_id(pid), inputs) -# AbstractEvaluation.__init__(self, dataflow, record_provenance) -# # a property to specify if the node has already been evaluated -# self._evaluated = set() + # Get the best worker + # TODO + worker_id = 0 -# def is_stopped(self, vid, actor): -# """ Return True if evaluation must be stop at this vertex """ + # Eval the node + self.eval_vertex_code(vid, worker_id) -# if vid in self._evaluated: -# return True + def eval(self, *args, **kwargs): + """ Evaluate the whole dataflow starting from leaves""" -# try: -# if actor.block: -# status = True -# n = actor.get_nb_output() -# outputs = [i for i in range(n) if -# actor.get_output(i) is not None] -# if not outputs: -# status = False -# return status -# except: -# pass -# return False + # Init the provenance + t0 = clock() + df = self._dataflow + if self._prov is not None: + self._prov.init(df) + self._prov.time_init = t0 -# def eval_vertex(self, vid, *args, **kwargs): -# """ Evaluate the vertex vid """ + self._provdb.init( + remote=cloud_info.REMOTE, + path=cloud_info.FILEPATH, + ssh_ip_addr=cloud_info.PROVDB_SSH_ADDR, + ssh_pkey=cloud_info.SSH_PKEY, + ssh_username=cloud_info.SSU_USERNAME, + remote_bind_address=(cloud_info.MONGO_ADDR, cloud_info.MONGO_PORT), + mongo_ip_addr=cloud_info.MONGO_ADDR, + mongo_port=cloud_info.MONGO_PORT + ) + # Init the workers + # context = zmq.Context() + # socket = context.socket(zmq.REQ) + # socket.connect("tcp://localhost:5559") + # self.socket = socket + # TODO: FIND A WAY TO INIT WF ON ALL WORKER - for now the worker are started by hand + # msg=dict() + # msg['pkg_name'] = self._dataflow.get_factory().package.name + # msg['wf_name'] = self._dataflow.get_factory().name + # socket.send(json.dumps(msg)) + # state=socket.recv() + # state=json.loads(state) + # if not state['Initialization']=="success": + # print "Eval failed due to failed init 
workers" + # return -# df = self._dataflow -# actor = df.actor(vid) + # Unvalidate all the nodes + self._evaluated.clear() -# self._evaluated.add(vid) + # Eval from the leaf + for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): + self.eval_vertex(vid) -# # For each inputs -# for pid in df.in_ports(vid): -# inputs = [] + # Update workflow provenance + t1 = clock() + if self._prov is not None: + self._prov.time_end = t1 + wfitem = self._prov.as_wlformat() + self._provdb.add_wf_item(wfitem) -# cpt = 0 -# # For each connected node -# for npid, nvid, nactor in self.get_parent_nodes(pid): -# if not self.is_stopped(nvid, nactor): -# self.eval_vertex(nvid) + # close remote connections + self._provdb.close() -# inputs.append(nactor.get_output(df.local_id(npid))) -# cpt += 1 + if quantify: + print "Evaluation time: %s" % (t1 - t0) -# # set input as a list or a simple value -# if (cpt == 1): -# inputs = inputs[0] -# if (cpt > 0): -# actor.set_input(df.local_id(pid), inputs) + def eval_vertex_code(self, vid, *args, **kwargs): + """ + Evaluate the vertex vid. + Can raise an exception if evaluation failed. 
+ """ -# # Eval the node -# self.eval_vertex_code(vid) + node = self._dataflow.actor(vid) -# def eval(self, *args, **kwargs): -# """ Evaluate the whole dataflow starting from leaves""" + try: + t0 = clock() + if self._prov is not None: + self._prov.before_eval(self._dataflow, vid) -# t0 = time.time() -# df = self._dataflow + # Send value to worker + context = zmq.Context() + socket = context.socket(zmq.REQ) + print "Start evaluating node : ", vid + socket.connect("tcp://localhost:5559") + + msg = dict() + # msg['pkg_name'] = self._dataflow.get_factory().package.name + # msg['wf_name'] = self._dataflow.get_factory().name + msg['vid'] = vid + inputs = node.input_desc + for inp in inputs: + inp['interface']=None + inp['value']=node.get_input(inp['name']) + msg['inputs'] = inputs + + if (vid != 0) & (vid != 1): + socket.send(json.dumps(msg)) + outputs = socket.recv() + outputs = json.loads(outputs) + for out in outputs: + node.set_output(out['name'], val=out['value']) + else: + ret = node.eval() + # ret = node.eval() -# if self._prov is not None: -# self._prov.init(df) -# self._prov.time_init = t0 + dt = clock() - t0 + if self._prov is not None: + taskitem=self._prov.after_eval(self._dataflow, vid, dt) + if taskitem: + self._provdb.add_task_item(taskitem) + # When an exception is raised, a flag is set. + # So we remove it when evaluation is ok. 
+ node.raise_exception = False + # if hasattr(node, 'raise_exception'): + # del node.raise_exception + node.notify_listeners(('data_modified', None, None)) + # return ret + return -# # Unvalidate all the nodes -# self._evaluated.clear() - -# # Eval from the leaf -# for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): -# self.eval_vertex(vid) - -# t1 = time.time() - -# if self._prov is not None: -# self._prov.time_end = t1 -# # Save the provenance in a file -# wf_id = str(df.factory.uid) + ".json" -# home = os.path.expanduser("~") -# provenance_path = os.path.join(home, ".openalea/provenance", wf_id) -# if not os.path.exists(os.path.dirname(provenance_path)): -# os.makedirs(provenance_path) -# provenance = self._prov.as_wlformat() -# with open(provenance_path, "a+") as f: -# json.dump(provenance, f, indent=4) - -# if quantify: -# print "Evaluation time: %s" % (t1 - t0) + except EvaluationException, e: + e.vid = vid + e.node = node + # When an exception is raised, a flag is set. + node.raise_exception = True + node.notify_listeners(('data_modified', None, None)) + raise e + except Exception, e: + # When an exception is raised, a flag is set. 
+ node.raise_exception = True + node.notify_listeners(('data_modified', None, None)) + raise EvaluationException(vid, node, e, \ + tb.format_tb(sys.exc_info()[2])) + return class FragmentEvaluation(AbstractEvaluation): """ Evaluation with By fragments """ From d3f426ad7e867445f09b7d7e8ea18cb33bf0f6dc Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Mon, 16 Dec 2019 15:23:58 +0100 Subject: [PATCH 32/45] add mongo as provenance database --- src/openalea/core/algo/dataflow_evaluation.py | 40 ++++- src/openalea/core/compositenode.py | 3 +- src/openalea/core/metadata/cloud_info.py | 18 ++ src/openalea/core/metadata/provenanceDB.py | 159 ++++++++++++++++++ 4 files changed, 210 insertions(+), 10 deletions(-) create mode 100644 src/openalea/core/metadata/cloud_info.py create mode 100644 src/openalea/core/metadata/provenanceDB.py diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index bf1c5306..b4362f64 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -144,7 +144,9 @@ def eval_vertex_code(self, vid, *args, **kwargs): t1 = clock() if self._prov is not None: - self._prov.after_eval(self._dataflow, vid) + taskitem = self._prov.after_eval(self._dataflow, vid, dt) + if self._provdb and taskitem: + self._provdb.add_task_item(taskitem) # print self._prov.as_wlformat() @@ -260,6 +262,18 @@ def eval(self, *args, **kwargs): self._prov.init(df) self._prov.time_init = t0 + if self._provdb is not None: + self._provdb.init( + remote=cloud_info.REMOTE, + path=cloud_info.FILEPATH, + ssh_ip_addr=cloud_info.PROVDB_SSH_ADDR, + ssh_pkey=cloud_info.SSH_PKEY, + ssh_username=cloud_info.SSU_USERNAME, + remote_bind_address=(cloud_info.MONGO_ADDR, cloud_info.MONGO_PORT), + mongo_ip_addr=cloud_info.MONGO_ADDR, + mongo_port=cloud_info.MONGO_PORT + ) + # Unvalidate all the nodes self._evaluated.clear() @@ -272,15 +286,23 @@ def eval(self, *args, **kwargs): if self._prov is not 
None: self._prov.time_end = t1 + wfitem = self._prov.as_wlformat() + if self._provdb is not None: + self._provdb.add_wf_item(wfitem) + # self._provdb.add_list_task_item(taskitemslist) + + # close remote connections + self._provdb.close() + # Save the provenance in a file - wf_id = str(df.factory.uid) + ".json" - home = os.path.expanduser("~") - provenance_path = os.path.join(home, ".openalea/provenance", wf_id) - if not os.path.exists(os.path.dirname(provenance_path)): - os.makedirs(provenance_path) - provenance = self._prov.as_wlformat() - with open(provenance_path, "a+") as f: - json.dump(provenance, f, indent=4) + # wf_id = str(df.factory.uid) + ".json" + # home = os.path.expanduser("~") + # provenance_path = os.path.join(home, ".openalea/provenance", wf_id) + # if not os.path.exists(os.path.dirname(provenance_path)): + # os.makedirs(provenance_path) + # provenance = self._prov.as_wlformat() + # with open(provenance_path, "a+") as f: + # json.dump(provenance, f, indent=4) if quantify: print "Evaluation time: %s" % (t1 - t0) diff --git a/src/openalea/core/compositenode.py b/src/openalea/core/compositenode.py index 9afebb03..460a5cc8 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -521,7 +521,8 @@ def get_eval_algo(self, record_provenance=False, fragment_infos=None): return self.eval_algo def eval_as_expression(self, vtx_id=None, step=False, - record_provenance=False, fragment_infos=None): + record_provenance=False, fragment_infos=None, + *args, **kwargs): """ Evaluate a vtx_id diff --git a/src/openalea/core/metadata/cloud_info.py b/src/openalea/core/metadata/cloud_info.py new file mode 100644 index 00000000..ec1e9d14 --- /dev/null +++ b/src/openalea/core/metadata/cloud_info.py @@ -0,0 +1,18 @@ +from openalea.core.path import path +from openalea.core import settings + +# infos about provenance db +REMOTE = False + +# SSH +PROVDB_SSH_ADDR = "134.158.247.32" +SSU_USERNAME="ubuntu" +SSH_PKEY="/home/gaetan/.ssh/id_rsa" +# 
REMOTE_DB_ADDR=('127.0.0.1', 27017) + +# Mongo +MONGO_ADDR='127.0.0.1' +MONGO_PORT = 27017 + +# file infos +FILEPATH = path(settings.get_openalea_home_dir()) / 'provenance' \ No newline at end of file diff --git a/src/openalea/core/metadata/provenanceDB.py b/src/openalea/core/metadata/provenanceDB.py new file mode 100644 index 00000000..5cb151bf --- /dev/null +++ b/src/openalea/core/metadata/provenanceDB.py @@ -0,0 +1,159 @@ +from sshtunnel import SSHTunnelForwarder +import pymongo +from pymongo.errors import ConnectionFailure +from sshtunnel import BaseSSHTunnelForwarderError +import json + +from openalea.core.path import path + + + +class ProvFile(): + def __init__(self, *args, **kwargs): + self.localpath = "" + + def add_prov_item(self, item, *args, **kwargs): + print "START WRITING" + prov_path = path(self.localpath) / item["workflow"] + '.json' + print self.localpath + with open(prov_path, 'a+') as fp: + json.dump(item, fp, indent=4) + + def init(self, *args, **kwargs): + self.localpath = kwargs["path"] + + def close(self): + pass + + +class ProvMongo(): + def __init__(self, *args, **kwargs): + self.client = None + self.server = None + # self.provdb = None + self.taskdb = None + self.wfdb = None + self.remote = False + + def __set__(self, instance, value): + self.instance = value + + def is_in(self, task_id=None, wf_id=None): + # check if in mongodb + if task_id: + if self.taskdb.find_one({"task_id": task_id}): + return self.taskdb.find_one({"task_id": task_id}) + else: + return False + if wf_id: + if self.wfdb.find_one({"workflow": wf_id}): + return self.wfdb.find_one({"workflow": wf_id}) + else: + return False + else: + return False + + def add_task_item(self, item, *args, **kwargs): + # add element to index db + self.taskdb.insert_one(item) + + def add_list_task_item(self, itemlist, *args, **kwargs): + for item in itemlist: + self.taskdb.insert_one(item) + + def add_wf_item(self, item, *args, **kwargs): + # add element to index db + 
self.wfdb.insert_one(item) + + def add_list_wf_item(self, itemlist, *args, **kwargs): + for item in itemlist: + self.wfdb.insert_one(item) + + def show(self, task_id=None, wf_id=None): + print("the task provenance has : ", self.taskdb.count(), " entries :") + for doc in self.taskdb.find({}): + print(doc) + + def start_sshtunnel(self, *args, **kwargs): + try: + self.server = SSHTunnelForwarder( + ssh_address_or_host=kwargs['ssh_ip_addr'], + ssh_pkey=kwargs['ssh_pkey'], + ssh_username=kwargs['ssh_username'], + remote_bind_address=kwargs['remote_bind_address'] + # , + # *args, + # **kwargs + ) + + self.server.start() + except BaseSSHTunnelForwarderError: + print "Fail to connect to ssh device" + + def start_client(self, *args, **kwargs): + if self.remote: + if not self.server: + print "SSH Server not started - cannot connect to Mongo" + return + try: + client = pymongo.MongoClient(host=kwargs['mongo_ip_addr'], + port=self.server.local_bind_port + # , + # , # server.local_bind_port is assigned local port + # username='admin', + # password='admin' + # *args, + # **kwargs + ) + + self.client = client + db = self.client.provdb + self.taskdb = db.task_collection + self.wfdb = db.workflow_collection + except ConnectionFailure: + print "failed to connect to mongodb" + else: + try: + client = pymongo.MongoClient(host=kwargs['mongo_ip_addr'], + port=kwargs['mongo_port'] + # , + # , # server.local_bind_port is assigned local port + # username='admin', + # password='admin' + # *args, + # **kwargs + ) + + self.client = client + db = self.client.provdb + self.taskdb = db.task_collection + self.wfdb = db.workflow_collection + except ConnectionFailure: + print "failed to connect to mongodb" + + def close_sshtunel(self): + return self.server.stop() + + def close_client(self): + return self.client.close() + + def remove_all_item(self): + self.taskdb.drop() + self.wfdb.drop() + return 0 + + def init(self, *args, **kwargs): + if kwargs['remote']: + self.remote=True + 
self.start_sshtunnel(*args, **kwargs) + self.start_client(*args, **kwargs) + else: + self.remote=False + self.start_client(*args, **kwargs) + + def close(self): + if self.remote: + self.close_client() + self.close_sshtunel() + else: + self.close_client() From ac20b8b7507526490dcba88c13f6c2910be8b7fd Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Tue, 17 Dec 2019 09:04:53 +0100 Subject: [PATCH 33/45] . --- src/openalea/core/algo/dataflow_evaluation.py | 2 +- src/openalea/core/metadata/data_size.py | 30 ++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index b4362f64..2a900d09 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -34,7 +34,7 @@ from openalea.core.data_manager import load_data, check_data_to_load, write_outputs -from distributed.cloud_infos.tmp_path import TMP_PATH, PROVENANCE_PATH +from openalea.distributed.cloud_infos.paths import TMP_PATH, PROVENANCE_PATH # test for distributed executions # from openalea.core.metadata.provenance_data import Prov diff --git a/src/openalea/core/metadata/data_size.py b/src/openalea/core/metadata/data_size.py index 441df453..9c3b5b33 100644 --- a/src/openalea/core/metadata/data_size.py +++ b/src/openalea/core/metadata/data_size.py @@ -2,6 +2,10 @@ from sys import getsizeof, stderr from itertools import chain from collections import deque +import sys +from types import ModuleType, FunctionType +from gc import get_referents + try: from reprlib import repr except ImportError: @@ -45,4 +49,28 @@ def sizeof(o): break return s - return sizeof(o) \ No newline at end of file + return sizeof(o) + + +# Custom objects know their class. +# Function objects seem to know way too much, including modules. +# Exclude modules as well. 
+BLACKLIST = type, ModuleType, FunctionType + + +def getsize(obj): + """sum size of object & members.""" + if isinstance(obj, BLACKLIST): + raise TypeError('getsize() does not take argument of type: '+ str(type(obj))) + seen_ids = set() + size = 0 + objects = [obj] + while objects: + need_referents = [] + for obj in objects: + if not isinstance(obj, BLACKLIST) and id(obj) not in seen_ids: + seen_ids.add(id(obj)) + size += sys.getsizeof(obj) + need_referents.append(obj) + objects = get_referents(*need_referents) + return size \ No newline at end of file From 0eca1cca28157604891c5359e8c7ce0b648d0666 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Wed, 18 Dec 2019 17:27:01 +0100 Subject: [PATCH 34/45] update for fragment eval + cassandra --- src/openalea/core/algo/dataflow_evaluation.py | 47 ++++++++++--------- src/openalea/core/data_manager.py | 21 +++++---- src/openalea/core/metadata/cloud_info.py | 4 +- 3 files changed, 39 insertions(+), 33 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 2a900d09..3dab4629 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -32,18 +32,13 @@ import time import json -from openalea.core.data_manager import load_data, check_data_to_load, write_outputs +from openalea.core.data_manager import load_data, check_data_to_load, write_data from openalea.distributed.cloud_infos.paths import TMP_PATH, PROVENANCE_PATH +import openalea.core.metadata.cloud_info -# test for distributed executions -# from openalea.core.metadata.provenance_data import Prov -# from openalea.core.metadata.cache_index import Cache_index -# from openalea.core.metadata.cloud_sites import Site, MultiSiteCloud, link_two_sites - -# from openalea.core.metadata.costs import minimum_cost_site -# from openalea.core.metadata.scheduling_plan import SchedulingPlan -# from openalea.core.metadata.data_size import total_size +from 
openalea.distributed.index.cacheIndex import IndexCassandra +from openalea.distributed.index.id import get_id # This variable has to be retrieve by the settings @@ -1460,6 +1455,7 @@ def __init__(self, dataflow, record_provenance=False, fragment_infos=None, *args # a property to specify if the node has already been evaluated self._evaluated = set() self._fragment_infos = fragment_infos + self._index = None def is_stopped(self, vid, actor): """ Return True if evaluation must be stop at this vertex """ @@ -1492,14 +1488,14 @@ def eval_vertex(self, vid, *args, **kwargs): for pid in df.in_ports(vid): inputs = [] - # check if the data has to be loaded - tmp_path = check_data_to_load(vid, pid, self._fragment_infos) - print "le noeud à récupérer : ", tmp_path - if tmp_path: + # check if the data has to be loaded | and get path + ituple = check_data_to_load(vid, pid, self._fragment_infos) + if ituple: cpt = 1 for npid, nvid, nactor in self.get_parent_nodes(pid): - print npid, nvid, nactor - # inputs.append(load_data(tmp_path)) + data_id = get_id(ituple[0], ituple[1]) + row = self._index.find_one(data_id=data_id) + inputs.append(load_data(row[0].path[0])) else: cpt = 0 # For each connected node @@ -1522,8 +1518,11 @@ def eval_vertex(self, vid, *args, **kwargs): def eval(self, *args, **kwargs): """ Evaluate the whole dataflow starting from leaves""" - + print "START fragment evaluation" t0 = time.time() + + self._index = IndexCassandra() + self._index.initialize() df = self._dataflow @@ -1537,10 +1536,9 @@ def eval(self, *args, **kwargs): # Start by stoping all parents node of input fragments nodes if self._fragment_infos: for ivid, ipid in self._fragment_infos['inputs_vid']: - for p in df.in_ports(ivid): - for npid, nvid, nactor in self.get_parent_nodes(p): - self._evaluated.add(nvid) - print "Set : ", nvid, " as EVALUATED" + for npid, nvid, nactor in self.get_parent_nodes(ipid): + self._evaluated.add(nvid) + print "Set : ", nvid, " as EVALUATED" # Eval from the leaf @@ 
-1550,10 +1548,13 @@ def eval(self, *args, **kwargs): t1 = time.time() # Save the outputs of the fragment into file - for i, vid in enumerate([v[0] for v in self._fragment_infos['outputs_vid']]): - if not os.path.exists(os.path.dirname(TMP_PATH)): + if not os.path.exists(os.path.dirname(TMP_PATH)): os.makedirs(TMP_PATH) - write_outputs(self._dataflow, vid, TMP_PATH) + for i, vid in enumerate([v[0] for v in self._fragment_infos['outputs_vid']]): + for port in range(df.node(vid).get_nb_output()): + data_id = get_id(vid, port) + write_data(data_id=data_id, data=df.node(vid).get_output(port), path=TMP_PATH) + self._index.add_data(data_id=data_id, path=str(os.path.join(TMP_PATH, data_id))) if self._prov is not None: diff --git a/src/openalea/core/data_manager.py b/src/openalea/core/data_manager.py index 203a6dad..d3b3d829 100644 --- a/src/openalea/core/data_manager.py +++ b/src/openalea/core/data_manager.py @@ -1,20 +1,22 @@ import dill import os +from openalea.core.metadata.cloud_info import CACHE_PATH, TMP_PATH +from openalea.distributed.index.id import get_id - -def write_data(vid, path): +def write_data(data_id, data, path): """ vid: vid of the node whose output will be stored path: path where the data will be stored """ - return + with open(os.path.join(path,data_id), "w") as f: + dill.dump(data, f) -def write_outputs(df, vid, cache_path): - for port in range(df.node(vid).get_nb_output()): - data_id = str(vid) + "_" + str(port) - with open(os.path.join(cache_path,data_id), "w") as f: - dill.dump(df.node(vid).get_output(port), f) +# def write_outputs(data_id, cache_path): +# for port in range(df.node(vid).get_nb_output()): +# data_id = get_id(vid, port) +# with open(os.path.join(cache_path,data_id), "w") as f: +# dill.dump(df.node(vid).get_output(port), f) def load_data(path): @@ -41,4 +43,5 @@ def check_data_to_load(vid, pid, fragment_infos): if (vid, pid) in fragment_infos['input_data'].keys(): # the data is computed by other fragments return 
fragment_infos['input_data'][(vid, pid)] - return None \ No newline at end of file + return None + diff --git a/src/openalea/core/metadata/cloud_info.py b/src/openalea/core/metadata/cloud_info.py index ec1e9d14..bab89609 100644 --- a/src/openalea/core/metadata/cloud_info.py +++ b/src/openalea/core/metadata/cloud_info.py @@ -15,4 +15,6 @@ MONGO_PORT = 27017 # file infos -FILEPATH = path(settings.get_openalea_home_dir()) / 'provenance' \ No newline at end of file +PROVENANCE_PATH = path(settings.get_openalea_home_dir()) / 'provenance' +TMP_PATH = path(settings.get_openalea_home_dir()) / "execution_data" +CACHE_PATH = path(settings.get_openalea_home_dir()) / "cached_data" From 38c336e3b2ac6c779996480d256c3d145aa96430 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Thu, 19 Dec 2019 10:19:51 +0100 Subject: [PATCH 35/45] . --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index f974705e..4cbba462 100644 --- a/.gitignore +++ b/.gitignore @@ -95,4 +95,4 @@ doc/_dvlpt/ # user custom filters # ignore cassandra files .cassandraWorkbench* -.vscode/* \ No newline at end of file +.vscode/ \ No newline at end of file From e88a28f9e8428820dab268c36ecab660f559ddb8 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Mon, 13 Jan 2020 14:18:49 +0100 Subject: [PATCH 36/45] refactor - put source in distributed pkg --- src/openalea/core/algo/dataflow_evaluation.py | 42 ++--- src/openalea/core/data_manager.py | 47 ------ src/openalea/core/metadata/cloud_info.py | 32 ++-- src/openalea/core/metadata/costs.py | 90 +++++----- src/openalea/core/metadata/data_size.py | 76 --------- src/openalea/core/metadata/provenanceDB.py | 159 ------------------ src/openalea/core/metadata/scheduling_plan.py | 13 -- 7 files changed, 84 insertions(+), 375 deletions(-) delete mode 100644 src/openalea/core/data_manager.py delete mode 100644 src/openalea/core/metadata/data_size.py delete mode 100644 src/openalea/core/metadata/provenanceDB.py delete mode 
100644 src/openalea/core/metadata/scheduling_plan.py diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 3dab4629..88201c1a 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -32,10 +32,14 @@ import time import json -from openalea.core.data_manager import load_data, check_data_to_load, write_data +from openalea.distributed.data.data_manager import load_data, check_data_to_load, write_data -from openalea.distributed.cloud_infos.paths import TMP_PATH, PROVENANCE_PATH -import openalea.core.metadata.cloud_info +from openalea.distributed.cloud_infos.paths import (TMP_PATH, PROVENANCE_PATH, CACHE_PATH) +from openalea.distributed.cloud_infos.ssh_info import (PROVDB_SSH_ADDR, SSH_PKEY, SSU_USERNAME) +from openalea.distributed.cloud_infos.mongo_info import REMOTE, MONGO_PORT, MONGO_ADDR +# import openalea.core.metadata.cloud_info + +from openalea.distributed.provenance.provenanceDB import ProvMongo from openalea.distributed.index.cacheIndex import IndexCassandra from openalea.distributed.index.id import get_id @@ -259,14 +263,14 @@ def eval(self, *args, **kwargs): if self._provdb is not None: self._provdb.init( - remote=cloud_info.REMOTE, - path=cloud_info.FILEPATH, - ssh_ip_addr=cloud_info.PROVDB_SSH_ADDR, - ssh_pkey=cloud_info.SSH_PKEY, - ssh_username=cloud_info.SSU_USERNAME, - remote_bind_address=(cloud_info.MONGO_ADDR, cloud_info.MONGO_PORT), - mongo_ip_addr=cloud_info.MONGO_ADDR, - mongo_port=cloud_info.MONGO_PORT + remote=REMOTE, + path=CACHE_PATH, + ssh_ip_addr=PROVDB_SSH_ADDR, + ssh_pkey=SSH_PKEY, + ssh_username=SSU_USERNAME, + remote_bind_address=(MONGO_ADDR, MONGO_PORT), + mongo_ip_addr=MONGO_ADDR, + mongo_port=MONGO_PORT ) @@ -1329,14 +1333,14 @@ def eval(self, *args, **kwargs): self._prov.time_init = t0 self._provdb.init( - remote=cloud_info.REMOTE, - path=cloud_info.FILEPATH, - ssh_ip_addr=cloud_info.PROVDB_SSH_ADDR, - 
ssh_pkey=cloud_info.SSH_PKEY, - ssh_username=cloud_info.SSU_USERNAME, - remote_bind_address=(cloud_info.MONGO_ADDR, cloud_info.MONGO_PORT), - mongo_ip_addr=cloud_info.MONGO_ADDR, - mongo_port=cloud_info.MONGO_PORT + remote=REMOTE, + path=CACHE_PATH, + ssh_ip_addr=PROVDB_SSH_ADDR, + ssh_pkey=SSH_PKEY, + ssh_username=SSU_USERNAME, + remote_bind_address=(MONGO_ADDR, MONGO_PORT), + mongo_ip_addr=MONGO_ADDR, + mongo_port=MONGO_PORT ) # Init the workers # context = zmq.Context() diff --git a/src/openalea/core/data_manager.py b/src/openalea/core/data_manager.py deleted file mode 100644 index d3b3d829..00000000 --- a/src/openalea/core/data_manager.py +++ /dev/null @@ -1,47 +0,0 @@ -import dill -import os -from openalea.core.metadata.cloud_info import CACHE_PATH, TMP_PATH -from openalea.distributed.index.id import get_id - -def write_data(data_id, data, path): - """ - vid: vid of the node whose output will be stored - path: path where the data will be stored - """ - with open(os.path.join(path,data_id), "w") as f: - dill.dump(data, f) - - -# def write_outputs(data_id, cache_path): -# for port in range(df.node(vid).get_nb_output()): -# data_id = get_id(vid, port) -# with open(os.path.join(cache_path,data_id), "w") as f: -# dill.dump(df.node(vid).get_output(port), f) - - -def load_data(path): - """ - vid: vid of the node whose input will be fetched - path: path of the data to get - """ - with open(path, "r") as f: - data = dill.load(f) - return data - - -def check_data_to_load(vid, pid, fragment_infos): - """ - Return the path if the data has to be loaded - Return None otherwise - - """ - if not fragment_infos: - return None - if (vid, pid) in fragment_infos['cached_data'].keys(): - # the data is get from cache - return fragment_infos['cached_data'][(vid, pid)] - if (vid, pid) in fragment_infos['input_data'].keys(): - # the data is computed by other fragments - return fragment_infos['input_data'][(vid, pid)] - return None - diff --git 
a/src/openalea/core/metadata/cloud_info.py b/src/openalea/core/metadata/cloud_info.py index bab89609..12e8a2ba 100644 --- a/src/openalea/core/metadata/cloud_info.py +++ b/src/openalea/core/metadata/cloud_info.py @@ -1,20 +1,20 @@ -from openalea.core.path import path -from openalea.core import settings +# from openalea.core.path import path +# from openalea.core import settings -# infos about provenance db -REMOTE = False +# # infos about provenance db +# REMOTE = False -# SSH -PROVDB_SSH_ADDR = "134.158.247.32" -SSU_USERNAME="ubuntu" -SSH_PKEY="/home/gaetan/.ssh/id_rsa" -# REMOTE_DB_ADDR=('127.0.0.1', 27017) +# # SSH +# PROVDB_SSH_ADDR = "134.158.247.32" +# SSU_USERNAME="ubuntu" +# SSH_PKEY="/home/gaetan/.ssh/id_rsa" +# # REMOTE_DB_ADDR=('127.0.0.1', 27017) -# Mongo -MONGO_ADDR='127.0.0.1' -MONGO_PORT = 27017 +# # Mongo +# MONGO_ADDR='127.0.0.1' +# MONGO_PORT = 27017 -# file infos -PROVENANCE_PATH = path(settings.get_openalea_home_dir()) / 'provenance' -TMP_PATH = path(settings.get_openalea_home_dir()) / "execution_data" -CACHE_PATH = path(settings.get_openalea_home_dir()) / "cached_data" +# # file infos +# PROVENANCE_PATH = path(settings.get_openalea_home_dir()) / 'provenance' +# TMP_PATH = path(settings.get_openalea_home_dir()) / "execution_data" +# CACHE_PATH = path(settings.get_openalea_home_dir()) / "cached_data" diff --git a/src/openalea/core/metadata/costs.py b/src/openalea/core/metadata/costs.py index ffe691c2..f9b815ab 100644 --- a/src/openalea/core/metadata/costs.py +++ b/src/openalea/core/metadata/costs.py @@ -1,45 +1,45 @@ -# function to get the site and minimal cost for a task, with prov + sites -def minimum_cost_site(vid, provenance, multisites): - vid = str(vid) - # if no provenance data for task vid -> NOT TAKEN INTO ACCOUNT - # TODO: manage if no prov data -> average? best site? random? .... 
- if vid not in provenance.prov: - print("Not enough provenance information on task: " + str(vid)) - return None, None - - # find the sites where the input data exist: - input_site = [] - for s in multisites.list_sites: - if multisites.list_sites[s].check_input(vid): - input_site.append(s) - # IF No site has the input data -> cannot be computed - if not input_site: - print('Input data not found') - return None, None - - possible_cost = dict() - # for each site get the cost - for s1 in multisites.list_sites: - # if the Input data is not on s1 -> get the minimal cost to transfer the intput data to the site: - min_transfer_cost = [] - for s2 in input_site: - transfer_cost = multisites.list_sites[s1].transfer_cost[s2] - # get the input data size | here from provenance, in real cases the scheduler knows it - input_data_size = provenance.prov[vid].input_size - - get_input_cost_tmp = transfer_cost * input_data_size - min_transfer_cost.append(get_input_cost_tmp) - get_input_cost = min(min_transfer_cost) - - # the cost to compute the task on this site: - compute_cost = multisites.list_sites[s1].compute_cost * provenance.prov[vid].exec_time - - total_cost = get_input_cost + compute_cost - # print s1, "total_cost : " ,total_cost, 'get_input_cost : ', get_input_cost, 'compute_cost', compute_cost - - possible_cost[s1] = total_cost - - best_site = min(possible_cost) - best_cost = possible_cost[best_site] - - return best_site, best_cost \ No newline at end of file +# # function to get the site and minimal cost for a task, with prov + sites +# def minimum_cost_site(vid, provenance, multisites): +# vid = str(vid) +# # if no provenance data for task vid -> NOT TAKEN INTO ACCOUNT +# # TODO: manage if no prov data -> average? best site? random? .... 
+# if vid not in provenance.prov: +# print("Not enough provenance information on task: " + str(vid)) +# return None, None + +# # find the sites where the input data exist: +# input_site = [] +# for s in multisites.list_sites: +# if multisites.list_sites[s].check_input(vid): +# input_site.append(s) +# # IF No site has the input data -> cannot be computed +# if not input_site: +# print('Input data not found') +# return None, None + +# possible_cost = dict() +# # for each site get the cost +# for s1 in multisites.list_sites: +# # if the Input data is not on s1 -> get the minimal cost to transfer the intput data to the site: +# min_transfer_cost = [] +# for s2 in input_site: +# transfer_cost = multisites.list_sites[s1].transfer_cost[s2] +# # get the input data size | here from provenance, in real cases the scheduler knows it +# input_data_size = provenance.prov[vid].input_size + +# get_input_cost_tmp = transfer_cost * input_data_size +# min_transfer_cost.append(get_input_cost_tmp) +# get_input_cost = min(min_transfer_cost) + +# # the cost to compute the task on this site: +# compute_cost = multisites.list_sites[s1].compute_cost * provenance.prov[vid].exec_time + +# total_cost = get_input_cost + compute_cost +# # print s1, "total_cost : " ,total_cost, 'get_input_cost : ', get_input_cost, 'compute_cost', compute_cost + +# possible_cost[s1] = total_cost + +# best_site = min(possible_cost) +# best_cost = possible_cost[best_site] + +# return best_site, best_cost \ No newline at end of file diff --git a/src/openalea/core/metadata/data_size.py b/src/openalea/core/metadata/data_size.py deleted file mode 100644 index 9c3b5b33..00000000 --- a/src/openalea/core/metadata/data_size.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import print_function -from sys import getsizeof, stderr -from itertools import chain -from collections import deque -import sys -from types import ModuleType, FunctionType -from gc import get_referents - -try: - from reprlib import repr -except 
ImportError: - pass - -def total_size(o, handlers={}, verbose=False): - """ Returns the approximate memory footprint an object and all of its contents. - - Automatically finds the contents of the following builtin containers and - their subclasses: tuple, list, deque, dict, set and frozenset. - To search other containers, add handlers to iterate over their contents: - - handlers = {SomeContainerClass: iter, - OtherContainerClass: OtherContainerClass.get_elements} - - """ - dict_handler = lambda d: chain.from_iterable(d.items()) - all_handlers = {tuple: iter, - list: iter, - deque: iter, - dict: dict_handler, - set: iter, - frozenset: iter, - } - all_handlers.update(handlers) # user handlers take precedence - seen = set() # track which object id's have already been seen - default_size = getsizeof(0) # estimate sizeof object without __sizeof__ - - def sizeof(o): - if id(o) in seen: # do not double count the same object - return 0 - seen.add(id(o)) - s = getsizeof(o, default_size) - - if verbose: - print(s, type(o), repr(o), file=stderr) - - for typ, handler in all_handlers.items(): - if isinstance(o, typ): - s += sum(map(sizeof, handler(o))) - break - return s - - return sizeof(o) - - -# Custom objects know their class. -# Function objects seem to know way too much, including modules. -# Exclude modules as well. 
-BLACKLIST = type, ModuleType, FunctionType - - -def getsize(obj): - """sum size of object & members.""" - if isinstance(obj, BLACKLIST): - raise TypeError('getsize() does not take argument of type: '+ str(type(obj))) - seen_ids = set() - size = 0 - objects = [obj] - while objects: - need_referents = [] - for obj in objects: - if not isinstance(obj, BLACKLIST) and id(obj) not in seen_ids: - seen_ids.add(id(obj)) - size += sys.getsizeof(obj) - need_referents.append(obj) - objects = get_referents(*need_referents) - return size \ No newline at end of file diff --git a/src/openalea/core/metadata/provenanceDB.py b/src/openalea/core/metadata/provenanceDB.py deleted file mode 100644 index 5cb151bf..00000000 --- a/src/openalea/core/metadata/provenanceDB.py +++ /dev/null @@ -1,159 +0,0 @@ -from sshtunnel import SSHTunnelForwarder -import pymongo -from pymongo.errors import ConnectionFailure -from sshtunnel import BaseSSHTunnelForwarderError -import json - -from openalea.core.path import path - - - -class ProvFile(): - def __init__(self, *args, **kwargs): - self.localpath = "" - - def add_prov_item(self, item, *args, **kwargs): - print "START WRITING" - prov_path = path(self.localpath) / item["workflow"] + '.json' - print self.localpath - with open(prov_path, 'a+') as fp: - json.dump(item, fp, indent=4) - - def init(self, *args, **kwargs): - self.localpath = kwargs["path"] - - def close(self): - pass - - -class ProvMongo(): - def __init__(self, *args, **kwargs): - self.client = None - self.server = None - # self.provdb = None - self.taskdb = None - self.wfdb = None - self.remote = False - - def __set__(self, instance, value): - self.instance = value - - def is_in(self, task_id=None, wf_id=None): - # check if in mongodb - if task_id: - if self.taskdb.find_one({"task_id": task_id}): - return self.taskdb.find_one({"task_id": task_id}) - else: - return False - if wf_id: - if self.wfdb.find_one({"workflow": wf_id}): - return self.wfdb.find_one({"workflow": wf_id}) - else: - 
return False - else: - return False - - def add_task_item(self, item, *args, **kwargs): - # add element to index db - self.taskdb.insert_one(item) - - def add_list_task_item(self, itemlist, *args, **kwargs): - for item in itemlist: - self.taskdb.insert_one(item) - - def add_wf_item(self, item, *args, **kwargs): - # add element to index db - self.wfdb.insert_one(item) - - def add_list_wf_item(self, itemlist, *args, **kwargs): - for item in itemlist: - self.wfdb.insert_one(item) - - def show(self, task_id=None, wf_id=None): - print("the task provenance has : ", self.taskdb.count(), " entries :") - for doc in self.taskdb.find({}): - print(doc) - - def start_sshtunnel(self, *args, **kwargs): - try: - self.server = SSHTunnelForwarder( - ssh_address_or_host=kwargs['ssh_ip_addr'], - ssh_pkey=kwargs['ssh_pkey'], - ssh_username=kwargs['ssh_username'], - remote_bind_address=kwargs['remote_bind_address'] - # , - # *args, - # **kwargs - ) - - self.server.start() - except BaseSSHTunnelForwarderError: - print "Fail to connect to ssh device" - - def start_client(self, *args, **kwargs): - if self.remote: - if not self.server: - print "SSH Server not started - cannot connect to Mongo" - return - try: - client = pymongo.MongoClient(host=kwargs['mongo_ip_addr'], - port=self.server.local_bind_port - # , - # , # server.local_bind_port is assigned local port - # username='admin', - # password='admin' - # *args, - # **kwargs - ) - - self.client = client - db = self.client.provdb - self.taskdb = db.task_collection - self.wfdb = db.workflow_collection - except ConnectionFailure: - print "failed to connect to mongodb" - else: - try: - client = pymongo.MongoClient(host=kwargs['mongo_ip_addr'], - port=kwargs['mongo_port'] - # , - # , # server.local_bind_port is assigned local port - # username='admin', - # password='admin' - # *args, - # **kwargs - ) - - self.client = client - db = self.client.provdb - self.taskdb = db.task_collection - self.wfdb = db.workflow_collection - except 
ConnectionFailure: - print "failed to connect to mongodb" - - def close_sshtunel(self): - return self.server.stop() - - def close_client(self): - return self.client.close() - - def remove_all_item(self): - self.taskdb.drop() - self.wfdb.drop() - return 0 - - def init(self, *args, **kwargs): - if kwargs['remote']: - self.remote=True - self.start_sshtunnel(*args, **kwargs) - self.start_client(*args, **kwargs) - else: - self.remote=False - self.start_client(*args, **kwargs) - - def close(self): - if self.remote: - self.close_client() - self.close_sshtunel() - else: - self.close_client() diff --git a/src/openalea/core/metadata/scheduling_plan.py b/src/openalea/core/metadata/scheduling_plan.py deleted file mode 100644 index ef59d19a..00000000 --- a/src/openalea/core/metadata/scheduling_plan.py +++ /dev/null @@ -1,13 +0,0 @@ -class SchedulingPlan(): - def __init__(self): - self.plan = [] - self.cost = 0. - - def add_to_plan(self, task, site): - self.plan.append((task, site)) - - def add_to_cost(self, new_cost): - if isinstance(new_cost, float): - self.cost+=new_cost - else: - pass \ No newline at end of file From 1520b99956d4854e21a491057ab67cca330044a2 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Tue, 14 Jan 2020 16:54:17 +0100 Subject: [PATCH 37/45] add path for writing tmp data --- src/openalea/core/algo/dataflow_evaluation.py | 22 ++++++++++++------- src/openalea/core/compositenode.py | 6 ++--- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 88201c1a..c0e61b6c 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -34,9 +34,9 @@ from openalea.distributed.data.data_manager import load_data, check_data_to_load, write_data -from openalea.distributed.cloud_infos.paths import (TMP_PATH, PROVENANCE_PATH, CACHE_PATH) -from openalea.distributed.cloud_infos.ssh_info import (PROVDB_SSH_ADDR, 
SSH_PKEY, SSU_USERNAME) -from openalea.distributed.cloud_infos.mongo_info import REMOTE, MONGO_PORT, MONGO_ADDR +from openalea.distributed.cloud_infos.cloud_infos import (TMP_PATH, PROVENANCE_PATH, CACHE_PATH, + PROVDB_SSH_ADDR, SSH_PKEY, SSU_USERNAME, + REMOTE, MONGO_PORT, MONGO_ADDR) # import openalea.core.metadata.cloud_info from openalea.distributed.provenance.provenanceDB import ProvMongo @@ -1454,12 +1454,18 @@ class FragmentEvaluation(AbstractEvaluation): # TODO: It doesn't work with provenance def __init__(self, dataflow, record_provenance=False, fragment_infos=None, *args, **kwargs): - + AbstractEvaluation.__init__(self, dataflow, record_provenance) # a property to specify if the node has already been evaluated self._evaluated = set() self._fragment_infos = fragment_infos self._index = None + if "tmp_path" in kwargs: + print("use ", kwargs.get("tmp_path"), " as temporary file path") + self._tmp_path = kwargs.get("tmp_path") + else: + print("use ", TMP_PATH, " as temporary file path") + self._tmp_path = TMP_PATH def is_stopped(self, vid, actor): """ Return True if evaluation must be stop at this vertex """ @@ -1552,13 +1558,13 @@ def eval(self, *args, **kwargs): t1 = time.time() # Save the outputs of the fragment into file - if not os.path.exists(os.path.dirname(TMP_PATH)): - os.makedirs(TMP_PATH) + if not os.path.exists(os.path.dirname(self._tmp_path)): + os.makedirs(self._tmp_path) for i, vid in enumerate([v[0] for v in self._fragment_infos['outputs_vid']]): for port in range(df.node(vid).get_nb_output()): data_id = get_id(vid, port) - write_data(data_id=data_id, data=df.node(vid).get_output(port), path=TMP_PATH) - self._index.add_data(data_id=data_id, path=str(os.path.join(TMP_PATH, data_id))) + write_data(data_id=data_id, data=df.node(vid).get_output(port), path=self._tmp_path) + self._index.add_data(data_id=data_id, path=str(os.path.join(self._tmp_path, data_id))) if self._prov is not None: diff --git a/src/openalea/core/compositenode.py 
b/src/openalea/core/compositenode.py index 460a5cc8..dc18eb93 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -499,7 +499,7 @@ def set_output(self, index_key, val): return self.node(self.id_out).set_output(index_key, val) - def get_eval_algo(self, record_provenance=False, fragment_infos=None): + def get_eval_algo(self, record_provenance=False, fragment_infos=None, *args, **kwargs): """ Return the evaluation algo instance """ try: algo_str = self.eval_algo @@ -511,7 +511,7 @@ def get_eval_algo(self, record_provenance=False, fragment_infos=None): baseimp = "algo.dataflow_evaluation" module = __import__(baseimp, globals(), locals(), [algo_str]) classobj = module.__dict__[algo_str] - return classobj(self, record_provenance=record_provenance, fragment_infos=fragment_infos) + return classobj(self, record_provenance=record_provenance, fragment_infos=fragment_infos, *args, **kwargs) except Exception, e: from openalea.core.algo.dataflow_evaluation import \ @@ -534,7 +534,7 @@ def eval_as_expression(self, vtx_id=None, step=False, return if vtx_id is not None: self.node(vtx_id).modified = True - algo = self.get_eval_algo(record_provenance, fragment_infos) + algo = self.get_eval_algo(record_provenance, fragment_infos, *args, **kwargs) try: self.evaluating = True From e2eee0895ee68f1d6ff975f37646956026ca1a8f Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Wed, 15 Jan 2020 18:41:26 +0100 Subject: [PATCH 38/45] fake eval that print ports id --- src/openalea/core/algo/dataflow_evaluation.py | 90 ++++++++++++++++++- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index c0e61b6c..dd9b575d 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -1552,8 +1552,12 @@ def eval(self, *args, **kwargs): # Eval from the leaf - for vid in (vid for vid in df.vertices() if 
df.nb_out_edges(vid) == 0): - self.eval_vertex(vid) + # for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): + # self.eval_vertex(vid) + + # Eval from the outputs node of the fragment: + for ovid in self._fragment_infos['outputs_vid']: + self.eval_vertex(ovid[0]) t1 = time.time() @@ -1578,5 +1582,87 @@ def eval(self, *args, **kwargs): with open(provenance_path, "a+") as f: json.dump(provenance, f, indent=4) + if quantify: + print "Evaluation time: %s" % (t1 - t0) + + +class FakeEvaluation(AbstractEvaluation): + """ Evaluation to get id of egdes """ + __evaluators__.append("FakeEvaluation") + + # TODO: It doesn't work with provenance + def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + + AbstractEvaluation.__init__(self, dataflow, record_provenance) + # a property to specify if the node has already been evaluated + self._evaluated = set() + + def is_stopped(self, vid, actor): + """ Return True if evaluation must be stop at this vertex """ + + if vid in self._evaluated: + return True + + try: + if actor.block: + status = True + n = actor.get_nb_output() + outputs = [i for i in range(n) if + actor.get_output(i) is not None] + if not outputs: + status = False + return status + except: + pass + return False + + def eval_vertex(self, vid, *args, **kwargs): + """ Evaluate the vertex vid """ + + df = self._dataflow + actor = df.actor(vid) + print("eval node: ", vid) + self._evaluated.add(vid) + + # For each inputs + for pid in df.in_ports(vid): + inputs = [] + + cpt = 0 + # For each connected node + for npid, nvid, nactor in self.get_parent_nodes(pid): + print("node: ", vid, " - input port : ", pid, ' - to output port: ', + npid, " - from node: ", nvid) + if not self.is_stopped(nvid, nactor): + self.eval_vertex(nvid) + + inputs.append(nactor.get_output(df.local_id(npid))) + cpt += 1 + + # set input as a list or a simple value + if (cpt == 1): + inputs = inputs[0] + if (cpt > 0): + actor.set_input(df.local_id(pid), inputs) + + # 
Eval the node + # self.eval_vertex_code(vid) + + def eval(self, *args, **kwargs): + """ Evaluate the whole dataflow starting from leaves""" + + t0 = time.time() + df = self._dataflow + + + # Unvalidate all the nodes + self._evaluated.clear() + + # Eval from the leaf + for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): + self.eval_vertex(vid) + + t1 = time.time() + if quantify: print "Evaluation time: %s" % (t1 - t0) \ No newline at end of file From 7f23bee7eb9aa15d83743084ef2e293e7e2df459 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Fri, 17 Jan 2020 18:32:17 +0100 Subject: [PATCH 39/45] fix names --- src/openalea/core/algo/dataflow_evaluation.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index dd9b575d..4cb7dbef 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -34,9 +34,7 @@ from openalea.distributed.data.data_manager import load_data, check_data_to_load, write_data -from openalea.distributed.cloud_infos.cloud_infos import (TMP_PATH, PROVENANCE_PATH, CACHE_PATH, - PROVDB_SSH_ADDR, SSH_PKEY, SSU_USERNAME, - REMOTE, MONGO_PORT, MONGO_ADDR) +from openalea.distributed.cloud_infos.cloud_infos import * # import openalea.core.metadata.cloud_info from openalea.distributed.provenance.provenanceDB import ProvMongo @@ -267,7 +265,7 @@ def eval(self, *args, **kwargs): path=CACHE_PATH, ssh_ip_addr=PROVDB_SSH_ADDR, ssh_pkey=SSH_PKEY, - ssh_username=SSU_USERNAME, + ssh_username=SSH_USERNAME, remote_bind_address=(MONGO_ADDR, MONGO_PORT), mongo_ip_addr=MONGO_ADDR, mongo_port=MONGO_PORT @@ -1337,7 +1335,7 @@ def eval(self, *args, **kwargs): path=CACHE_PATH, ssh_ip_addr=PROVDB_SSH_ADDR, ssh_pkey=SSH_PKEY, - ssh_username=SSU_USERNAME, + ssh_username=SSH_USERNAME, remote_bind_address=(MONGO_ADDR, MONGO_PORT), mongo_ip_addr=MONGO_ADDR, mongo_port=MONGO_PORT 
@@ -1532,8 +1530,14 @@ def eval(self, *args, **kwargs): t0 = time.time() self._index = IndexCassandra() - self._index.initialize() - + self._index.initialize( + remote=REMOTE, + ssh_pkey=SSH_PKEY, + ssh_ip_addr=CASSANDRA_SSH_IP, + ssh_username=SSH_USERNAME, + remote_bind_address=("localhost", CASSANDRA_PORT), + ) + print "INDEX loaded" df = self._dataflow if self._prov is not None: From 182487bf23c042a2a491d6f527f9d37df3acfca5 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Mon, 20 Jan 2020 16:52:13 +0100 Subject: [PATCH 40/45] Provenance works with Files, Mongo --- src/openalea/core/algo/dataflow_evaluation.py | 62 ++++++++++--------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index 4cb7dbef..b1e7663e 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -34,10 +34,10 @@ from openalea.distributed.data.data_manager import load_data, check_data_to_load, write_data -from openalea.distributed.cloud_infos.cloud_infos import * +# from openalea.distributed.cloud_infos.cloud_infos import * # import openalea.core.metadata.cloud_info -from openalea.distributed.provenance.provenanceDB import ProvMongo +from openalea.distributed.provenance.provenanceDB import start_provdb from openalea.distributed.index.cacheIndex import IndexCassandra from openalea.distributed.index.id import get_id @@ -101,7 +101,7 @@ def cmp_posx(x, y): class AbstractEvaluation(object): - def __init__(self, dataflow, record_provenance=False): + def __init__(self, dataflow, record_provenance=False, *args, **kwargs): """ :param dataflow: to be done """ @@ -109,7 +109,9 @@ def __init__(self, dataflow, record_provenance=False): if record_provenance: self._prov = RVProvenance() - self._provdb = ProvMongo() + self._provdb = start_provdb(provenance_config=kwargs.get('provenance_config', None), + 
provenance_type=kwargs.get('provenance_type', "Files")) + else: self._prov = None self._provdb = None @@ -138,8 +140,8 @@ def eval_vertex_code(self, vid, *args, **kwargs): t0 = clock() ret = node.eval() - t1 = clock() + dt = clock() - t0 if self._prov is not None: taskitem = self._prov.after_eval(self._dataflow, vid, dt) if self._provdb and taskitem: @@ -259,17 +261,18 @@ def eval(self, *args, **kwargs): self._prov.init(df) self._prov.time_init = t0 - if self._provdb is not None: - self._provdb.init( - remote=REMOTE, - path=CACHE_PATH, - ssh_ip_addr=PROVDB_SSH_ADDR, - ssh_pkey=SSH_PKEY, - ssh_username=SSH_USERNAME, - remote_bind_address=(MONGO_ADDR, MONGO_PORT), - mongo_ip_addr=MONGO_ADDR, - mongo_port=MONGO_PORT - ) + + # if self._provdb is not None: + # self._provdb.init( + # remote=REMOTE_PROV, + # path=CACHE_PATH, + # ssh_ip_addr=PROVDB_SSH_ADDR, + # ssh_pkey=SSH_PKEY, + # ssh_username=SSH_USERNAME, + # remote_bind_address=(MONGO_ADDR, MONGO_PORT), + # mongo_ip_addr=MONGO_ADDR, + # mongo_port=MONGO_PORT + # ) # Unvalidate all the nodes @@ -1244,8 +1247,9 @@ def eval(self, *args, **kwargs): self._prov.time_end = t1 # Save the provenance in a file wf_id = str(df.factory.uid) + ".json" - home = os.path.expanduser("~") - provenance_path = os.path.join(home, ".openalea/provenance", wf_id) + # Save the provenance in a file + wf_id = str(df.factory.uid) + ".json" + provenance_path = os.path.join(PROVENANCE_PATH, wf_id) if not os.path.exists(os.path.dirname(provenance_path)): os.makedirs(provenance_path) provenance = self._prov.as_wlformat() @@ -1330,16 +1334,16 @@ def eval(self, *args, **kwargs): self._prov.init(df) self._prov.time_init = t0 - self._provdb.init( - remote=REMOTE, - path=CACHE_PATH, - ssh_ip_addr=PROVDB_SSH_ADDR, - ssh_pkey=SSH_PKEY, - ssh_username=SSH_USERNAME, - remote_bind_address=(MONGO_ADDR, MONGO_PORT), - mongo_ip_addr=MONGO_ADDR, - mongo_port=MONGO_PORT - ) + # self._provdb.init( + # remote=REMOTE_PROV, + # path=CACHE_PATH, + # 
ssh_ip_addr=PROVDB_SSH_ADDR, + # ssh_pkey=SSH_PKEY, + # ssh_username=SSH_USERNAME, + # remote_bind_address=(MONGO_ADDR, MONGO_PORT), + # mongo_ip_addr=MONGO_ADDR, + # mongo_port=MONGO_PORT + # ) # Init the workers # context = zmq.Context() # socket = context.socket(zmq.REQ) @@ -1531,7 +1535,7 @@ def eval(self, *args, **kwargs): self._index = IndexCassandra() self._index.initialize( - remote=REMOTE, + remote=REMOTE_INDEX, ssh_pkey=SSH_PKEY, ssh_ip_addr=CASSANDRA_SSH_IP, ssh_username=SSH_USERNAME, From 5a57be22cbc1403bed77ba04b578f5c77376ed73 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Mon, 20 Jan 2020 19:35:43 +0100 Subject: [PATCH 41/45] add cassandra provenance --- src/openalea/core/algo/dataflow_evaluation.py | 30 ++----------------- src/openalea/core/compositenode.py | 2 +- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index b1e7663e..c1060ce9 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -111,13 +111,12 @@ def __init__(self, dataflow, record_provenance=False, *args, **kwargs): self._prov = RVProvenance() self._provdb = start_provdb(provenance_config=kwargs.get('provenance_config', None), provenance_type=kwargs.get('provenance_type', "Files")) - else: self._prov = None self._provdb = None - def eval(self, *args): + def eval(self, *args, **kwargs): """todo""" raise NotImplementedError() @@ -146,7 +145,6 @@ def eval_vertex_code(self, vid, *args, **kwargs): taskitem = self._prov.after_eval(self._dataflow, vid, dt) if self._provdb and taskitem: self._provdb.add_task_item(taskitem) - # print self._prov.as_wlformat() # When an exception is raised, a flag is set. 
@@ -198,7 +196,7 @@ class BrutEvaluation(AbstractEvaluation): def __init__(self, dataflow, record_provenance=False, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, record_provenance) + AbstractEvaluation.__init__(self, dataflow, record_provenance, *args, **kwargs) # a property to specify if the node has already been evaluated self._evaluated = set() @@ -260,21 +258,7 @@ def eval(self, *args, **kwargs): if self._prov is not None: self._prov.init(df) self._prov.time_init = t0 - - # if self._provdb is not None: - # self._provdb.init( - # remote=REMOTE_PROV, - # path=CACHE_PATH, - # ssh_ip_addr=PROVDB_SSH_ADDR, - # ssh_pkey=SSH_PKEY, - # ssh_username=SSH_USERNAME, - # remote_bind_address=(MONGO_ADDR, MONGO_PORT), - # mongo_ip_addr=MONGO_ADDR, - # mongo_port=MONGO_PORT - # ) - - # Unvalidate all the nodes self._evaluated.clear() @@ -294,16 +278,6 @@ def eval(self, *args, **kwargs): # close remote connections self._provdb.close() - # Save the provenance in a file - # wf_id = str(df.factory.uid) + ".json" - # home = os.path.expanduser("~") - # provenance_path = os.path.join(home, ".openalea/provenance", wf_id) - # if not os.path.exists(os.path.dirname(provenance_path)): - # os.makedirs(provenance_path) - # provenance = self._prov.as_wlformat() - # with open(provenance_path, "a+") as f: - # json.dump(provenance, f, indent=4) - if quantify: print "Evaluation time: %s" % (t1 - t0) diff --git a/src/openalea/core/compositenode.py b/src/openalea/core/compositenode.py index dc18eb93..968ed7c5 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -538,7 +538,7 @@ def eval_as_expression(self, vtx_id=None, step=False, try: self.evaluating = True - algo.eval(vtx_id, step=step) + algo.eval(vtx_id, step=step, *args, **kwargs) finally: self.evaluating = False t1 = time.time() From 6b784b3d9ab9511d643ecfa78408ab344b9d4744 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Tue, 21 Jan 2020 18:10:50 +0100 Subject: [PATCH 42/45] data 
index working --- src/openalea/core/algo/dataflow_evaluation.py | 116 ++++++++++-------- 1 file changed, 67 insertions(+), 49 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index c1060ce9..d14177f3 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -28,18 +28,17 @@ from openalea.core.dataflow import SubDataflow from openalea.core.interface import IFunction +from openalea.core.path import path +from openalea.core import settings + import os import time import json from openalea.distributed.data.data_manager import load_data, check_data_to_load, write_data -# from openalea.distributed.cloud_infos.cloud_infos import * -# import openalea.core.metadata.cloud_info - from openalea.distributed.provenance.provenanceDB import start_provdb - -from openalea.distributed.index.cacheIndex import IndexCassandra +from openalea.distributed.index.indexDB import start_index from openalea.distributed.index.id import get_id @@ -115,6 +114,12 @@ def __init__(self, dataflow, record_provenance=False, *args, **kwargs): self._prov = None self._provdb = None + if kwargs.get('use_index'): + self._index = start_index(index_config=kwargs.get('index_config', None), + index_type=kwargs.get('index_type', "Cassandra")) + else: + self._index = None + def eval(self, *args, **kwargs): """todo""" @@ -145,7 +150,8 @@ def eval_vertex_code(self, vid, *args, **kwargs): taskitem = self._prov.after_eval(self._dataflow, vid, dt) if self._provdb and taskitem: self._provdb.add_task_item(taskitem) - + # if self._index is not None: + # self._index.add # When an exception is raised, a flag is set. # So we remove it when evaluation is ok. 
@@ -258,7 +264,6 @@ def eval(self, *args, **kwargs): if self._prov is not None: self._prov.init(df) self._prov.time_init = t0 - # if self._provdb is not None: # Unvalidate all the nodes self._evaluated.clear() @@ -273,7 +278,6 @@ def eval(self, *args, **kwargs): wfitem = self._prov.as_wlformat() if self._provdb is not None: self._provdb.add_wf_item(wfitem) - # self._provdb.add_list_task_item(taskitemslist) # close remote connections self._provdb.close() @@ -550,12 +554,15 @@ def eval(self, vtx_id=None, context=None, is_subdataflow=False, step=False): is_subdataflow=is_subdataflow) self.lambda_value.clear() # do not keep context in memory - # if PROVENANCE: - # self.provenance.end_time() - t1 = clock() if self._prov is not None: self._prov.time_end = t1 + wfitem = self._prov.as_wlformat() + if self._provdb is not None: + self._provdb.add_wf_item(wfitem) + + # close remote connections + self._provdb.close() if quantify: print "Evaluation time: %s" % (t1 - t0) @@ -1144,7 +1151,7 @@ class TestEvaluation(AbstractEvaluation): def __init__(self, dataflow, record_provenance=False, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, record_provenance) + AbstractEvaluation.__init__(self, dataflow, record_provenance, *args, **kwargs) # a property to specify if the node has already been evaluated self._evaluated = set() @@ -1219,16 +1226,24 @@ def eval(self, *args, **kwargs): if self._prov is not None: self._prov.time_end = t1 - # Save the provenance in a file - wf_id = str(df.factory.uid) + ".json" - # Save the provenance in a file - wf_id = str(df.factory.uid) + ".json" - provenance_path = os.path.join(PROVENANCE_PATH, wf_id) - if not os.path.exists(os.path.dirname(provenance_path)): - os.makedirs(provenance_path) - provenance = self._prov.as_wlformat() - with open(provenance_path, "a+") as f: - json.dump(provenance, f, indent=4) + wfitem = self._prov.as_wlformat() + if self._provdb is not None: + self._provdb.add_wf_item(wfitem) + + # close remote connections + 
self._provdb.close() + + + # # Save the provenance in a file + # wf_id = str(df.factory.uid) + ".json" + # # Save the provenance in a file + # wf_id = str(df.factory.uid) + ".json" + # provenance_path = os.path.join(PROVENANCE_PATH, wf_id) + # if not os.path.exists(os.path.dirname(provenance_path)): + # os.makedirs(provenance_path) + # provenance = self._prov.as_wlformat() + # with open(provenance_path, "a+") as f: + # json.dump(provenance, f, indent=4) if quantify: print "Evaluation time: %s" % (t1 - t0) @@ -1346,6 +1361,7 @@ def eval(self, *args, **kwargs): if self._prov is not None: self._prov.time_end = t1 wfitem = self._prov.as_wlformat() + if self._provdb is not None: self._provdb.add_wf_item(wfitem) # close remote connections @@ -1428,20 +1444,23 @@ class FragmentEvaluation(AbstractEvaluation): """ Evaluation with By fragments """ __evaluators__.append("FragmentEvaluation") - # TODO: It doesn't work with provenance def __init__(self, dataflow, record_provenance=False, fragment_infos=None, *args, **kwargs): AbstractEvaluation.__init__(self, dataflow, record_provenance) # a property to specify if the node has already been evaluated self._evaluated = set() self._fragment_infos = fragment_infos - self._index = None - if "tmp_path" in kwargs: - print("use ", kwargs.get("tmp_path"), " as temporary file path") - self._tmp_path = kwargs.get("tmp_path") - else: - print("use ", TMP_PATH, " as temporary file path") - self._tmp_path = TMP_PATH + + # Define the path where execution data is store during execution - delete after + tpath = path(settings.get_openalea_home_dir()) / "execution_data" + print("use ", kwargs.get("tmp_path", tpath), " as temporary file path") + self._tmp_path = kwargs.get("tmp_path", tpath) + + # If the data index is not use - force its init + if self._index is None: + self._index = start_index(index_config=kwargs.get('index_config', None), + index_type=kwargs.get('index_type', "Cassandra")) + def is_stopped(self, vid, actor): """ Return True if 
evaluation must be stop at this vertex """ @@ -1507,15 +1526,6 @@ def eval(self, *args, **kwargs): print "START fragment evaluation" t0 = time.time() - self._index = IndexCassandra() - self._index.initialize( - remote=REMOTE_INDEX, - ssh_pkey=SSH_PKEY, - ssh_ip_addr=CASSANDRA_SSH_IP, - ssh_username=SSH_USERNAME, - remote_bind_address=("localhost", CASSANDRA_PORT), - ) - print "INDEX loaded" df = self._dataflow if self._prov is not None: @@ -1550,19 +1560,17 @@ def eval(self, *args, **kwargs): for port in range(df.node(vid).get_nb_output()): data_id = get_id(vid, port) write_data(data_id=data_id, data=df.node(vid).get_output(port), path=self._tmp_path) - self._index.add_data(data_id=data_id, path=str(os.path.join(self._tmp_path, data_id))) + self._index.add_data(data_id=data_id, path=str(os.path.join(self._tmp_path, data_id)), exec_data=True, cache_data=False) if self._prov is not None: self._prov.time_end = t1 - # Save the provenance in a file - wf_id = str(df.factory.uid) + ".json" - provenance_path = os.path.join(PROVENANCE_PATH, wf_id) - if not os.path.exists(os.path.dirname(provenance_path)): - os.makedirs(provenance_path) - provenance = self._prov.as_wlformat() - with open(provenance_path, "a+") as f: - json.dump(provenance, f, indent=4) + wfitem = self._prov.as_wlformat() + if self._provdb is not None: + self._provdb.add_wf_item(wfitem) + + # close remote connections + self._provdb.close() if quantify: print "Evaluation time: %s" % (t1 - t0) @@ -1635,7 +1643,9 @@ def eval(self, *args, **kwargs): t0 = time.time() df = self._dataflow - + if self._prov is not None: + self._prov.init(df) + self._prov.time_init = t0 # Unvalidate all the nodes self._evaluated.clear() @@ -1646,5 +1656,13 @@ def eval(self, *args, **kwargs): t1 = time.time() + if self._prov is not None: + self._prov.time_end = t1 + wfitem = self._prov.as_wlformat() + if self._provdb is not None: + self._provdb.add_wf_item(wfitem) + # close remote connections + self._provdb.close() + if quantify: print 
"Evaluation time: %s" % (t1 - t0) \ No newline at end of file From 15307149e8c6d3168e66e1d27a6ab85bbf293b86 Mon Sep 17 00:00:00 2001 From: pomme-abricot Date: Wed, 22 Jan 2020 22:59:51 +0100 Subject: [PATCH 43/45] remove provenance_parameter in eval --- src/openalea/core/algo/dataflow_evaluation.py | 34 +++++++++---------- src/openalea/core/compositenode.py | 9 +++-- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index d14177f3..b90bea03 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -100,13 +100,13 @@ def cmp_posx(x, y): class AbstractEvaluation(object): - def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + def __init__(self, dataflow, *args, **kwargs): """ :param dataflow: to be done """ self._dataflow = dataflow - if record_provenance: + if kwargs.get("record_provenance"): self._prov = RVProvenance() self._provdb = start_provdb(provenance_config=kwargs.get('provenance_config', None), provenance_type=kwargs.get('provenance_type', "Files")) @@ -200,9 +200,9 @@ class BrutEvaluation(AbstractEvaluation): """ Basic evaluation algorithm """ __evaluators__.append("BrutEvaluation") - def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + def __init__(self, dataflow, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, record_provenance, *args, **kwargs) + AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) # a property to specify if the node has already been evaluated self._evaluated = set() @@ -325,9 +325,9 @@ class GeneratorEvaluation(AbstractEvaluation): """ Evaluation algorithm with generator / priority and selection""" __evaluators__.append("GeneratorEvaluation") - def __init__(self, dataflow, record_provenance=False): + def __init__(self, dataflow, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, record_provenance) 
+ AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) # a property to specify if the node has already been evaluated self._evaluated = set() self.reeval = False # Flag to force reevaluation (for generator) @@ -414,8 +414,8 @@ class LambdaEvaluation(PriorityEvaluation): """ Evaluation algorithm with support of lambda / priority and selection""" __evaluators__.append("LambdaEvaluation") - def __init__(self, dataflow, record_provenance=False): - PriorityEvaluation.__init__(self, dataflow, record_provenance) + def __init__(self, dataflow, *args, **kwargs): + PriorityEvaluation.__init__(self, dataflow, *args, **kwargs) self.lambda_value = {} # lambda resolution dictionary self._resolution_node = set() @@ -1149,9 +1149,9 @@ class TestEvaluation(AbstractEvaluation): """ Basic evaluation algorithm with provenance capture in file """ __evaluators__.append("TestEvaluation") - def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + def __init__(self, dataflow, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, record_provenance, *args, **kwargs) + AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) # a property to specify if the node has already been evaluated self._evaluated = set() @@ -1253,9 +1253,9 @@ class ZMQEvaluation(AbstractEvaluation): """ Basic evaluation algorithm """ __evaluators__.append("ZMQEvaluation") - def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + def __init__(self, dataflow, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, record_provenance) + AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) # a property to specify if the node has already been evaluated self._evaluated = set() self.socket=None @@ -1444,12 +1444,12 @@ class FragmentEvaluation(AbstractEvaluation): """ Evaluation with By fragments """ __evaluators__.append("FragmentEvaluation") - def __init__(self, dataflow, record_provenance=False, fragment_infos=None, *args, **kwargs): + def __init__(self, 
dataflow, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, record_provenance) + AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) # a property to specify if the node has already been evaluated self._evaluated = set() - self._fragment_infos = fragment_infos + self._fragment_infos = kwargs.get("fragment_infos", None) # Define the path where execution data is store during execution - delete after tpath = path(settings.get_openalea_home_dir()) / "execution_data" @@ -1581,9 +1581,9 @@ class FakeEvaluation(AbstractEvaluation): __evaluators__.append("FakeEvaluation") # TODO: It doesn't work with provenance - def __init__(self, dataflow, record_provenance=False, *args, **kwargs): + def __init__(self, dataflow, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, record_provenance) + AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) # a property to specify if the node has already been evaluated self._evaluated = set() diff --git a/src/openalea/core/compositenode.py b/src/openalea/core/compositenode.py index 968ed7c5..e8d63ad1 100644 --- a/src/openalea/core/compositenode.py +++ b/src/openalea/core/compositenode.py @@ -499,7 +499,7 @@ def set_output(self, index_key, val): return self.node(self.id_out).set_output(index_key, val) - def get_eval_algo(self, record_provenance=False, fragment_infos=None, *args, **kwargs): + def get_eval_algo(self, *args, **kwargs): """ Return the evaluation algo instance """ try: algo_str = self.eval_algo @@ -511,7 +511,7 @@ def get_eval_algo(self, record_provenance=False, fragment_infos=None, *args, **k baseimp = "algo.dataflow_evaluation" module = __import__(baseimp, globals(), locals(), [algo_str]) classobj = module.__dict__[algo_str] - return classobj(self, record_provenance=record_provenance, fragment_infos=fragment_infos, *args, **kwargs) + return classobj(self, *args, **kwargs) except Exception, e: from openalea.core.algo.dataflow_evaluation import \ @@ -521,7 +521,6 @@ def get_eval_algo(self, 
record_provenance=False, fragment_infos=None, *args, **k return self.eval_algo def eval_as_expression(self, vtx_id=None, step=False, - record_provenance=False, fragment_infos=None, *args, **kwargs): """ Evaluate a vtx_id @@ -534,7 +533,7 @@ def eval_as_expression(self, vtx_id=None, step=False, return if vtx_id is not None: self.node(vtx_id).modified = True - algo = self.get_eval_algo(record_provenance, fragment_infos, *args, **kwargs) + algo = self.get_eval_algo(*args, **kwargs) try: self.evaluating = True @@ -546,7 +545,7 @@ def eval_as_expression(self, vtx_id=None, step=False, logger.info('Evaluation time: %s' % (t1 - t0)) print 'Evaluation time: %s' % (t1 - t0) - if record_provenance: + if kwargs.get("record_provenance"): return algo._prov # Functions used by the node evaluator From 150e296373926c6a66c115cdb44017d543a44db1 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Wed, 29 Jan 2020 18:32:56 +0100 Subject: [PATCH 44/45] FakeEval is generating tasks UIDs --- src/openalea/core/algo/dataflow_evaluation.py | 120 +++++++++++------- 1 file changed, 75 insertions(+), 45 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index b90bea03..a924b9f4 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # -*- python -*- # # OpenAlea.Core @@ -39,6 +40,8 @@ from openalea.distributed.provenance.provenanceDB import start_provdb from openalea.distributed.index.indexDB import start_index +from openalea.distributed.index.graph_id import Task_UID_graph +# TODO: remove this id method - used in fragment evaluation - to get a general one from openalea.distributed.index.id import get_id @@ -115,10 +118,21 @@ def __init__(self, dataflow, *args, **kwargs): self._provdb = None if kwargs.get('use_index'): - self._index = start_index(index_config=kwargs.get('index_config', None), - index_type=kwargs.get('index_type', 
"Cassandra")) + #  Connect to the index db + # self._indexdb = start_index(index_config=kwargs.get('index_config', None), + # index_type=kwargs.get('index_type', "Cassandra")) + # Eval the workflow with a fake evaluation to get the tasks ids of each task + real_eval_algo = dataflow.eval_algo + dataflow.eval_algo= "FakeEvaluation" + dataflow.eval() + tid = dataflow.node(1).get_output("task_ids") + print tid + dataflow.eval_algo= real_eval_algo + # self._index = + # self._index else: self._index = None + self._indexdb = None def eval(self, *args, **kwargs): @@ -1233,18 +1247,6 @@ def eval(self, *args, **kwargs): # close remote connections self._provdb.close() - - # # Save the provenance in a file - # wf_id = str(df.factory.uid) + ".json" - # # Save the provenance in a file - # wf_id = str(df.factory.uid) + ".json" - # provenance_path = os.path.join(PROVENANCE_PATH, wf_id) - # if not os.path.exists(os.path.dirname(provenance_path)): - # os.makedirs(provenance_path) - # provenance = self._prov.as_wlformat() - # with open(provenance_path, "a+") as f: - # json.dump(provenance, f, indent=4) - if quantify: print "Evaluation time: %s" % (t1 - t0) @@ -1323,16 +1325,6 @@ def eval(self, *args, **kwargs): self._prov.init(df) self._prov.time_init = t0 - # self._provdb.init( - # remote=REMOTE_PROV, - # path=CACHE_PATH, - # ssh_ip_addr=PROVDB_SSH_ADDR, - # ssh_pkey=SSH_PKEY, - # ssh_username=SSH_USERNAME, - # remote_bind_address=(MONGO_ADDR, MONGO_PORT), - # mongo_ip_addr=MONGO_ADDR, - # mongo_port=MONGO_PORT - # ) # Init the workers # context = zmq.Context() # socket = context.socket(zmq.REQ) @@ -1457,8 +1449,8 @@ def __init__(self, dataflow, *args, **kwargs): self._tmp_path = kwargs.get("tmp_path", tpath) # If the data index is not use - force its init - if self._index is None: - self._index = start_index(index_config=kwargs.get('index_config', None), + if self._indexdb is None: + self._indexdb = start_index(index_config=kwargs.get('index_config', None), 
index_type=kwargs.get('index_type', "Cassandra")) @@ -1499,7 +1491,7 @@ def eval_vertex(self, vid, *args, **kwargs): cpt = 1 for npid, nvid, nactor in self.get_parent_nodes(pid): data_id = get_id(ituple[0], ituple[1]) - row = self._index.find_one(data_id=data_id) + row = self._indexdb.find_one(data_id=data_id) inputs.append(load_data(row[0].path[0])) else: cpt = 0 @@ -1560,7 +1552,7 @@ def eval(self, *args, **kwargs): for port in range(df.node(vid).get_nb_output()): data_id = get_id(vid, port) write_data(data_id=data_id, data=df.node(vid).get_output(port), path=self._tmp_path) - self._index.add_data(data_id=data_id, path=str(os.path.join(self._tmp_path, data_id)), exec_data=True, cache_data=False) + self._indexdb.add_data(data_id=data_id, path=str(os.path.join(self._tmp_path, data_id)), exec_data=True, cache_data=False) if self._prov is not None: @@ -1583,10 +1575,12 @@ class FakeEvaluation(AbstractEvaluation): # TODO: It doesn't work with provenance def __init__(self, dataflow, *args, **kwargs): - AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) + AbstractEvaluation.__init__(self, dataflow) # a property to specify if the node has already been evaluated self._evaluated = set() + self._index = Task_UID_graph(dataflow) + def is_stopped(self, vid, actor): """ Return True if evaluation must be stop at this vertex """ @@ -1606,12 +1600,55 @@ def is_stopped(self, vid, actor): pass return False + def eval_vertex_code(self, vid, *args, **kwargs): + """ + Evaluate the vertex vid. + Can raise an exception if evaluation failed. + """ + + node = self._dataflow.actor(vid) + + try: + + if self._index is not None: + self._index.before_eval(self._dataflow, vid) + + t0 = clock() + ret = 0 + + dt = clock() - t0 + if self._index is not None: + self._index.after_eval(self._dataflow, vid) + + # When an exception is raised, a flag is set. + # So we remove it when evaluation is ok. 
+ node.raise_exception = False + # if hasattr(node, 'raise_exception'): + # del node.raise_exception + node.notify_listeners(('data_modified', None, None)) + return ret + + except EvaluationException, e: + e.vid = vid + e.node = node + # When an exception is raised, a flag is set. + node.raise_exception = True + node.notify_listeners(('data_modified', None, None)) + raise e + + except Exception, e: + # When an exception is raised, a flag is set. + node.raise_exception = True + node.notify_listeners(('data_modified', None, None)) + raise EvaluationException(vid, node, e, \ + tb.format_tb(sys.exc_info()[2])) + + def eval_vertex(self, vid, *args, **kwargs): """ Evaluate the vertex vid """ df = self._dataflow actor = df.actor(vid) - print("eval node: ", vid) self._evaluated.add(vid) # For each inputs @@ -1621,8 +1658,6 @@ def eval_vertex(self, vid, *args, **kwargs): cpt = 0 # For each connected node for npid, nvid, nactor in self.get_parent_nodes(pid): - print("node: ", vid, " - input port : ", pid, ' - to output port: ', - npid, " - from node: ", nvid) if not self.is_stopped(nvid, nactor): self.eval_vertex(nvid) @@ -1634,18 +1669,14 @@ def eval_vertex(self, vid, *args, **kwargs): inputs = inputs[0] if (cpt > 0): actor.set_input(df.local_id(pid), inputs) - # Eval the node - # self.eval_vertex_code(vid) + self.eval_vertex_code(vid) def eval(self, *args, **kwargs): """ Evaluate the whole dataflow starting from leaves""" t0 = time.time() df = self._dataflow - if self._prov is not None: - self._prov.init(df) - self._prov.time_init = t0 # Unvalidate all the nodes self._evaluated.clear() @@ -1656,13 +1687,12 @@ def eval(self, *args, **kwargs): t1 = time.time() - if self._prov is not None: - self._prov.time_end = t1 - wfitem = self._prov.as_wlformat() - if self._provdb is not None: - self._provdb.add_wf_item(wfitem) - # close remote connections - self._provdb.close() + if self._index is not None: + tid = self._index.as_dict() if quantify: - print "Evaluation time: %s" % (t1 - 
t0) \ No newline at end of file + print "Evaluation time: %s" % (t1 - t0) + self._dataflow.node(1).add_input(name="task_ids") + self._dataflow.node(1).set_output("task_ids", tid) + return + \ No newline at end of file From 842554d867372a778e900912c683e6425cdbb739 Mon Sep 17 00:00:00 2001 From: gaetan heidsieck Date: Fri, 31 Jan 2020 10:20:35 +0100 Subject: [PATCH 45/45] remove test evaluation --- src/openalea/core/algo/dataflow_evaluation.py | 273 ------------------ 1 file changed, 273 deletions(-) diff --git a/src/openalea/core/algo/dataflow_evaluation.py b/src/openalea/core/algo/dataflow_evaluation.py index a924b9f4..0dc1d33e 100644 --- a/src/openalea/core/algo/dataflow_evaluation.py +++ b/src/openalea/core/algo/dataflow_evaluation.py @@ -1159,279 +1159,6 @@ def eval(self, vtx_id=None, **kwds): ############################################################ -class TestEvaluation(AbstractEvaluation): - """ Basic evaluation algorithm with provenance capture in file """ - __evaluators__.append("TestEvaluation") - - def __init__(self, dataflow, *args, **kwargs): - - AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) - # a property to specify if the node has already been evaluated - self._evaluated = set() - - def is_stopped(self, vid, actor): - """ Return True if evaluation must be stop at this vertex """ - - if vid in self._evaluated: - return True - - try: - if actor.block: - status = True - n = actor.get_nb_output() - outputs = [i for i in range(n) if - actor.get_output(i) is not None] - if not outputs: - status = False - return status - except: - pass - return False - - def eval_vertex(self, vid, *args, **kwargs): - """ Evaluate the vertex vid """ - - df = self._dataflow - actor = df.actor(vid) - - self._evaluated.add(vid) - - # For each inputs - for pid in df.in_ports(vid): - inputs = [] - - cpt = 0 - # For each connected node - for npid, nvid, nactor in self.get_parent_nodes(pid): - if not self.is_stopped(nvid, nactor): - self.eval_vertex(nvid) - - 
inputs.append(nactor.get_output(df.local_id(npid))) - cpt += 1 - - # set input as a list or a simple value - if (cpt == 1): - inputs = inputs[0] - if (cpt > 0): - actor.set_input(df.local_id(pid), inputs) - - # Eval the node - self.eval_vertex_code(vid) - - def eval(self, *args, **kwargs): - """ Evaluate the whole dataflow starting from leaves""" - - t0 = time.time() - df = self._dataflow - - if self._prov is not None: - self._prov.init(df) - self._prov.time_init = t0 - - - # Unvalidate all the nodes - self._evaluated.clear() - - # Eval from the leaf - for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): - self.eval_vertex(vid) - - t1 = time.time() - - if self._prov is not None: - self._prov.time_end = t1 - wfitem = self._prov.as_wlformat() - if self._provdb is not None: - self._provdb.add_wf_item(wfitem) - - # close remote connections - self._provdb.close() - - if quantify: - print "Evaluation time: %s" % (t1 - t0) - - -class ZMQEvaluation(AbstractEvaluation): - """ Basic evaluation algorithm """ - __evaluators__.append("ZMQEvaluation") - - def __init__(self, dataflow, *args, **kwargs): - - AbstractEvaluation.__init__(self, dataflow, *args, **kwargs) - # a property to specify if the node has already been evaluated - self._evaluated = set() - self.socket=None - - def is_stopped(self, vid, actor): - """ Return True if evaluation must be stop at this vertex """ - - if vid in self._evaluated: - return True - - try: - if actor.block: - status = True - n = actor.get_nb_output() - outputs = [i for i in range(n) if - actor.get_output(i) is not None] - if not outputs: - status = False - return status - except: - pass - return False - - def eval_vertex(self, vid, *args, **kwargs): - """ Evaluate the vertex vid """ - - df = self._dataflow - actor = df.actor(vid) - - self._evaluated.add(vid) - - # For each inputs - for pid in df.in_ports(vid): - inputs = [] - - cpt = 0 - # For each connected node - for npid, nvid, nactor in self.get_parent_nodes(pid): - if 
not self.is_stopped(nvid, nactor): - self.eval_vertex(nvid) - - inputs.append(nactor.get_output(df.local_id(npid))) - cpt += 1 - - # set input as a list or a simple value - if (cpt == 1): - inputs = inputs[0] - if (cpt > 0): - actor.set_input(df.local_id(pid), inputs) - - # Get the best worker - # TODO - worker_id = 0 - - # Eval the node - self.eval_vertex_code(vid, worker_id) - - def eval(self, *args, **kwargs): - """ Evaluate the whole dataflow starting from leaves""" - - # Init the provenance - t0 = clock() - df = self._dataflow - if self._prov is not None: - self._prov.init(df) - self._prov.time_init = t0 - - # Init the workers - # context = zmq.Context() - # socket = context.socket(zmq.REQ) - # socket.connect("tcp://localhost:5559") - # self.socket = socket - # TODO: FIND A WAY TO INIT WF ON ALL WORKER - for now the worker are started by hand - # msg=dict() - # msg['pkg_name'] = self._dataflow.get_factory().package.name - # msg['wf_name'] = self._dataflow.get_factory().name - # socket.send(json.dumps(msg)) - # state=socket.recv() - # state=json.loads(state) - # if not state['Initialization']=="success": - # print "Eval failed due to failed init workers" - # return - - # Unvalidate all the nodes - self._evaluated.clear() - - # Eval from the leaf - for vid in (vid for vid in df.vertices() if df.nb_out_edges(vid) == 0): - self.eval_vertex(vid) - - # Update workflow provenance - t1 = clock() - if self._prov is not None: - self._prov.time_end = t1 - wfitem = self._prov.as_wlformat() - if self._provdb is not None: - self._provdb.add_wf_item(wfitem) - - # close remote connections - self._provdb.close() - - if quantify: - print "Evaluation time: %s" % (t1 - t0) - - def eval_vertex_code(self, vid, *args, **kwargs): - """ - Evaluate the vertex vid. - Can raise an exception if evaluation failed. 
- """ - - node = self._dataflow.actor(vid) - - try: - t0 = clock() - if self._prov is not None: - self._prov.before_eval(self._dataflow, vid) - - # Send value to worker - context = zmq.Context() - socket = context.socket(zmq.REQ) - print "Start evaluating node : ", vid - socket.connect("tcp://localhost:5559") - - msg = dict() - # msg['pkg_name'] = self._dataflow.get_factory().package.name - # msg['wf_name'] = self._dataflow.get_factory().name - msg['vid'] = vid - inputs = node.input_desc - for inp in inputs: - inp['interface']=None - inp['value']=node.get_input(inp['name']) - msg['inputs'] = inputs - - if (vid != 0) & (vid != 1): - socket.send(json.dumps(msg)) - outputs = socket.recv() - outputs = json.loads(outputs) - for out in outputs: - node.set_output(out['name'], val=out['value']) - else: - ret = node.eval() - # ret = node.eval() - - dt = clock() - t0 - if self._prov is not None: - taskitem=self._prov.after_eval(self._dataflow, vid, dt) - if taskitem: - self._provdb.add_task_item(taskitem) - - # When an exception is raised, a flag is set. - # So we remove it when evaluation is ok. - node.raise_exception = False - # if hasattr(node, 'raise_exception'): - # del node.raise_exception - node.notify_listeners(('data_modified', None, None)) - # return ret - return - - except EvaluationException, e: - e.vid = vid - e.node = node - # When an exception is raised, a flag is set. - node.raise_exception = True - node.notify_listeners(('data_modified', None, None)) - raise e - - except Exception, e: - # When an exception is raised, a flag is set. - node.raise_exception = True - node.notify_listeners(('data_modified', None, None)) - raise EvaluationException(vid, node, e, \ - tb.format_tb(sys.exc_info()[2])) - return - class FragmentEvaluation(AbstractEvaluation): """ Evaluation with By fragments """ __evaluators__.append("FragmentEvaluation")