"""
This is a module that contains the SuperNode class.
"""
from typing import Any, Callable, Union, Hashable
import yaml
import pandas as pd
from supernodes.operations import InEquality
from copy import copy
# PyYAML consults `ignore_aliases` for every object it represents; answering
# True each time keeps it from emitting anchors/aliases (`&id001` / `*id001`)
# when the same object appears more than once in the dumped data.
def _always_ignore_aliases(*_args):
    return True


yaml.Dumper.ignore_aliases = _always_ignore_aliases
class SuperNode:
    """A node used to create a tree data structure.

    Attributes
    ----------
    name: str, int, hashable, default = None
        The name of the node.
    value: Any, default = None
        The data stored in a node.
    id: Any, default = None
        An attribute that should be unique for every node. None of the nodes in the whole
        tree should have the same id attribute.
    other_attrs: dict
        Here you can store additional attributes.
    children: list
        A list of children nodes.
    function: Callable, str
        A function to be called when running the tree as a decision tree. This attribute can
        also be an inequality as a string. For example: "x >= 10" or "x[1] < 9". Slicing is
        currently not supported.
    child_name_if_true: str, int, hashable
        If this node was run in a binary tree, this attribute will determine which child
        to be chosen next. If the return value of the `function` is ``True``, the child
        with the `name` that is same as `child_name_if_true` will be chosen.
        See :py:meth:`~SuperNode.run_as_binary_tree` for more information.
    child_name_if_false: str, int, hashable
        Specifies the child to be chosen if `function` returns ``False``.
        See :py:meth:`~SuperNode.run_as_binary_tree` for more information.

    Examples
    --------
    Importing this class:

    >>> from supernodes import SuperNode

    Making a simple node:

    >>> node = SuperNode()

    Making a node with name, value, and id:

    >>> node = SuperNode(name="main node", value=30, id="node-0")
    >>> node
    (name=main node, value: int, id=node-0)

    Changing the name of the node:

    >>> node.name = "new name"

    Adding children:

    >>> child_1 = SuperNode(name="child-1")
    >>> child_2 = SuperNode(name="child-2")
    >>> node.append(child_1)
    >>> node.append(child_2)
    >>> node
    (name=new name, value: int, id=node-0)
    |__ (name=child-1)
    |__ (name=child-2)
    """

    def __init__(self, name: Union[str, int, Hashable] = None, value: Any = None, id: Any = None,
                 children: list = None, function: Callable = None,
                 child_name_if_true: Union[str, int, Hashable] = None,
                 child_name_if_false: Union[str, int, Hashable] = None, **other_attrs):
        # NOTE: the assignment order below fixes the key order of `__dict__`, which
        # `get_attributes`, `to_str` and `to_node_dict` all rely on for their output order.
        self.name = name
        self.value = value
        self.id = id
        # A missing or empty `children` argument gives this node its own fresh list.
        self.children = children if children else []
        self.function = function
        self.child_name_if_true = child_name_if_true
        self.child_name_if_false = child_name_if_false
        self.other_attrs = other_attrs

    def _object_to_node(self, object):
        """Return `object` itself if it is a node, otherwise wrap it as the value of a new node."""
        # isinstance (rather than an exact type check) so that subclass instances
        # are kept as nodes instead of being wrapped inside another node.
        if isinstance(object, SuperNode):
            return object
        return SuperNode(value=object)

    def has_children(self):
        """
        Checks if the node has children.

        Returns
        -------
        bool
        """
        return len(self.children) > 0

    def get_children_names(self):
        """
        Lists the names of all children nodes that are directly under this node.
        Children whose `name` is ``None`` are left out.

        Returns
        -------
        children: list of str, int, or hashable
        """
        return [child.name for child in self.children if child.name is not None]

    def get_child_from_name(self, name: Union[str, int, Hashable]) -> Union['SuperNode', None]:
        """
        Gets the child node from its name.

        Parameters
        ----------
        name: str, int, hashable

        Returns
        -------
        child: SuperNode
            The first direct child having the specified name, if one was found.
        None
            If the child node was not found.
        """
        for child in self.children:
            if child.name == name:
                return child
        return None

    def append(self, node: Union['SuperNode', Any]):
        """
        Adds a child node.

        Parameters
        ----------
        node: SuperNode or any object
            If the `node` parameter is not a `SuperNode` object, it will create a new `SuperNode` object.
            The value of the `SuperNode` object will be the value of the `node` parameter.

        Raises
        ------
        ValueError
            If another direct child already has the same (non-``None``) name.
        """
        node = self._object_to_node(node)
        if node.name in self.get_children_names():
            raise ValueError("Two children of the same Node cannot have the same 'name' attribute.")
        self.children.append(node)

    def insert(self, index, node):
        """
        Adds a child before the specified index in the `children` list.

        Parameters
        ----------
        index: int
        node: SuperNode or any object
            If the `node` parameter is not a `SuperNode` object, it will create a new `SuperNode` object.
            The value of the `SuperNode` object will be the value of the `node` parameter.

        Raises
        ------
        ValueError
            If another direct child already has the same (non-``None``) name.
        """
        node = self._object_to_node(node)
        if node.name in self.get_children_names():
            raise ValueError("Two children of the same Node cannot have the same 'name' attribute.")
        # Insert into the children list; calling `self.insert` here would recurse forever.
        self.children.insert(index, node)

    def get_attributes(self, none_attrs=True):
        """
        Returns all attributes of the node including those stored in `other_attrs` variable.

        Parameters
        ----------
        none_attrs: bool
            If ``False``, the attributes that have the value ``None`` will not be included.

        Returns
        -------
        dict
        """
        attrs = {k: v for k, v in self.__dict__.items()
                 if not k.startswith('_') and k != "other_attrs"}
        # Flatten the extra attributes into the same mapping (key -> value pairs).
        attrs.update(self.other_attrs)
        if not none_attrs:
            attrs = {k: v for k, v in attrs.items() if v is not None}
        return attrs

    def __iadd__(self, node):
        self.append(node)
        return self

    def __copy__(self):
        # Rebuilds the whole subtree as new nodes; the stored values themselves are shared.
        node = SuperNode()
        node.from_node_dict(self.to_node_dict())
        return node

    def __str__(self):
        return self.to_str()

    def __repr__(self):
        return self.to_str()

    def _short(self, text):
        """Return `text` as a string, cut to its first line / first 20 characters when too long."""
        text = str(text)
        if len(text) <= 20 and "\n" not in text:
            return text
        # Keep at most 20 characters of the first line of the stripped text. Slicing
        # (instead of indexing 20 positions) stays safe when the stripped text is short.
        first_line = text.strip().split("\n", 1)[0]
        return first_line[:20] + " ..."

    def to_str(self):
        """
        Converts the node and its descendants to a string.

        The first line lists the node's non-``None`` attributes (the value is shown by its
        type name only); each child follows on its own line prefixed with ``|__``.

        Returns
        -------
        str
        """
        parts = []
        for attr_name, attr in self.get_attributes(none_attrs=False).items():
            if attr_name == "children":
                continue
            if attr_name == "value":
                parts.append(f"{attr_name}: {attr.__class__.__name__}")
            else:
                parts.append(f"{attr_name}={self._short(attr)}")
        lines = ["(" + ", ".join(parts) + ")"]

        spaces = " " * 4
        last_index = len(self.children) - 1
        for num, child in enumerate(self.children):
            child_lines = child.to_str().splitlines()
            lines.append("|__ " + child_lines[0])
            # Continue the vertical bar only while more siblings follow this child.
            prefix = ("|" if num < last_index else " ") + spaces
            lines.extend(prefix + line for line in child_lines[1:])
        return "\n".join(lines)

    def to_node_dict(self):
        """
        Converts the node and its descendants to a dictionary.

        Returns
        -------
        dict
            The public attributes of the node, with ``"children"`` as the last key holding
            the dictionaries of the child nodes.
        """
        dictionary = {key: value for key, value in self.__dict__.items()
                      if not key.startswith("_") and key != "children"}
        if "other_attrs" in dictionary:
            # Hand out a copy so a node rebuilt from this dictionary does not share
            # (and mutate) this node's extra-attributes mapping.
            dictionary["other_attrs"] = dict(dictionary["other_attrs"])
        dictionary["children"] = [child.to_node_dict() for child in self.children]
        return dictionary

    def from_node_dict(self, dictionary):
        """
        Creates a node from a dictionary. The dictionary should have keys that are the same
        as the attributes of the `SuperNode` object (i.e. `name`, `value`, `id`, etc.).

        Parameters
        ----------
        dictionary: dict
            A missing or empty ``"children"`` entry results in a node without children.

        Returns
        -------
        SuperNode
            This node, after it has been populated.

        Raises
        ------
        KeyError
            If a key of the dictionary is not an attribute of the node.
        """
        for key, value in dictionary.items():
            if key == "children":
                continue
            if key not in self.__dict__:
                raise KeyError(f"'{key}' is not an attribute of `Node` object.")
            self.__dict__[key] = value
        # `from_node_dict` returns the populated node, so the list holds nodes (not None).
        self.children = [SuperNode().from_node_dict(child)
                         for child in (dictionary.get("children") or [])]
        return self

    def to_yaml(self, file_path):
        """
        Converts the node to a YAML file.

        Parameters
        ----------
        file_path: str
            Path to the YAML file.
        """
        # NOTE(review): values that are not plain YAML types (e.g. callables) may be written
        # with Python-specific tags that `from_yaml` (safe loader) will refuse - confirm.
        with open(file_path, "w") as file:
            yaml.dump(self.to_node_dict(), file, sort_keys=False)

    def from_yaml(self, file_path):
        """
        Creates a node from a YAML file. The YAML file should have keys that are the same
        as the attributes of the `SuperNode` object (i.e. `name`, `value`, `id`, etc.).

        Parameters
        ----------
        file_path: str
            Path to the YAML file.
        """
        with open(file_path, "rt") as file:
            dictionary = yaml.safe_load(file)
        self.from_node_dict(dictionary)

    def split(self, num: int = 2, names: list = None, values: list = None, ids: list = None,
              functions: list = None, **other_attrs_lists):
        """
        Splits the node into children. The number of children created from the node is specified
        by the `num` parameter.

        Parameters
        ----------
        num: int, default = 2
            The number of children to be created.
        names: list (optional)
            If used, the `name` attributes of the children will be the values of the `names` list.
            Its length should be the same as the parameter `num`.
        values: list (optional)
            If used, the `value` attributes of the children will be the values of the `values` list.
            Its length should be the same as the parameter `num`.
        ids: list (optional)
            If used, the `id` attributes of the children will be the values of the `ids` list.
            Its length should be the same as the parameter `num`.
        functions: list (optional)
            If used, the `function` attributes of the children will be the values of the `functions` list.
            Its length should be the same as the parameter `num`.
        other_attrs_lists: lists given as keyword arguments (optional)
            Each keyword maps an attribute name to a list; the i-th child is created with the
            i-th item of that list as a keyword argument of the same name.
            Each list length should be the same as the parameter `num`.

        Returns
        -------
        new_children: list of nodes

        Raises
        ------
        ValueError
            If a provided list does not have `num` items, or if the new names would clash
            with each other or with the names of the existing children.
        """
        per_child = {"names": names, "values": values, "ids": ids, "functions": functions}
        for label, items in list(per_child.items()):
            # A missing or empty list means "not provided": every child gets None.
            if items is None or len(items) == 0:
                per_child[label] = [None] * num
            elif len(items) != num:
                raise ValueError(f"Length of `{label}` should be same as `num`.")
        for label, items in other_attrs_lists.items():
            if len(items) != num:
                raise ValueError(f"Length of `{label}` should be same as `num`.")

        # Enforce the same unique-name rule as `append`, before touching `self.children`.
        seen_names = self.get_children_names()
        for new_name in per_child["names"]:
            if new_name is None:
                continue
            if new_name in seen_names:
                raise ValueError("Two children of the same Node cannot have the same 'name' attribute.")
            seen_names.append(new_name)

        new_children = [
            SuperNode(name=per_child["names"][i],
                      value=per_child["values"][i],
                      id=per_child["ids"][i],
                      function=per_child["functions"][i],
                      **{key: items[i] for key, items in other_attrs_lists.items()})
            for i in range(num)
        ]
        self.children.extend(new_children)
        return new_children

    def to_list(self, attr=None):
        """
        This method will convert the tree to a list of rows. Each row in the list is a path from the
        current node to a leaf.

        Parameters
        ----------
        attr: str, default = None
            The attribute collected from every node on the path (for example ``"name"``).
            If omitted, the rows contain the node objects themselves.

        Returns
        -------
        arr: list
            List of rows, one per leaf.
        """
        return list(self._rows_iter([], attr))

    def split_on_df_column(self, df, column):
        """
        Splits the node based on the columns of a `pandas` `DataFrame`. The number of children nodes
        will depend on the number of unique values in the column. Each unique value will be the `name`
        attribute of a child node, and the rows having that value become the child's `value`.

        Parameters
        ----------
        df: pandas.DataFrame
        column
            One of the columns of the `DataFrame`.

        Returns
        -------
        nodes: list of children nodes

        Examples
        --------
        >>> import pandas as pd
        >>> df = pd.DataFrame({"Column-1": ["column-1 row-1", "column-1 row-2"],
        ...                    "Column-2": ["column-2 row-1", "column-2 row-2"]})
        >>> node = SuperNode("DataFrame Node")
        >>> node.split_on_df_column(df, "Column-1")
        [(name=column-1 row-1, value: DataFrame), (name=column-1 row-2, value: DataFrame)]
        >>> node
        (name=DataFrame Node)
        |__ (name=column-1 row-1, value: DataFrame)
        |__ (name=column-1 row-2, value: DataFrame)
        """
        nodes = []
        for unique_value in df[column].unique():
            node = SuperNode(name=unique_value, value=df[df[column] == unique_value])
            nodes.append(node)
            self.append(node)
        return nodes

    def from_df(self, df):
        """
        Creates a tree from a `pandas` `DataFrame`. It will apply the method
        :py:meth:`~SuperNode.split_on_df_column` to each column in the `DataFrame`.

        Parameters
        ----------
        df: pandas.DataFrame

        Examples
        --------
        >>> import pandas as pd
        >>> df = pd.DataFrame({"Column-1": ["column-1 row-1", "column-1 row-2"],
        ...                    "Column-2": ["column-2 row-1", "column-2 row-2"]})
        >>> node = SuperNode("DataFrame Node")
        >>> node.from_df(df)
        >>> node
        (name=DataFrame Node)
        |__ (name=column-1 row-1, value: DataFrame)
        |    |__ (name=column-2 row-1, value: DataFrame)
        |__ (name=column-1 row-2, value: DataFrame)
             |__ (name=column-2 row-2, value: DataFrame)
        """
        if len(df.columns) > 0:
            nodes = self.split_on_df_column(df, column=df.columns[0])
            for node in nodes:
                # Each child recurses on its own rows, restricted to the remaining columns.
                smaller_df = node.value[df.columns[1:]]
                node.from_df(smaller_df)

    def _rows_iter(self, row, attr):
        """Yield one row per leaf below this node; `row` is the path collected so far."""
        # Work on a copy so sibling branches do not see each other's additions.
        row = copy(row)
        if not attr:
            row.append(self)
        else:
            public_keys = [key for key in self.__dict__ if not key.startswith("_")]
            if attr in public_keys:
                row.append(self.__dict__[attr])
            elif attr in self.other_attrs:
                row.append(self.other_attrs[attr])
            # A node that lacks `attr` altogether contributes nothing to the row.
        if not self.has_children():
            yield row
        for child in self.children:
            yield from child._rows_iter(row=row, attr=attr)

    def to_df(self, columns=None, ignore_first=True, attr="name"):
        """
        Creates a `pandas` `DataFrame` from the tree. Each layer beneath this node will become a column.
        By default the values under the columns are the `name` attributes of the nodes in the tree.

        Parameters
        ----------
        columns: list, default = None
            The column headers for the `DataFrame`.
        ignore_first: bool, default=True
            If ``True``, the current node will not be included in the `DataFrame`.
        attr: str, default = "name"
            The node attribute used to fill the cells.

        Returns
        -------
        df: pandas.DataFrame
        """
        data = self.to_list(attr=attr)
        if ignore_first:
            # Drop the first entry of every path, i.e. this node itself.
            data = [path[1:] for path in data]
        df = pd.DataFrame(data, columns=columns)
        return df

    def find_node(self, id):
        """
        Returns a descendant node that has the specified `id` attribute.

        Parameters
        ----------
        id: Any
            The `id` attribute of the node.

        Returns
        -------
        child: SuperNode
            The first matching descendant found in a depth-first search.
        None
            If no descendant has that `id`.
        """
        for child in self.children:
            if child.id == id:
                return child
            # Return the node that actually matched, not the child it was found under.
            found = child.find_node(id=id)
            if found is not None:
                return found
        return None

    def find_nodes(self, name=None, value=None, function=None, **other_attrs):
        """
        Returns a list of descendant nodes that match the specified criteria.
        Criteria left as ``None`` are ignored. Additional keyword arguments are compared
        with the entries of each node's `other_attrs`.

        Parameters
        ----------
        name: str, int, hashable
        value: Any
        function: Callable, str

        Returns
        -------
        descendants: list of nodes
        """
        descendants = []
        for child in self.children:
            add_child = True
            if name is not None and child.name != name:
                add_child = False
            if value is not None and child.value != value:
                add_child = False
            if function is not None and child.function != function:
                add_child = False
            # Distinct loop names: reusing `value` here would clobber the `value`
            # criterion that is passed on to the recursive call below.
            for attr_key, attr_value in other_attrs.items():
                if attr_key not in child.other_attrs:
                    add_child = False
                elif attr_value != child.other_attrs[attr_key]:
                    add_child = False
            if add_child:
                descendants.append(child)
            descendants += child.find_nodes(name=name, value=value, function=function, **other_attrs)
        return descendants

    def __getitem__(self, name):
        """
        Gets a child from its name.

        Parameters
        ----------
        name: str, int, hashable

        Returns
        -------
        node
            The child, or ``None`` if no direct child has that name.
        """
        return self.get_child_from_name(name)

    def __setitem__(self, name, node):
        """
        Adds a new child and gives it the specified `name`. A direct child that already
        has this name is removed first; the new child is added at the end of `children`.

        Parameters
        ----------
        name: str, int, hashable
        node: SuperNode, Any
        """
        for i, child in enumerate(self.children):
            if child.name == name:
                self.children.pop(i)
                break
        node = self._object_to_node(node)
        node.name = name
        self.append(node)

    def run_as_binary_tree(self, **kwargs):
        """
        Runs the tree as a binary decision tree. It will run the `function` attribute of this node,
        then chooses a child node and runs its `function`, then chooses one of the child's
        children, etc. It chooses the children based on the attributes `child_name_if_true` and
        `child_name_if_false`. If the output of `function` is truthy, the next child will be
        the node whose `name` is same as `child_name_if_true`. If the output of `function` is ``False``,
        the next child will be the node whose `name` is same as `child_name_if_false`.

        Creating a tree that can be used as a binary decision tree:

        >>> main_node = SuperNode(name="main-node", function="x > 10")
        >>> main_node['first-child'] = SuperNode()
        >>> main_node['second-child'] = SuperNode()
        >>> main_node.child_name_if_true = "first-child"
        >>> main_node.child_name_if_false = "second-child"

        Running the binary decision tree:

        >>> leaf = main_node.run_as_binary_tree(x=11)
        >>> leaf
        (name=first-child)

        Parameters
        ----------
        kwargs
            Keyword arguments that the functions inside the tree accepts.

        Returns
        -------
        node: SuperNode
            Either the leaf node will be returned or a node that has no function. The current
            node is also returned when the selected child name is unset or matches no child.
        """
        if not self.function:
            return self
        if isinstance(self.function, str):
            # String functions are inequalities evaluated against the keyword arguments.
            output = InEquality(self.function)(**kwargs)
        else:
            output = self.function(**kwargs)
        if output:
            if self.child_name_if_true:
                child = self.get_child_from_name(self.child_name_if_true)
                if child:
                    return child.run_as_binary_tree(**kwargs)
        elif output is False:
            # Only the literal False selects the "false" child; other falsy outputs
            # (None, 0, ...) stop the traversal at this node.
            if self.child_name_if_false:
                child = self.get_child_from_name(self.child_name_if_false)
                if child:
                    return child.run_as_binary_tree(**kwargs)
        return self
if __name__ == "__main__":
    # When executed as a script, run the examples embedded in the docstrings as doctests.
    from doctest import testmod

    testmod()