Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
kfrancischen committed Feb 7, 2020
0 parents commit 2b74eb4
Show file tree
Hide file tree
Showing 36 changed files with 1,540 additions and 0 deletions.
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# pslx
Python Standard Library eXtension
Empty file added pslx/__init__.py
Empty file.
Empty file added pslx/batching/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions pslx/compile_protos.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# Compile every *.proto file found directly under schema/ with protoc,
# writing the generated Python modules back into schema/.
# All paths are relative to the current working directory.
for file in schema/*; do
    case "$file" in
        *.proto)
            echo "Compiling: $file"
            protoc --proto_path="schema" --python_out=schema/ "$file"
            ;;
    esac
done
Empty file added pslx/config/__init__.py
Empty file.
15 changes: 15 additions & 0 deletions pslx/config/general_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pytz


class TimeZoneObj(object):
    """Holder for pre-built pytz timezone objects."""

    # Each constant is the result of pytz.timezone() for the given zone name.
    UTC_TIMEZONE = pytz.timezone('UTC')
    EASTERN_TIMEZONE = pytz.timezone('US/Eastern')
    # Despite the "WESTERN" label this is the 'US/Pacific' zone.
    WESTERN_TIMEZONE = pytz.timezone('US/Pacific')


class SearchDirObj(object):
    """Holder for relative directory names, each ending with a slash."""

    CONFIG_DIR = 'config/'
    SCHEMA_DIR = 'schema/'
    SERVICE_DIR = 'service/'
    TOOL_DIR = 'tool/'
    UTIL_DIR = 'util/'
2 changes: 2 additions & 0 deletions pslx/config/tool_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
LOGGING_TOOL:
DISK_LOG_FILE_DIR: "log/"
Empty file added pslx/core/__init__.py
Empty file.
43 changes: 43 additions & 0 deletions pslx/core/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import inspect
from pslx.schema.enums_pb2 import ModeType
from pslx.util.timezone_util import cur_time_in_pst
from pslx.util.color_util import ColorsUtil


class Base(object):
    """Common root class providing run-mode detection and logging helpers.

    Class attributes:
        CONFIG_PATH: empty by default; available for subclasses to override.
        LOG_EVERYTHING: truthy when the LOG_EVERYTHING environment variable
            is set to a non-empty string; gates log_print().
    """

    CONFIG_PATH = ''
    # NOTE: os.getenv returns the raw string, so ANY non-empty value
    # (including "0" or "false") enables logging.
    LOG_EVERYTHING = os.getenv('LOG_EVERYTHING', False)

    def __init__(self):
        """Select the run mode from the TEST environment variable.

        A set, non-empty TEST variable selects ModeType.TEST; otherwise the
        instance runs in ModeType.PROD.
        """
        if os.environ.get('TEST'):
            self._mode = ModeType.TEST
        else:
            self._mode = ModeType.PROD

    @classmethod
    def get_class_name(cls):
        """Return the bare class name."""
        return cls.__name__

    @classmethod
    def get_full_class_name(cls):
        """Return the dotted '<module>.<class name>' path of the class."""
        # cls.__module__ is already a dotted module path, so it needs no
        # '.py' stripping or '/' splitting (which could corrupt module names
        # that happen to contain '.py').
        return '.'.join([cls.__module__, cls.__name__])

    @classmethod
    def get_inheritance_level(cls):
        """Return the ancestry chain, e.g. 'Base->Child', oldest first.

        The trailing `object` entry of the MRO is left out.
        """
        ancestors = cls.mro()[:-1]
        return '->'.join(klass.__name__ for klass in reversed(ancestors))

    @classmethod
    def log_print(cls, string):
        """Print `string` prefixed with the class name and current time.

        Does nothing unless LOG_EVERYTHING is truthy.
        """
        if not cls.LOG_EVERYTHING:
            return
        print(ColorsUtil.BOLD + 'class' + ColorsUtil.RESET + ' ' +
              ColorsUtil.Foreground.GREEN + '[%s]' % cls.get_class_name() + ColorsUtil.RESET + ' & ' +
              ColorsUtil.BOLD + 'Timestamp' + ColorsUtil.RESET + ' ' +
              ColorsUtil.Foreground.RED + '[%s]' % str(cur_time_in_pst()) + ColorsUtil.RESET +
              ': ' + string)
17 changes: 17 additions & 0 deletions pslx/core/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from pslx.core.base import Base


class ExceptionBase(Base, Exception):
    """Root of the package's exception hierarchy.

    Base precedes Exception in the MRO and Base.__init__ accepts no extra
    arguments, so without an explicit initializer a call such as
    ``ExceptionBase('message')`` would fail with TypeError instead of
    building the exception.
    """

    def __init__(self, *args):
        # Initialise both bases explicitly: Base.__init__ does not chain to
        # the next class in the MRO, so Exception would otherwise be skipped.
        Base.__init__(self)
        Exception.__init__(self, *args)


class ProtobufException(ExceptionBase):
    """Parent class for the protobuf-related exceptions."""


class ProtobufNameNotExistException(ProtobufException):
    """Protobuf error for a name that does not exist."""


class ProtobufValueNotExistException(ProtobufException):
    """Protobuf error for a value that does not exist."""
Empty file added pslx/core/graph_base.py
Empty file.
124 changes: 124 additions & 0 deletions pslx/core/node_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from collections import OrderedDict, defaultdict
from pslx.core.base import Base
from pslx.schema.enums_pb2 import SortOrder


class NodeBase(Base):
    """Node that keeps its children and parents in name-keyed mappings.

    Subclasses pick the mapping types through CHILDREN_DATA_STRUCT and
    PARENTS_DATA_STRUCT and implement add_child / add_parent.
    """

    # Mapping factories invoked with no arguments in __init__; the None
    # placeholders must be replaced by subclasses.
    CHILDREN_DATA_STRUCT = None
    PARENTS_DATA_STRUCT = None
    # Value reported by is_children_ordered().
    IS_ORDERED = False

    def __init__(self, node_name):
        """Create a node called `node_name` with no children or parents."""
        super().__init__()
        self._node_name = node_name
        self._children = self.CHILDREN_DATA_STRUCT()
        self._parents = self.PARENTS_DATA_STRUCT()

    @classmethod
    def is_children_ordered(cls):
        """Return the class-level IS_ORDERED flag."""
        return cls.IS_ORDERED

    def get_node_name(self):
        """Return this node's name."""
        return self._node_name

    # ---- children accessors ----

    def get_children(self):
        """Return the underlying name -> child mapping (not a copy)."""
        return self._children

    def get_children_names(self):
        """Return the child names as a new list."""
        return list(self._children)

    def get_children_nodes(self):
        """Return the child nodes as a new list."""
        return [*self._children.values()]

    def get_num_children(self):
        """Return how many children are registered."""
        return len(self._children)

    # ---- parents accessors ----

    def get_parents(self):
        """Return the underlying name -> parent mapping (not a copy)."""
        return self._parents

    def get_parents_names(self):
        """Return the parent names as a new list."""
        return list(self._parents)

    def get_parents_nodes(self):
        """Return the parent nodes as a new list."""
        return [*self._parents.values()]

    def get_num_parents(self):
        """Return how many parents are registered."""
        return len(self._parents)

    # ---- link management ----

    def add_child(self, child_node):
        """Register `child_node`; must be provided by subclasses."""
        raise NotImplementedError

    def delete_child(self, child_node):
        """Drop `child_node` from the children and remove the reverse link."""
        self._children.pop(child_node.get_node_name(), None)
        # The back-reference is removed only while it still exists, which is
        # what stops the delete_child/delete_parent ping-pong.
        back_link_exists = child_node.has_parent(self._node_name)
        if back_link_exists:
            child_node.delete_parent(parent_node=self)

    def add_parent(self, parent_node):
        """Register `parent_node`; must be provided by subclasses."""
        raise NotImplementedError

    def delete_parent(self, parent_node):
        """Drop `parent_node` from the parents and remove the reverse link."""
        self._parents.pop(parent_node.get_node_name(), None)
        forward_link_exists = parent_node.has_child(self._node_name)
        if forward_link_exists:
            parent_node.delete_child(child_node=self)

    # ---- lookups ----

    def get_child(self, child_name):
        """Return the child called `child_name`, or None when absent."""
        return self._children.get(child_name)

    def has_child(self, child_name):
        """Return True when a child called `child_name` is registered."""
        return child_name in self._children

    def get_parent(self, parent_name):
        """Return the parent called `parent_name`, or None when absent."""
        return self._parents.get(parent_name)

    def has_parent(self, parent_name):
        """Return True when a parent called `parent_name` is registered."""
        return parent_name in self._parents


class UnorderedNodeBase(NodeBase):
    """Node whose children and parents live in defaultdict mappings.

    The defaultdicts are created without a default factory, so missing keys
    still raise KeyError.
    """

    CHILDREN_DATA_STRUCT = defaultdict
    PARENTS_DATA_STRUCT = defaultdict

    def __init__(self, node_name):
        """Create an unordered node called `node_name`."""
        super().__init__(node_name=node_name)

    def add_child(self, child_node):
        """Register `child_node` and make sure it lists this node as parent.

        A child with the same name is overwritten (a message is logged).
        """
        child_name = child_node.get_node_name()
        if self.has_child(child_name=child_name):
            self.log_print("Node name duplicated. Will overwrite the previous node.")
        self._children[child_name] = child_node
        # Mirror the link on the other side unless it is already there.
        already_linked = child_node.has_parent(parent_name=self._node_name)
        if not already_linked:
            child_node.add_parent(parent_node=self)

    def add_parent(self, parent_node):
        """Register `parent_node` and make sure it lists this node as child.

        A parent with the same name is overwritten (a message is logged).
        """
        parent_name = parent_node.get_node_name()
        if self.has_parent(parent_name=parent_name):
            self.log_print("Node name duplicated. Will overwrite the previous node.")
        self._parents[parent_name] = parent_node
        # Mirror the link on the other side unless it is already there.
        already_linked = parent_node.has_child(child_name=self._node_name)
        if not already_linked:
            parent_node.add_child(child_node=self)


class OrderedNodeBase(NodeBase):
    """Node whose children and parents live in OrderedDict mappings.

    With SortOrder.REVERSE every added entry is moved to the front of its
    mapping; otherwise entries stay where plain assignment puts them.
    """

    CHILDREN_DATA_STRUCT = OrderedDict
    PARENTS_DATA_STRUCT = OrderedDict
    IS_ORDERED = True

    def __init__(self, node_name, order=SortOrder.ORDER):
        """Create an ordered node called `node_name` with the given order."""
        super().__init__(node_name=node_name)
        self._order = order

    def add_child(self, child_node):
        """Register `child_node` and make sure it lists this node as parent.

        A child with the same name is overwritten (a message is logged).
        """
        child_name = child_node.get_node_name()
        if self.has_child(child_name=child_name):
            self.log_print("Node name duplicated. Will overwrite the previous node.")
        self._children[child_name] = child_node
        if self._order == SortOrder.REVERSE:
            # last=False moves the entry to the beginning of the mapping.
            self._children.move_to_end(key=child_name, last=False)
        already_linked = child_node.has_parent(parent_name=self._node_name)
        if not already_linked:
            child_node.add_parent(parent_node=self)

    def add_parent(self, parent_node):
        """Register `parent_node` and make sure it lists this node as child.

        A parent with the same name is overwritten (a message is logged).
        """
        parent_name = parent_node.get_node_name()
        if self.has_parent(parent_name=parent_name):
            self.log_print("Node name duplicated. Will overwrite the previous node.")
        self._parents[parent_name] = parent_node
        if self._order == SortOrder.REVERSE:
            # last=False moves the entry to the beginning of the mapping.
            self._parents.move_to_end(key=parent_name, last=False)
        already_linked = parent_node.has_child(child_name=self._node_name)
        if not already_linked:
            parent_node.add_child(child_node=self)
138 changes: 138 additions & 0 deletions pslx/core/tree_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from collections import deque
from collections import OrderedDict
from pslx.core.base import Base


class TreeBase(Base):
    """Tree of named nodes with a name -> node lookup cache.

    Nodes must provide the accessors used here: get_node_name,
    get_children_nodes, get_num_children, get_num_parents, add_child,
    delete_parent and is_children_ordered.
    """

    def __init__(self, root, max_dict_size=-1):
        """Create a tree rooted at `root`.

        Args:
            root: the root node.
            max_dict_size: maximum number of entries kept in the lookup
                cache; values <= 0 disable eviction.
        """
        super().__init__()
        self._root = root
        self._node_name_to_node_dict = OrderedDict(
            {root.get_node_name(): root}
        )
        self._max_dict_size = max_dict_size

    def add_node(self, parent_node, child_node):
        """Attach `child_node` under `parent_node` and cache both by name.

        Raises:
            AssertionError: if `child_node` already has a parent, or if
                `parent_node` is neither the root nor attached to a parent.
        """
        assert child_node.get_num_parents() == 0
        if parent_node != self._root:
            assert parent_node.get_num_parents() != 0

        parent_node.add_child(child_node)
        # Only the child's name can be a real duplicate; the parent is
        # normally registered already, so it must not trigger the warning.
        if child_node.get_node_name() in self._node_name_to_node_dict:
            self.log_print(string='Attention: node names need to be unique.')

        # Existing cache entries are kept; only missing names are added.
        for node in (parent_node, child_node):
            self._node_name_to_node_dict.setdefault(node.get_node_name(), node)
        self._clean_dict()

    def _clean_dict(self):
        """Evict the oldest cache entries until the size limit is met."""
        while len(self._node_name_to_node_dict) > self._max_dict_size > 0:
            self._node_name_to_node_dict.popitem(last=False)

    def find_node(self, node_name):
        """Return the node called `node_name`, or None if it is not found.

        The cache is consulted first; on a miss the tree is scanned
        breadth-first from the root.
        """
        if node_name in self._node_name_to_node_dict:
            return self._node_name_to_node_dict[node_name]
        search_queue = deque([self._root])
        while search_queue:
            search_node = search_queue.popleft()
            if search_node.get_node_name() == node_name:
                return search_node
            search_queue.extend(search_node.get_children_nodes())
        return None

    def get_tree_size(self):
        """Return the number of nodes in the whole tree."""
        return self.get_subtree_size(node=self._root)

    def get_subtree_size(self, node):
        """Return the number of nodes in the subtree rooted at `node`.

        `node` itself is counted, so a leaf has size 1.
        """
        # Iterative walk so that deep trees cannot hit the recursion limit.
        size = 0
        pending = [node]
        while pending:
            current = pending.pop()
            size += 1
            pending.extend(current.get_children_nodes())
        return size

    def bfs_search(self, max_num_node=-1):
        """Return node names in breadth-first order.

        Args:
            max_num_node: maximum number of names to return; values <= 0
                mean no limit.
        """
        result_node_names = []
        search_queue = deque([self._root])
        while search_queue:
            if 0 < max_num_node <= len(result_node_names):
                break
            search_node = search_queue.popleft()
            result_node_names.append(search_node.get_node_name())
            search_queue.extend(search_node.get_children_nodes())
        return result_node_names

    def dfs_search(self, max_num_node=-1):
        """Return node names in depth-first order.

        Children are pushed in their stored order and popped from the end
        of the stack, so the last child of a node is visited first.

        Args:
            max_num_node: maximum number of names to return; values <= 0
                mean no limit.
        """
        result_node_names = []
        search_stack = [self._root]
        while search_stack:
            if 0 < max_num_node <= len(result_node_names):
                break
            search_node = search_stack.pop()
            result_node_names.append(search_node.get_node_name())
            search_stack.extend(search_node.get_children_nodes())
        return result_node_names

    def _trim_tree(self, node, max_capacity=-1):
        """Shrink the subtree at `node` to at most `max_capacity` nodes.

        Children at the front of the child list are dropped first; the
        children at the end are preferred. Values <= 0 disable trimming.

        NOTE(review): detached nodes are not removed from the name cache,
        so find_node can still return them.
        """
        if not node.is_children_ordered():
            self.log_print(string=node.get_node_name() + ' is not ordered. Be careful when you trim the tree.')

        if max_capacity <= 0 or self.get_subtree_size(node=node) <= max_capacity:
            return

        children_nodes = node.get_children_nodes()
        num_children = len(children_nodes)

        if max_capacity < 1 + num_children:
            # Not even all direct children fit: keep the last
            # (max_capacity - 1) children and detach the rest.
            num_children_to_trim = 1 + num_children - max_capacity
            for child_node in children_nodes[:num_children_to_trim]:
                child_node.delete_parent(parent_node=node)
            # Each kept child has a budget of exactly one node, so its own
            # descendants must go or the capacity would still be exceeded.
            for child_node in children_nodes[num_children_to_trim:]:
                self._trim_tree(node=child_node, max_capacity=1)
            return

        # Walk the children from last to first, keeping whole subtrees
        # while they fit. `cumulative_size` starts at 1 for `node` itself.
        cumulative_size = 1
        pivot_index = num_children - 1
        while pivot_index >= 0:
            child_node = children_nodes[pivot_index]
            child_node_subtree_size = self.get_subtree_size(node=child_node)

            if cumulative_size + child_node_subtree_size < max_capacity:
                cumulative_size += child_node_subtree_size
                pivot_index -= 1
            else:
                # The pivot child gets whatever budget is left.
                self._trim_tree(
                    node=child_node,
                    max_capacity=max_capacity - cumulative_size
                )
                break

        # Everything before the pivot no longer fits and is detached.
        for child_node in children_nodes[:max(pivot_index, 0)]:
            child_node.delete_parent(parent_node=node)

    def trim_tree(self, max_capacity=-1):
        """Trim the whole tree to at most `max_capacity` nodes.

        Values <= 0 leave the tree unchanged.
        """
        self._trim_tree(
            node=self._root,
            max_capacity=max_capacity
        )

    def get_leaves(self):
        """Return the names of all childless nodes in breadth-first order."""
        leaf_node_names = []
        search_queue = deque([self._root])
        while search_queue:
            search_node = search_queue.popleft()
            child_nodes = search_node.get_children_nodes()

            if not child_nodes:
                leaf_node_names.append(search_node.get_node_name())

            search_queue.extend(child_nodes)
        return leaf_node_names
Empty file added pslx/experimental/__init__.py
Empty file.
Empty file added pslx/message_queue/__init__.py
Empty file.
Empty file added pslx/micro_service/__init__.py
Empty file.
Empty file.
Empty file.
Empty file added pslx/schema/__init__.py
Empty file.
39 changes: 39 additions & 0 deletions pslx/schema/enums.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
syntax = "proto3";

package schema;

// Mode selector with a test and a production value.
// Next available enum value: 2.
enum ModeType {
    TEST = 0;
    PROD = 1;
}

// Ordering selector: ORDER or REVERSE.
// Next available enum value: 2.
enum SortOrder {
    ORDER = 0;
    REVERSE = 1;
}

// Data model kind; presumably distinguishes batch from streaming
// processing (TODO confirm against the code that consumes it).
// Next available enum value: 3.
enum DataModelType {
    DEFAULT = 0;
    BATCH = 1;
    STREAMING = 2;
}

// Level names for the disk logger (going by the enum name; usage is not
// visible here).
// Next available enum value: 4.
enum DiskLoggerLevel {
    INFO = 0;
    DEBUG = 1;
    WARNING = 2;
    NOTSET = 3;
}

// Time granularity options for a partitioner, from yearly down to
// minutely (usage is not visible here).
// Next available enum value: 5.
enum PartitionerType {
    YEARLY = 0;
    MONTHLY = 1;
    DAILY = 2;
    HOURLY = 3;
    MINUTELY = 4;
}
Empty file added pslx/storage/__init__.py
Empty file.
Empty file added pslx/streaming/__init__.py
Empty file.
Empty file added pslx/test/__init__.py
Empty file.
Empty file added pslx/test/core/__init__.py
Empty file.
Loading

0 comments on commit 2b74eb4

Please sign in to comment.