{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "14dbed7f", "metadata": {}, "outputs": [], "source": [ "# binary search tree (version without balancing)\n", "#\n", "# *** this version includes a method for deleting an element (cf. tutorial 3.1 problem) ***\n", "#\n", "# we are using the same data structure a) for a tree and b) for a node in that tree\n", "#\n", "# note that this is a sorted data structure:\n", "# for each node in the tree (including the root node), our implementation needs to make sure that\n", "# a) all items within the self._left subtree are smaller than self._item\n", "# b) all items within the self._right subtree are greater than or equal to self._item\n", "#\n", "class BinarySearchTree:\n", " def __init__(self):\n", " self._left = None\n", " self._item = None\n", " self._right = None\n", " \n", " # *** this is the new method, deleting an occurrence of \"value\", if there is any ***\n", " #\n", " # if there are multiple elements with the respective\n", " # value, one of them is deleted and the others are retained\n", " #\n", " # the method returns True if such an element existed and was deleted,\n", " # and False otherwise\n", " #\n", " def delete(self, value):\n", " if self._item is None:\n", " return False\n", " elif self._item == value:\n", " if self._left is not None:\n", " closest_left_value = self._left.get_greatest()\n", " self._item = closest_left_value\n", " self._left.delete(closest_left_value)\n", " if self._left._item is None:\n", " self._left = None\n", " elif self._right is not None:\n", " closest_right_value = self._right.get_smallest()\n", " self._item = closest_right_value\n", " self._right.delete(closest_right_value)\n", " if self._right._item is None:\n", " self._right = None\n", " else:\n", " self._item = None\n", " return True\n", " elif value < self._item:\n", " if self._left is None:\n", " return False\n", " else:\n", " success = self._left.delete(value)\n", " if self._left._item is None:\n", " self._left = None\n", " return success\n", " elif value > self._item:\n", " if self._right is None:\n", " return False\n", " else:\n", " success = self._right.delete(value)\n", " if self._right._item is None:\n", " self._right = None\n", " return success\n", "\n", " # returns True if the tree contains value, False otherwise\n", " #\n", " def contains(self, value):\n", " return self._find_node(value) is not None\n", " \n", " # returns the/a node containing an item with a certain value\n", " #\n", " # if no such node exists, None is returned\n", " #\n", " def _find_node(self, value):\n", " if self._item is None:\n", " return None\n", " elif self._item == value:\n", " return self\n", " elif value < self._item:\n", " if self._left is None:\n", " return None\n", " else:\n", " return self._left._find_node(value)\n", " else:\n", " if self._right is None:\n", " return None\n", " else:\n", " return self._right._find_node(value)\n", "\n", " def get_root_item(self):\n", " return self._item\n", " \n", " def is_empty(self):\n", " return self._item is None\n", " \n", " # returns the number of data items stored in the tree\n", " #\n", " def get_size(self):\n", " if self._item is None:\n", " return 0\n", " size = 1\n", " if self._left is not None:\n", " size += self._left.get_size()\n", " if self._right is not None:\n", " size += self._right.get_size()\n", " return size\n", "\n", " # returns the smallest value stored in the tree data structure\n", " #\n", " def get_smallest(self):\n", " if self._left is None:\n", " return self._item\n", " else:\n", " return self._left.get_smallest()\n", " \n", " # returns the greatest value stored in the tree data structure\n", " #\n", " def get_greatest(self):\n", " if self._right is None:\n", " return self._item\n", " else:\n", " return self._right.get_greatest()\n", " \n", " # removes all content from the tree\n", " #\n", " def clear(self):\n", " if self._left is not None:\n", " self._left.clear()\n", " if self._right is not None:\n", " self._right.clear()\n", " self._left = None\n", " self._item = None\n", " self._right = None\n", " \n", " # attach an additional node to the tree, containing the value passed as an argument\n", " #\n", " # if the tree already contains the value, it will be added nonetheless\n", " #\n", " def insert(self, value):\n", " if self._item is None:\n", " self._item = value\n", " elif value < self._item:\n", " if self._left is None:\n", " self._left = BinarySearchTree()\n", " self._left._item = value\n", " else:\n", " self._left.insert(value)\n", " else:\n", " if self._right is None:\n", " self._right = BinarySearchTree()\n", " self._right._item = value\n", " else:\n", " self._right.insert(value)\n", " \n", " # replaces the content of self with the content of a dyn. array (Python list)\n", " #\n", " def copy_from_dynarray(self, dynarray):\n", " self.clear()\n", " for el in dynarray:\n", " self.insert(el)\n", " \n", " # traversal of the binary search tree\n", " #\n", " # appends the content of self, in ascending order, to a Python list (dyn. array)\n", " #\n", " def append_to_dynarray(self, dynarray):\n", " if self._left is not None:\n", " self._left.append_to_dynarray(dynarray)\n", " dynarray.append(self._item)\n", " if self._right is not None:\n", " self._right.append_to_dynarray(dynarray)\n", " \n", " # returns a string representing the tree structure\n", " #\n", " def to_string(self):\n", " if self._item is None:\n", " return \"[]\"\n", " if self._left is None and self._right is None:\n", " return str(self._item)\n", " content = str(self._item)\n", " if self._left is not None:\n", " content = content + \" -> [\" + self._left.to_string() + \"]\"\n", " else:\n", " content = content + \" -> []\"\n", " if self._right is not None:\n", " content = content + \", [\" + self._right.to_string() + \"]\"\n", " else:\n", " content = content + \", []\"\n", " return content" ] }, { "cell_type": "code", "execution_count": 2, "id": "eb4e80c4", "metadata": {}, "outputs": [], "source": [ "# balanced binary search tree\n", "#\n", "# *** matching the standard BST above, this version now also includes a method for deleting an element ***\n", "#\n", "# we are using the same data structure a) for a tree and b) for a node in that tree\n", "#\n", "# note that this is a sorted data structure:\n", "# for each node in the tree (including the root node), our implementation needs to make sure that\n", "# a) all items within the self._left subtree are smaller than self._item\n", "# b) all items within the self._right subtree are greater than or equal to self._item\n", "#\n", "import numpy as np\n", "\n", "class BalancedBST:\n", " def __init__(self):\n", " self._left = None\n", " self._item = None\n", " self._right = None\n", " self._size = 0\n", "\n", " # *** this is the new method, deleting an occurrence of \"value\", if there is any ***\n", " #\n", " # if there are multiple elements with the respective\n", " # value, one of them is deleted and the others are retained\n", " #\n", " # the method returns True if such an element existed and was deleted,\n", " # and False otherwise\n", " #\n", " def delete(self, value):\n", " if self._item is None:\n", " return False\n", " elif self._item == value:\n", " if self._left is not None:\n", " closest_left_value = self._left.get_greatest()\n", " self._item = closest_left_value\n", " self._left.delete(closest_left_value)\n", " if self._left._item is None:\n", " self._left = None\n", " elif self._right is not None:\n", " closest_right_value = self._right.get_smallest()\n", " self._item = closest_right_value\n", " self._right.delete(closest_right_value)\n", " if self._right._item is None:\n", " self._right = None\n", " else:\n", " self._item = None\n", " self._size -= 1\n", " return True\n", " elif value < self._item:\n", " if self._left is None:\n", " return False\n", " else:\n", " success = self._left.delete(value)\n", " if self._left._item is None:\n", " self._left = None\n", " elif value > self._item:\n", " if self._right is None:\n", " return False\n", " else:\n", " success = self._right.delete(value)\n", " if self._right._item is None:\n", " self._right = None\n", " if success:\n", " self._size -= 1\n", " self.balance_if_needed() # now rebalance the tree if appropriate\n", " return success\n", "\n", " def get_root_item(self):\n", " return self._item\n", " \n", " # returns True if the tree contains value, False otherwise\n", " #\n", " def contains(self, value):\n", " return self._find_node(value) is not None\n", " \n", " def is_empty(self):\n", " return self._item is None\n", " \n", " # returns the number of data items stored in the tree\n", " #\n", " def get_size(self):\n", " return self._size\n", "\n", " # returns the smallest value stored in the tree data structure\n", " #\n", " def get_smallest(self):\n", " if self._left is None:\n", " return self._item\n", " else:\n", " return self._left.get_smallest()\n", " \n", " # returns the greatest value stored in the tree data structure\n", " #\n", " def get_greatest(self):\n", " if self._right is None:\n", " return self._item\n", " else:\n", " return self._right.get_greatest()\n", " \n", " # removes all content from the tree\n", " #\n", " def clear(self):\n", " # if self._left is not None:\n", " # self._left.clear()\n", " # if self._right is not None:\n", " # self._right.clear()\n", " self._left = None\n", " self._item = None\n", " self._right = None\n", " self._size = 0\n", " \n", " # attach an additional node to the tree, containing the value passed as an argument\n", " #\n", " # if the tree already contains the value, it will be added nonetheless\n", " #\n", " def insert(self, value):\n", " if self._item is None:\n", " self._item = value\n", " elif value < self._item:\n", " if self._left is None:\n", " self._left = BalancedBST()\n", " self._left._item = value\n", " else:\n", " self._left.insert(value)\n", " else:\n", " if self._right is None:\n", " self._right = BalancedBST()\n", " self._right._item = value\n", " else:\n", " self._right.insert(value)\n", " self._size += 1\n", " if self._size < 10:\n", " return # no need to balance trees with fewer than ten elements\n", " \n", " # now rebalance the tree if appropriate\n", " #\n", " self.balance_if_needed()\n", " \n", " # checks whether a rebalancing is appropriate\n", " # and carries it out in that case, returning True if a rebalancing\n", " # was done and False otherwise\n", " #\n", " def balance_if_needed(self):\n", " if self._size < 8: # unnecessary to rebalance smaller binary search trees\n", " return False\n", " if self._left is None or self._right is None:\n", " self._balance()\n", " return True\n", " elif self._left._size >= 4*self._right._size or self._right._size >= 4*self._left._size:\n", " self._balance()\n", " return True\n", " else:\n", " return False\n", " \n", " # replaces the content of self with the content of a dyn. array (Python list)\n", " #\n", " def copy_from_dynarray(self, dynarray):\n", " self.clear()\n", " for el in dynarray:\n", " self.insert(el)\n", " \n", " # traversal of the binary search tree\n", " #\n", " # appends the content of self, in ascending order, to a Python list (dyn. array)\n", " #\n", " def append_to_dynarray(self, dynarray):\n", " if self._left is not None:\n", " self._left.append_to_dynarray(dynarray)\n", " dynarray.append(self._item)\n", " if self._right is not None:\n", " self._right.append_to_dynarray(dynarray)\n", " \n", " # traversal of the binary search tree\n", " #\n", " def write_into_array(self, arr, idx):\n", " if self._left is not None:\n", " idx = self._left.write_into_array(arr, idx)\n", " if self._item is not None:\n", " arr[idx] = self._item\n", " idx += 1\n", " if self._right is not None:\n", " idx = self._right.write_into_array(arr, idx)\n", " return idx\n", " \n", " # returns a string representing the tree structure\n", " #\n", " def to_string(self):\n", " if self._item is None:\n", " return \"[]\"\n", " if self._left is None and self._right is None:\n", " return str(self._item)\n", " content = str(self._item)\n", " if self._left is not None:\n", " content = content + \" -> [\" + self._left.to_string() + \"]\"\n", " else:\n", " content = content + \" -> []\"\n", " if self._right is not None:\n", " content = content + \", [\" + self._right.to_string() + \"]\"\n", " else:\n", " content = content + \", []\"\n", " return content\n", "\n", " # returns the/a node containing an item with a certain value\n", " #\n", " # if no such node exists, None is returned\n", " #\n", " def _find_node(self, value):\n", " if self._item is None:\n", " return None\n", " elif self._item == value:\n", " return self\n", " elif value < self._item:\n", " if self._left is None:\n", " return None\n", " else:\n", " return self._left._find_node(value)\n", " else:\n", " if self._right is None:\n", " return None\n", " else:\n", " return self._right._find_node(value)\n", " \n", " # rebalance the tree\n", " #\n", " def _balance(self):\n", " size = self._size\n", " arr = np.empty(size+1, dtype=int)\n", " self.write_into_array(arr, 0) # note that this array is already sorted\n", " self.clear() # now we have all in the temporary storage and can rebuild the tree\n", " self._rebuild_from_sorted_array(arr, 0, size-1) # recursively insert all data from the array\n", "\n", " def _rebuild_from_sorted_array(self, x, idx_min, idx_max): # range x[idx_min: idx_max+1]\n", " # print(\"\\treinserting\", x[idx_min: idx_max+1], \"; \", idx_min, idx_max)\n", " idx_mid = (idx_min + idx_max) // 2 # mid point as in binary search\n", " # print(\"\\t\\treinserting\", x[idx_mid], \"; \", idx_mid)\n", " self._item = x[idx_mid]\n", " if idx_mid-1 >= idx_min:\n", " self._left = BalancedBST()\n", " self._left._rebuild_from_sorted_array(x, idx_min, idx_mid-1)\n", " if idx_max >= idx_mid+1:\n", " self._right = BalancedBST()\n", " self._right._rebuild_from_sorted_array(x, idx_mid+1, idx_max)\n", " self._size = idx_max - idx_min + 1" ] }, { "cell_type": "code", "execution_count": 3, "id": "6297adf5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "test_list: [8, 4, 2, 11, 13, 10, 5, 6, 1, 0, 12, 9, 3, 7]\n", "\n", "test_tree: 8 -> [4 -> [2 -> [1 -> [0], []], [3]], [5 -> [], [6 -> [], [7]]]], [11 -> [10 -> [9], []], [13 -> [12], []]]\n", "root node item: 8\n", "\n", "does the tree contain 4?: True\n", "test that the node with 4 is found: 4\n", "can 4 be successfully deleted?: True\n", "\n", "test_tree: 8 -> [3 -> [2 -> [1 -> [0], []], []], [5 -> [], [6 -> [], [7]]]], [11 -> [10 -> [9], []], [13 -> [12], []]]\n", "root node item: 8\n", "does the tree contain 4?: False\n" ] } ], "source": [ "# test for the simple BST\n", "#\n", "import random\n", "\n", "test_list = [i for i in range(14)]\n", "random.shuffle(test_list)\n", "print(\"test_list:\", test_list)\n", "\n", "test_tree = BinarySearchTree()\n", "test_tree.copy_from_dynarray(test_list)\n", "\n", "print(\"\\ntest_tree:\", test_tree.to_string())\n", "print(\"root node item:\", test_tree.get_root_item())\n", "\n", "print(\"\\ndoes the tree contain 4?:\", test_tree.contains(4))\n", "print(\"test that the node with 4 is found:\", test_tree._find_node(4).get_root_item())\n", "\n", "print(\"can 4 be successfully deleted?:\", test_tree.delete(4))\n", "\n", "print(\"\\ntest_tree:\", test_tree.to_string())\n", "print(\"root node item:\", test_tree.get_root_item())\n", "\n", "print(\"does the tree contain 4?:\", test_tree.contains(4))" ] }, { "cell_type": "code", "execution_count": 4, "id": "115a55d7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "test_list: [11, 7, 13, 1, 6, 10, 8, 9, 2, 12, 0, 4, 5, 3]\n", "\n", "test_tree: 8 -> [2 -> [1 -> [0], []], [6 -> [4 -> [3], [5]], [7]]], [11 -> [9 -> [], [10]], [12 -> [], [13]]]\n", "root node item: 8\n", "rebalancing\n", "test_tree: 6 -> [2 -> [0 -> [], [1]], [4 -> [3], [5]]], [10 -> [8 -> [7], [9]], [12 -> [11], [13]]]\n", "root node item: 6\n", "\n", "does the tree contain 4?: True\n", "test that the node with 4 is found: 4\n", "can 4 be successfully deleted?: True\n", "\n", "test_tree: 6 -> [2 -> [0 -> [], [1]], [3 -> [], [5]]], [10 -> [8 -> [7], [9]], [12 -> [11], [13]]]\n", "root node item: 6\n", "rebalancing\n", "test_tree: 7 -> [2 -> [0 -> [], [1]], [5 -> [3], [6]]], [10 -> [8 -> [], [9]], [12 -> [11], [13]]]\n", "root node item: 7\n", "\n", "does the tree contain 4?: False\n" ] } ], "source": [ "# test for the balanced BST\n", "#\n", "\n", "import random\n", "\n", "test_list = [i for i in range(14)]\n", "random.shuffle(test_list)\n", "print(\"test_list:\", test_list)\n", "\n", "test_tree = BalancedBST()\n", "test_tree.copy_from_dynarray(test_list)\n", "\n", "print(\"\\ntest_tree:\", test_tree.to_string())\n", "print(\"root node item:\", test_tree.get_root_item())\n", "print(\"rebalancing\")\n", "test_tree._balance()\n", "print(\"test_tree:\", test_tree.to_string())\n", "print(\"root node item:\", test_tree.get_root_item())\n", "\n", "print(\"\\ndoes the tree contain 4?:\", test_tree.contains(4))\n", "print(\"test that the node with 4 is found:\", test_tree._find_node(4).get_root_item())\n", "\n", "print(\"can 4 be successfully deleted?:\", test_tree.delete(4))\n", "\n", "print(\"\\ntest_tree:\", test_tree.to_string())\n", "print(\"root node item:\", test_tree.get_root_item())\n", "print(\"rebalancing\")\n", "test_tree._balance()\n", "print(\"test_tree:\", test_tree.to_string())\n", "print(\"root node item:\", test_tree.get_root_item())\n", "\n", "print(\"\\ndoes the tree contain 4?:\", test_tree.contains(4))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }