diff --git a/.gitignore b/.gitignore
index 13c103cd..6533464f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,9 @@ __pycache__/
 # C extensions
 *.so
 
+# virtualenv
+bnr/
+
 # Distribution / packaging
 .Python
 build/
diff --git a/BNReasoner.py b/BNReasoner.py
index 34975ac6..4783ceab 100644
--- a/BNReasoner.py
+++ b/BNReasoner.py
@@ -1,5 +1,7 @@
 from typing import Union
 from BayesNet import BayesNet
+import pandas as pd
+import itertools
 
 
 class BNReasoner:
@@ -15,4 +17,139 @@ def __init__(self, net: Union[str, BayesNet]):
         else:
             self.bn = net
 
-    # TODO: This is where your methods should go
+    def query(self, query_var: str, evidence: dict) -> float:
+        """
+        :param query_var: name of the query variable
+        :param evidence: dictionary of evidence variables and their values
+        :return: the probability of the query variable given the evidence
+        """
+        pass
+
+    def prune(self, query_var: str, evidence: dict) -> BayesNet:
+        """ Edge-prune the Bayesian network s.t. queries of the form P(Q|E) can still be correctly calculated.
+        :param query_var: name of the query variable
+        :param evidence: dictionary of evidence variables and their values
+        :return: a new BayesNet object with the evidence variables removed
+        """
+        pass
+
+    def d_separation(self, x: str, y: str, z: list) -> bool:
+        """ Determine whether X is d-separated from Y given Z.
+
+        :param x: name of variable x
+        :param y: name of variable y
+        :param z: list of variables z
+        :return: True if x and y are d-separated given z, False otherwise
+        """
+        pass
+
+    def independence(self, x: str, y: str, z: list) -> bool:
+        """ Determine whether X is independent of Y given Z.
+
+        :param x: name of variable x
+        :param y: name of variable y
+        :param z: list of variables z
+        :return: True if x and y are independent given z, False otherwise
+        """
+        pass
+
+    def marginalization(self, x: str, factor: pd.DataFrame) -> pd.DataFrame:
+        """ Sum-out variable X from a factor.
+
+        :param x: name of the variable to sum out
+        :param factor: factor whose last column holds the values (e.g. 'p')
+        :return: a new factor over the remaining variables in which X is summed-out
+        """
+        return self._eliminate(x, factor, 'sum')
+
+    def maxing_out(self, x: str, factor: pd.DataFrame) -> pd.DataFrame:
+        """ Max-out variable X from a factor.
+
+        :param x: name of the variable to max out
+        :param factor: factor whose last column holds the values (e.g. 'p')
+        :return: a new factor over the remaining variables in which X is maxed-out
+        """
+        return self._eliminate(x, factor, 'max')
+
+    @staticmethod
+    def _eliminate(x: str, factor: pd.DataFrame, how: str) -> pd.DataFrame:
+        """ Remove variable X from a factor by aggregating its value column.
+
+        :param x: name of the variable to eliminate
+        :param factor: factor whose last column holds the values (e.g. 'p')
+        :param how: aggregation applied per group: 'sum' or 'max'
+        :return: a new factor without column X; the input is left untouched
+        :raises KeyError: if X is not one of the factor's variables
+        """
+        *variables, value = factor.columns
+        if x not in variables:
+            raise KeyError(f'{x!r} is not a variable of the given factor')
+        remaining = [v for v in variables if v != x]
+        if not remaining:
+            # X was the only variable: groupby() rejects an empty key list,
+            # so collapse the value column to a single-row factor directly.
+            return pd.DataFrame({value: [factor[value].agg(how)]})
+        # Aggregate only the value column so X itself is never summed/maxed.
+        return factor.groupby(remaining, as_index=False)[value].agg(how)
+
+    def factor_multiplication(self, f: pd.DataFrame, g: pd.DataFrame) -> pd.DataFrame:
+        """ Given two factors, compute the product of the two factors. h = fg
+
+        :param f: factor 1
+        :param g: factor 2
+        :return: the product of the two factors
+        """
+        pass
+
+    def ordering(self, x: str, heuristic: str) -> list:
+        """ Given X, compute a good ordering for the elimination of X based on the heuristic
+
+        :return: a topological ordering of the variables in the Bayesian network
+        """
+        pass
+
+    def variable_elimination(self, x: str, evidence: dict) -> float:
+        """ Given a variable X and evidence E, compute the probability of X given E using variable elimination.
+
+        :param x: name of variable x
+        :param evidence: dictionary of evidence variables and their values
+        :return: the probability of x given the evidence
+        """
+        pass
+
+    def marginal_distributions(self, query_variables: str, evidence: dict) -> dict:
+        """ Given query variables Q, evidence E, compute the marginal distributions.
+
+        :param evidence: dictionary of evidence variables and their values
+        :return: a dictionary of the marginal distributions of all variables in the Bayesian network
+        """
+        pass
+
+    def MAP(self, evidence: dict) -> dict:
+        """ Given evidence E, compute the MAP assignment of query variables in the Bayesian network.
+
+        :param evidence: dictionary of evidence variables and their values
+        :return: a dictionary of the MAP assignment of query variables in the Bayesian network
+        """
+        pass
+
+    def MEP(self, evidence: dict) -> dict:
+        """ Given evidence E, compute the MEP assignment of query variables in the Bayesian network.
+
+        :param evidence: dictionary of evidence variables and their values
+        :return: a dictionary of the MEP assignment of query variables in the Bayesian network
+        """
+        pass
+
+if __name__ == '__main__':
+    # Playground for testing your code
+    rnr = BNReasoner('testing/lecture_example.bifxml')
+    a = rnr.bn
+    # a = BNReasoner('testing/lecture_example2.bifxml').bn
+    #a.draw_structure()
+    # print(a.get_all_cpts().keys())
+    # print(a.get_all_cpts().values())
+    print(a.get_cpt('Wet Grass?'))
+    print(rnr.marginalization('Wet Grass?', a.get_cpt('Wet Grass?')))
+    print(rnr.maxing_out('Wet Grass?', a.get_cpt('Wet Grass?')))
+    # a.get_compatible_instantiations_table('B', {'A': 0, 'C': 1})
\ No newline at end of file
diff --git a/BNReasoner1st.py b/BNReasoner1st.py
new file mode 100644
index 00000000..68b46ba5
--- /dev/null
+++ b/BNReasoner1st.py
@@ -0,0 +1,66 @@
+from typing import Union
+from BayesNet import BayesNet
+import pandas as pd
+
+
+class BNReasoner:
+    def __init__(self, net: Union[str, BayesNet]):
+        """
+        :param net: either file path of the bayesian network in BIFXML format or BayesNet object
+        """
+        if isinstance(net, str):
+            # constructs a BN object
+            self.bn = BayesNet()
+            # Loads the BN from a BIFXML file
+            self.bn.load_from_bifxml(net)
+        else:
+            self.bn = net
+
+    def NetworkPrune(self, query: list, evidence: pd.Series) -> BayesNet:
+        """
+        Edge-prunes and iteratively node-prunes the Bayesian network s.t. queries of the form P(Q|E) can still be correctly calculated.
+        The network held by this reasoner (self.bn) is modified in place.
+
+        :param query: a list of variables (str) containing the query
+        :param evidence: a series mapping evidence variables to their values. E.g.: pd.Series({"A": True, "B": False})
+        :returns: The pruned version of the network w.r.t the query and evidence given.
+        """
+        var_names = list(evidence.index)
+
+        # Performs edge pruning
+        for var in var_names:
+            for child in list(self.bn.get_children(var)):
+                # Removes edges
+                self.bn.del_edge((var, child))
+                # Updates CPTs
+                reduced = self.bn.get_compatible_instantiations_table(evidence, self.bn.get_cpt(child))
+                self.bn.update_cpt(child, reduced)
+
+        # Performs node pruning: only nodes that are neither in Q nor in e may be deleted
+        keep = set(var_names) | set(query)
+        options = [var for var in self.bn.get_all_variables() if var not in keep]
+
+        done = False
+        while not done:
+            done = True
+            # Iterate over a copy, since leaves are removed from options inside the loop
+            for var in list(options):
+                if not self.bn.get_children(var):  # Deleting a leaf can expose new leaves, so we iterate one more time
+                    self.bn.del_var(var)
+                    options.remove(var)
+                    done = False
+
+        return self.bn
+
+
+if __name__ == '__main__':
+    a = BNReasoner('testing/dog_problem.bifxml')
+    print(a.bn.get_children("light-on"))
+    a.NetworkPrune(["dog-out","family-out"],pd.Series({"bowel-problem":False}))
+    a.bn.draw_structure()
+    print(a.bn.get_cpt("dog-out"))
+
+    # a = BNReasoner('testing/lecture_example2.bifxml').bn
+    # a.draw_structure()
+    # a.del_var("X")
+    # a.draw_structure()
\ No newline at end of file