Skip to content

Reference for coalescent approach in multitask learning

bmm_multitask_learning.coalescent

coalescent_inference

CoalescentTree

Source code in bmm_multitask_learning/coalescent/coalescent_inference.py
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
class CoalescentTree:
    """
    Greedy coalescent tree built over a list of leaf Items (see [2]).

    ``items`` holds one list of nodes per level: level 0 is the leaves,
    each following level has one node fewer (two nodes merged into one).
    """

    def __init__(self, leaves: list[Item], cov, dim):
        self.leaves = leaves
        self.items, _ = self._greedy_rate_brownian(leaves, cov, dim)

    def iterate_over(self) -> Iterator[Item]:
        """
        iterates over all elements in tree
        """
        return (node for level in self.items for node in level)

    @staticmethod
    def select_candidates(x, cov, t_m1, n, dim):
        """
        Realise greedy selection of candidates to coalesce.
        For this consider all pairs of items and select minimal delta for coalesce

        More details in [2]
        """
        remaining = len(x)
        merged_so_far = n - remaining

        best = (None, None, float("inf"))
        for a in range(remaining):
            for b in range(a + 1, remaining):
                delta = Item.optimal_t(x[a], x[b], cov,
                                       merged_so_far, t_m1, n, dim)
                if delta < best[2]:
                    best = (a, b, delta)
        return best

    def _greedy_rate_brownian(self, x: list[Item], cov, dim)\
            -> tuple[list[list[Item]], list[tuple[int, int]]]:
        """
        Realises greedy coalescent tree construction
        More details in [2]

        :return: first argument is a constructed coalesce structure,
            the second argument is a pointer to child elements
        """
        n = len(x)
        levels = [x]
        merge_history = []
        t = 0
        # each merge removes exactly one node, so n-1 merges in total
        while len(levels) < n:
            candidates = copy(levels[-1])
            l, r, delta = self.select_candidates(candidates, cov, t, n, dim)
            merge_history.append((l, r))
            # coalescent time moves backwards, hence the subtraction
            t -= delta
            merged = Item.coalesce(candidates[l], candidates[r], t)

            # link both children to their new parent ...
            candidates[l].parent = merged
            candidates[r].parent = merged

            # ... then collapse the pair into a single slot
            candidates[l] = merged
            del candidates[r]
            levels.append(candidates)
        return levels, merge_history
iterate_over()

iterates over all elements in tree

Source code in bmm_multitask_learning/coalescent/coalescent_inference.py
68
69
70
71
72
73
74
def iterate_over(self) -> Iterator[Item]:
    """
    iterates over all elements in tree
    """
    for level in self.items:
        for node in level:
            yield node
select_candidates(x, cov, t_m1, n, dim) staticmethod

Realise greedy selection of candidates to coalesce. For this consider all pairs of items and select minimal delta for coalesce

More details in [2]

Source code in bmm_multitask_learning/coalescent/coalescent_inference.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
@staticmethod
def select_candidates(x, cov, t_m1, n, dim):
    """
    Realise greedy selection of candidates to coalesce.
    For this consider all pairs of items and select minimal delta for coalesce

    More details in [2]
    """
    n_cur = len(x)
    i_items = n - n_cur

    l, r = None, None
    min_time = float("inf")

    for i in range(n_cur):
        for j in range(i + 1, n_cur):
            t_new = Item.optimal_t(x[i], x[j], cov, i_items, t_m1, n, dim)
            if t_new < min_time:
                min_time, l, r = t_new, i, j
    return l, r, min_time

Item dataclass

class to handle the items of coalescent_tree

Source code in bmm_multitask_learning/coalescent/coalescent_inference.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
@dataclass
class Item:
    """
    class to handle the items of coalescent_tree

    A node carries a Gaussian message: a mean vector, a variance term
    ``cov`` and the (backward-running) time ``t`` at which it lives.
    """
    mean: list  # mean vector; in practice a numpy array (arithmetic below relies on it)
    cov: float = 0.
    t: float = 0.
    # parent link, filled in during tree construction; deliberately left
    # un-annotated so the dataclass machinery does not treat it as a field
    parent = None

    @staticmethod
    def coalesce(first, other, t):
        """
        This method coalesces two Item objects. Works for Brownian diffusion.
        For more see [2]

        [2] Y.W. Teh, H. Daumé III, and D. Roy.
            Bayesian agglomerative clustering with coalescents. NIPS, 2007.
        """
        # precision (inverse variance) of each incoming message at time t
        prec_first = 1 / (first.cov + (first.t - t))
        prec_other = 1 / (other.cov + (other.t - t))

        merged_cov = 1 / (prec_first + prec_other)
        merged_mean = (first.mean * prec_first
                       + other.mean * prec_other) * merged_cov

        return Item(merged_mean, merged_cov, t)

    @staticmethod
    def optimal_t(first, other, cov, i, t_m1, n, dim):
        """
        computes optimal delta to coalesce for proposed pair of items
        """
        # pair-count term after i coalescing events
        c_ni = ((n - i + 1) * (n - i) / 2)

        root_term = (4 * c_ni * m_norm(first.mean - other.mean, cov) +
                     dim * dim) ** 0.5
        a = 1/4 * 1/c_ni * (root_term - dim)

        b = 1/2 * (first.cov + other.cov + first.t + other.t - 2 * t_m1)

        assert a - b > 0, f"t_m1: {t_m1}, a: {a}, b: {b}"
        return a - b
coalesce(first, other, t) staticmethod

This method coalesces two Item objects. Works for Brownian diffusion. For more see [2]

[2] Y.W. Teh, H. Dauḿe III, and D. Roy. Bayesian agglomerative clustering with coalescents. NIPS, 2007.

Source code in bmm_multitask_learning/coalescent/coalescent_inference.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
@staticmethod
def coalesce(first, other, t,):
    """
    This method coalesces two Item objects. Works for brownian diffusion.
    For more see [2]

    [2] Y.W. Teh, H. Dauḿe III, and D. Roy. 
        Bayesian agglomerative clustering with coalescents. NIPS, 2007.
    """
    # messages coalesce
    # assert (self.cov + self.t - t) > 0
    # assert (other.cov + other.t - t) > 0

    l_inv = 1 / (first.cov + (first.t - t))
    r_inv = 1 / (other.cov + (other.t - t))
    new_cov = 1/(l_inv + r_inv)
    new_mean = (first.mean * l_inv + other.mean * r_inv) * new_cov

    new_t = t
    return Item(new_mean, new_cov, new_t)
optimal_t(first, other, cov, i, t_m1, n, dim) staticmethod

computes optimal delta to coalesce for proposed pair of items

Source code in bmm_multitask_learning/coalescent/coalescent_inference.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
@staticmethod
def optimal_t(first, other, cov, i, t_m1, n, dim):
    """
    computes optimal delta to coalesce for proposed pair of items
    """
    c_ni = ((n - i + 1) * (n-i)/2)

    a = 1/4 * 1/c_ni * \
        ((4 * c_ni * m_norm(first.mean - other.mean, cov) +
            dim*dim) ** 0.5 - dim)

    b = 1/2 * (first.cov + other.cov + first.t + other.t - 2 * t_m1)

    assert a - b > 0, f"t_m1: {t_m1}, a: {a}, b: {b}"
    return a - b

coalescent_learner

MultitaskProblem

Bayesian optimizer for Multitask Learning based on Coalescent [1]

To use initialize with list of TaskData and call run. Then trained weights will be available by method get_weights()

[1] @article{daume2009bayesian, title={Bayesian multitask learning with latent hierarchies}, author={Daum{'e} III, Hal}, journal={arXiv preprint arXiv:0907.0783}, year={2009} }

Source code in bmm_multitask_learning/coalescent/coalescent_learner.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
class MultitaskProblem:
    """
    Bayesian optimizer for Multitask Learning based on Coalescent [1]

    To use initialize with list of TaskData and call fit(). Then trained
    weights will be available by method get_weights()

    [1] @article{daume2009bayesian,
            title={Bayesian multitask learning with latent hierarchies},
            author={Daum{\'e} III, Hal},
            journal={arXiv preprint arXiv:0907.0783},
            year={2009}
        }
    """
    def __init__(self,
            tasks: "list[TaskData]",
            dim,
            rho=0.05,
            cov_sigma=0.1,
            s_init=None
        ):
        """
        :param tasks: list of TaskData, which will be learned
        :param dim: dimension of problems
        :param rho: parameter that scales noise level in labels
        :param cov_sigma: covariance matrix scaler for Coalescent evolution variation
        :param s_init: initial values for S_i: variance scales of data
        """

        self.tasks = tasks
        self.K = len(tasks)
        self.dim = dim
        # BUG FIX: was hard-coded to 0.05, silently ignoring the rho argument
        self.rho = rho
        # posterior over the correlation matrix R of the task weights
        self.R_distr = InverseWishart(dim, dim+1, corr_mat=True)

        self.cov_sigma = cov_sigma
        # posterior over the Brownian evolution covariance L
        self.L_distr = InverseWishart(dim, dim+1,  cov_sigma * np.eye(dim), corr_mat=False)

        if s_init is None:
            # small random initial log-scales for each task
            s_init = np.random.randn(self.K, dim) / 5
        else:
            assert s_init.shape == (self.K, dim), f"provided s_init have\
                incorrect shape: {s_init.shape=} instead of {(self.K, dim)}"

        self.S_leaves: S_handler = S_handler(s_init)
        self.weights = np.zeros((self.K, dim), dtype=float)

        self._trained = False

    def get_weights(self):
        """Return the trained (K x dim) weight matrix; fit() must run first."""
        if not self._trained:
            # kept as Warning for backward compatibility with callers that
            # catch it; a RuntimeError would be more conventional
            raise Warning("first call fit() method of trainer.")
        return self.weights

    def fit(self, n_steps=100):
        """
        Run ``n_steps`` rounds of coordinate-ascent style inference.

        :param n_steps: positive number of outer iterations
        """
        assert n_steps > 0 and isinstance(n_steps, int), "number of steps should be positive integer"

        # first setup the weights as in simple linear regression
        self._update_weights(
            tasks=self.tasks,
            weights_mp=self.weights,
            S_leaves=None,
            R=None,
            K=self.K,
            rho=self.rho,
            cov=np.eye(self.dim)
        )

        for _ in range(n_steps):
            # get the most probable parameters under the current posteriors
            R = self.R_distr.get_most_prob()
            L = self.L_distr.get_most_prob()

            # inference on coalescent tree;
            # integrates out the evolution of covariance
            leaves = [Item(elem, cov=0) for elem in self.S_leaves.s_list]
            coalescent_tree = CoalescentTree(leaves, L, self.dim)

            # gradient optimization of S_leaves
            # based on generated tree and parameters
            self.S_leaves.update_param(R, L, self.weights, coalescent_tree)

            # update covariance matrix posterior based on the new tree
            L_samples = optimal_cov(coalescent_tree, self.dim)
            self.L_distr.update_posterior(L_samples)

            # update correlation matrix posterior
            R_samples = optimal_R(self.S_leaves.s_list, self.weights,)
            self.R_distr.update_posterior(R_samples)

            # update weights based on new posterior
            self._update_weights(
                tasks=self.tasks,
                weights_mp=self.weights,
                S_leaves=self.S_leaves.s_list,
                R=R,
                K=self.K,
                rho=self.rho,
                cov=None
            )

        self._trained = True

    def _update_weights(self, tasks, weights_mp, S_leaves, R, K, rho, cov=None):
        """
        Write each task's MAP weights into ``weights_mp`` in place.

        When ``cov`` is given, it is used directly as the prior covariance;
        otherwise the per-task prior is diag(exp(S_i)) @ R @ diag(exp(S_i)).
        """
        for i in range(K):
            if cov is None:
                # BUG FIX: was np.exp(np.diag(S_leaves[i])) — elementwise
                # exp of a diagonal matrix puts exp(0)=1 on every
                # OFF-diagonal entry. The intended scaling matrix is
                # diag(exp(s)), consistent with S_handler's np.diag(s_exp).
                S_exp = np.diag(np.exp(S_leaves[i]))
                cov_i = S_exp @ R @ S_exp
            else:
                cov_i = cov

            w = tasks[i].most_prob_w(cov_i, rho)
            weights_mp[i] = w
__init__(tasks, dim, rho=0.05, cov_sigma=0.1, s_init=None)

:param tasks: list of TaskData, which will be learned :param dim: dimension of problems :param rho: parameter that scales noise level in labels :param cov_sigma: covariance matrix scaler for Coalescent evolution variation :param s_init: initial values for S_i: variance scales of data

Source code in bmm_multitask_learning/coalescent/coalescent_learner.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def __init__(self,
        tasks: list[TaskData],
        dim,
        rho=0.05,
        cov_sigma=0.1,
        s_init=None
    ):
    """

    :param tasks: list of TaskData, which will be learned
    :param dim: dimention of problems
    :param rho: parameter that scales noise level in labels
    :param cov_sigma: covariance matrix scaler for Coalescent evolution variation
    :param s_init: initial values for S_i: variance scales of data
    """

    self.tasks = tasks
    self.K = len(tasks)
    self.dim = dim
    self.rho = 0.05
    self.R_distr = InverseWishart(dim, dim+1, corr_mat=True)

    self.cov_sigma = cov_sigma
    self.L_distr = InverseWishart(dim, dim+1,  cov_sigma * np.eye(dim), corr_mat=False)

    if s_init is None:
        s_init = np.zeros((self.K, dim), dtype=float)
        for i in range(self.K):
            S = np.random.randn(dim)/5
            s_init[i] = S
    else:
        assert s_init.shape == (self.K, dim), f"provided s_init have\
            incorrect shape: {s_init.shape=} instead of {(self.K, dim)}"

    self.S_leaves: S_handler = S_handler(s_init)
    self.weights = np.zeros((self.K, dim), dtype=float)

    self._trained = False

S_handler

class to encapsulate the methods for updating covariance scalers S

Source code in bmm_multitask_learning/coalescent/coalescent_learner.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
class S_handler:
    """
    class to encapsulate the methods for updating covariance scalers S
    """
    def __init__(self, s_list):
        self.s_list = s_list
        self.K = len(self.s_list)

    @staticmethod
    def _stable_exp_neg(s):
        """
        Numerically stable elementwise exp(-s), squeezed to 1-d.

        Shared by log_prob_S and the gradient below (previously duplicated).
        """
        s_neg = -s
        s_max = np.max(s_neg)
        # factor out the largest exponent to avoid overflow in np.exp
        s_exp = np.exp(s_neg - s_max) * np.exp(s_max)
        if len(s_exp.shape) > 1:
            s_exp = s_exp.squeeze()
        return s_exp

    def log_prob_S(self, s, R_inv, L_inv, P, W):
        """
        returns the log probability of proposed parameters for S

        :param s: vector of log scales
        :param R_inv: inverse correlation matrix
        :param L_inv: inverse evolution covariance
        :param P: diagonal matrix built from the parent's mean
        :param W: diagonal matrix built from the task weights
        """
        S_exp = np.diag(self._stable_exp_neg(s))
        S = np.diag(s)

        cov_m = S_exp @ R_inv @ S_exp
        return - np.trace(S) - 1/2 * np.trace((S - P) @ L_inv @ (S - P)) - 1/2 * np.trace(W @ cov_m @ W)

    def _optimize_s(self, R, L, w, p, s_0, verbose=False):
        """
        Gradient-based optimization of s for the given parameters.
        """
        L_inv = np.linalg.inv(L)
        R_inv = np.linalg.inv(R)
        W = np.diag(w)
        P = np.diag(p)
        d = R.shape[0]

        def grad_S(s):
            # gradient of log_prob_S w.r.t. the diagonal entries of S
            S_exp = np.diag(self._stable_exp_neg(s))
            S = np.diag(s)
            cov_m = S_exp @ R_inv @ S_exp
            return (- np.eye(d) - (S - P) @ L_inv + W @ cov_m @ W).diagonal()

        if verbose:
            print(f"[INFO] log prob start: {self.log_prob_S(s_0, R_inv, L_inv, P, W)}")

        optimizer = grad_optimizer(100, 0.001, grad_S)
        s_opt = optimizer.run(s_0)

        if verbose:
            # BUG FIX: previously called self.log_prob_S(s_opt) with a single
            # argument, raising TypeError whenever verbose=True
            print(f"[INFO] log prob trained: {self.log_prob_S(s_opt, R_inv, L_inv, P, W)}")

        return s_opt

    def update_param(self, R, L, weights,
                    coalescent_tree: "CoalescentTree",
                    verbose=False):
        """Re-optimize each task's s vector toward its parent in the tree."""
        for i in range(self.K):
            parent_s = coalescent_tree.leaves[i].parent.mean
            w = weights[i]
            s_0 = self.s_list[i]
            self.s_list[i] = self._optimize_s(R, L, w, parent_s, s_0, verbose)
log_prob_S(s, R_inv, L_inv, P, W)

returns the log probability of proposed parameters for S

Source code in bmm_multitask_learning/coalescent/coalescent_learner.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
def log_prob_S(self, s, R_inv, L_inv, P, W):
    """
    returns the log probability of proposed parameters for S
    """
    s_neg = -s
    s_max = np.max(s_neg)
    s_neg = s_neg - s_max
    s_exp = np.exp(s_neg) * np.exp(s_max)
    if len(s_exp.shape) > 1:
        s_exp = s_exp.squeeze()
    S_exp = np.diag(s_exp)

    S = np.diag(s)

    cov_m = S_exp @ R_inv @ S_exp
    return - np.trace(S) - 1/2 * np.trace((S - P) @ L_inv @ (S - P)) - 1/2 * np.trace(W @ cov_m @ W)

inverse_wishart

InverseWishart

class to handle methods of InverseWishart distribution

Source code in bmm_multitask_learning/coalescent/inverse_wishart.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
class InverseWishart:
    """
    class to handle methods of the InverseWishart distribution
    """
    def __init__(self, d, degrees_of_freedom, init_cov=None, corr_mat=False):
        """
        :param d: dimension of the covariance matrix
        :param degrees_of_freedom: degrees of freedom of the
            inverse Wishart distribution
        :param init_cov: initial covariance matrix (defaults to identity)
        :param corr_mat: if True, the covariance matrix is a correlation
            matrix (i.e., it has ones on the diagonal)
        """
        self.d = d
        if init_cov is None:
            init_cov = np.eye(d)
        else:
            assert init_cov.shape == (d, d)

        # store a float copy so update_posterior never mutates the
        # caller's array in place (and integer init_cov does not break
        # the accumulation below)
        self.cov = np.array(init_cov, dtype=float)
        self.degrees_of_freedom = degrees_of_freedom
        self.corr_mat = corr_mat

    @staticmethod
    def cov2corr(cov):
        """
        Convert a covariance matrix to a correlation matrix.
        """
        if len(cov.shape) == 0:
            # promote a scalar so np.diag below is well-defined
            cov = cov[None]

        std_devs = 1/np.sqrt(np.diag(cov))
        D = np.diag(std_devs)

        if len(cov.shape) == 1:
            # 1-d input: lift D and cov to 2-d for the matmul
            # (only sensible for the length-1 / scalar case)
            D = D[None]
            cov = cov[None]
        return D @ cov @ D

    def update_posterior(self, samples):
        """
        Accumulate the scatter of the samples into the posterior.

        :param samples: shape=(n x d)
        """
        assert len(samples.shape) == 2 and samples.shape[1] == self.d
        K = samples.shape[0]
        self.degrees_of_freedom += K

        S = samples.T @ samples  # d x d scatter matrix
        # out-of-place add: never mutate a possibly-aliased init_cov
        self.cov = self.cov + S
        return

    def get_most_prob(self):
        """
        returns the mode of the inverse Wishart distribution:
        Psi / (nu + d + 1), optionally converted to a correlation matrix
        """
        cov = self.cov / (self.degrees_of_freedom + self.d + 1)

        if self.corr_mat:
            cov = InverseWishart.cov2corr(cov)
        return cov
__init__(d, degrees_of_freedom, init_cov=None, corr_mat=False)

:param d: dimension of the covariance matrix :param degrees_of_freedom: degrees of freedom of the inverse Wishart distribution :param init_cov: initial covariance matrix :param corr_mat: if True, the covariance matrix is a correlation matrix (i.e., it has ones on the diagonal)

Source code in bmm_multitask_learning/coalescent/inverse_wishart.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
def __init__(self, d, degrees_of_freedom, init_cov=None, corr_mat=False):
    """
    :param d: dimension of the covariance matrix
    :param degrees_of_freedom: degrees of freedom of the
        inverse Wishart distribution
    :param init_cov: initial covariance matrix
    :param corr_mat: if True, the covariance matrix is a correlation
        matrix (i.e., it has ones on the diagonal)
    """
    self.d = d
    if init_cov is None:
        init_cov = np.eye(d)
    else:
        assert init_cov.shape == (d, d)

    self.cov = init_cov
    self.degrees_of_freedom = degrees_of_freedom
    self.corr_mat = corr_mat
cov2corr(cov) staticmethod

Convert a covariance matrix to a correlation matrix

Source code in bmm_multitask_learning/coalescent/inverse_wishart.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@staticmethod
def cov2corr(cov):
    """
    Conver covariance matrox to correlation matrix
    """
    if len(cov.shape) == 0:
        cov = cov[None]

    std_devs = 1/np.sqrt(np.diag(cov))
    D = np.diag(std_devs)

    d = len(cov.shape)

    if d == 1:
        D = D[None]
        cov = cov[None]
    R = D @ cov @ D
    return R
get_most_prob()

returns the mode of the inverse Wishart distribution

Source code in bmm_multitask_learning/coalescent/inverse_wishart.py
59
60
61
62
63
64
65
66
67
68
def get_most_prob(self):
    """
    returns the mode of the inverse Wishart distribution
    """
    cov = self.cov
    cov = cov/(self.degrees_of_freedom + self.d + 1)

    if self.corr_mat:
        cov = InverseWishart.cov2corr(cov)
    return cov
update_posterior(samples)

:param samples: shape=(n x d)

Source code in bmm_multitask_learning/coalescent/inverse_wishart.py
47
48
49
50
51
52
53
54
55
56
57
def update_posterior(self, samples):
    """
    :param samples: shape=(n x d)
    """
    assert len(samples.shape) == 2 and samples.shape[1] == self.d
    K = samples.shape[0]
    self.degrees_of_freedom += K

    S = samples.T @ samples  # d x d
    self.cov += S
    return

parameters_cov

optimal_cov(coalesce_tree, dim)

returns samples from the covariance matrix for the InverseWishart distribution parameter update

Source code in bmm_multitask_learning/coalescent/parameters_cov.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def optimal_cov(coalesce_tree: "CoalescentTree", dim):
    """
    returns samples from the covariance matrix
    for the InverseWishart distribution parameter update

    Each node with a parent contributes the normalized Brownian increment
    (mean - parent.mean) / sqrt(t - parent.t); the samples are then
    centered around their empirical mean.

    :param coalesce_tree: tree exposing ``iterate_over()`` over its nodes
    :param dim: dimensionality of the item means
    :return: array of shape (n_edges, dim) of centered increments
    """
    # single pass (previously two identical passes over the tree)
    increments = []
    for item in coalesce_tree.iterate_over():
        parent = item.parent
        if parent is not None:
            # coalescent time decreases toward the root, so dt > 0
            dt = item.t - parent.t
            increments.append((item.mean - parent.mean) / np.sqrt(dt))

    if not increments:
        # degenerate tree with no merges: nothing to contribute
        return np.zeros((0, dim))

    samples = np.asarray(increments, dtype=float)
    return samples - samples.mean(axis=0)

plot_coalescent

task_classes

TaskData dataclass

Source code in bmm_multitask_learning/coalescent/task_classes.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
@dataclass
class TaskData:
    """Design matrix and targets for a single learning task."""
    X: np.ndarray  # (n_task, d)
    y: np.ndarray  # (n_task, 1)

    def most_prob_w(self, sigma, rho):
        """
        :param sigma: cov matrix of prior of w
        :param rho: scaling coefficient for data
        """
        # data precision implied by the label noise level
        precision = (1/rho**2)

        # un-normalized posterior mean contribution from the data
        xy_sum = precision * (self.X * self.y).sum(0)

        # posterior covariance: prior precision plus data precision
        posterior_cov = np.linalg.inv(np.linalg.inv(sigma) +
                            precision * (self.X.T @ self.X))

        return posterior_cov @ (xy_sum)
most_prob_w(sigma, rho)

:param sigma: cov matrix of prior of w :param rho: scaling coefficient for data

Source code in bmm_multitask_learning/coalescent/task_classes.py
11
12
13
14
15
16
17
18
19
20
21
22
def most_prob_w(self, sigma, rho):
    """
    :param sigma: cov matrix of prior of w
    :param rho: scaling coefficient for data
    """
    coeff = (1/rho**2)
    mean_est_uncent = coeff * (self.X * self.y).sum(0)
    cov_fixed = np.linalg.inv(np.linalg.inv(sigma) +
                        coeff * (self.X.T @ self.X))

    mean_est = cov_fixed @ (mean_est_uncent)
    return mean_est

utils