Hello, colleagues! This is the blog of Open Data Science, the open Russian-speaking data science community (the "Data Scientology lodge", as we jokingly call ourselves). We are already a legion, or more precisely 2500+ people in Slack. Over a year and a half we have generated 800k+ messages (for which we were given a corporate account). Our people are everywhere, maybe even in your organization. If you are interested in machine learning but for some reason have not heard of Open Data Science, you may still know the events organized by the community. The most ambitious of them is DataFest, recently held at the Mail.Ru Group office and attended by 1,700 people over two days. We keep growing: our lodges are opening in cities across Russia, as well as in New York, Dubai and even Lviv, and yes, we do not fight, and sometimes we even drink strong drinks together. And yes, we are a non-profit organization; our goal is education. We do it all for the love of the art. (P.S.: in the photo you can see a sitting of the lodge in one of the secret churches in Moscow.)
A program is said to learn from experience E with respect to a class of tasks T and a quality measure L if, when solving tasks from T, its quality as measured by L improves as it is shown new experience E.
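As a toy illustration of this definition (a minimal sketch, not part of the original post): the code below fits a least-squares line on growing samples of noisy data and evaluates the quality measure L (here, MSE) on a fixed held-out set; the test error tends to fall as the experience E grows.

import numpy as np

np.random.seed(0)

# "experience" E: noisy observations of the line y = 2x + 1
def sample(n):
    x = np.random.uniform(0, 1, n)
    y = 2*x + 1 + np.random.normal(0, 0.3, n)
    return x, y

# fixed held-out set on which the quality measure L (MSE) is evaluated
x_test, y_test = sample(1000)
X_test = np.vstack((np.ones(x_test.shape[0]), x_test)).T

for n in [5, 20, 100, 1000]:
    x_tr, y_tr = sample(n)
    X = np.vstack((np.ones(n), x_tr)).T
    w, *_ = np.linalg.lstsq(X, y_tr, rcond=None)  # least-squares fit on n examples
    mse = np.mean((X_test.dot(w) - y_test)**2)
    print('n = %4d, test MSE = %.4f' % (n, mse))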
\Large \mathcal{H} = \left\{h\left(x, \theta\right) \mid \theta \in \Theta\right\}
\Large \mathcal{M}: \left(X \times Y\right)^n \rightarrow \mathcal{H}
\Large \begin{array}{rcl} Q\left(h\right) & = & \text{E}_{x, y \sim P\left(x, y\right)}\left[L\left(h\left(x\right), y\right)\right] \\ & = & \int L\left(h\left(x\right), y\right)\, dP\left(x, y\right) \end{array}
\Large Q_\text{emp}\left(h\right) = \frac{1}{n}\sum_{i=1}^n L\left(h\left(x_i\right), y_i\right)
\Large \hat{h} = \arg\min_{h \in \mathcal{H}} Q_\text{emp}\left(h\right)
\Large \begin{array}{rcl} \forall h \in \mathcal{H},\ h\left(\vec{x}\right) & = & w_0 x_0 + w_1 x_1 + w_2 x_2 + \cdots + w_m x_m \\ & = & \sum_{i=0}^m w_i x_i \\ & = & \vec{x}^T \vec{w} \end{array}
\Large \begin{array}{rcl} \mathcal{L}\left(X, \vec{y}, \vec{w}\right) & = & \frac{1}{2n}\sum_{i=1}^n \left(y_i - \vec{x}_i^T \vec{w}\right)^2 \\ & = & \frac{1}{2n}\left\|\vec{y} - X\vec{w}\right\|_2^2 \\ & = & \frac{1}{2n}\left(\vec{y} - X\vec{w}\right)^T\left(\vec{y} - X\vec{w}\right) \end{array}
\Large \begin{array}{rcl} \frac{\partial\mathcal{L}}{\partial\vec{w}} & = & \frac{\partial}{\partial\vec{w}}\frac{1}{2n}\left(\vec{y}^T\vec{y} - 2\vec{y}^T X\vec{w} + \vec{w}^T X^T X\vec{w}\right) \\ & = & \frac{1}{2n}\left(-2X^T\vec{y} + 2X^T X\vec{w}\right) \end{array}
\Large \begin{array}{rcl} \frac{\partial\mathcal{L}}{\partial\vec{w}} = 0 & \Leftrightarrow & \frac{1}{2n}\left(-2X^T\vec{y} + 2X^T X\vec{w}\right) = 0 \\ & \Leftrightarrow & -X^T\vec{y} + X^T X\vec{w} = 0 \\ & \Leftrightarrow & X^T X\vec{w} = X^T\vec{y} \\ & \Leftrightarrow & \vec{w} = \left(X^T X\right)^{-1}X^T\vec{y} \end{array}
import numpy as np
import matplotlib.pyplot as plt


def generate_wave_set(n_support=1000, n_train=25, std=0.3):
    data = {}
    # dense grid on [0, 2*pi]
    data['support'] = np.linspace(0, 2*np.pi, num=n_support)
    # ground truth: sin(x) + 1
    data['values'] = np.sin(data['support']) + 1
    # training inputs sampled from the support grid
    data['x_train'] = np.sort(np.random.choice(data['support'], size=n_train, replace=True))
    # noisy observations of sin(x) + 1
    data['y_train'] = np.sin(data['x_train']) + 1 + np.random.normal(0, std, size=data['x_train'].shape[0])
    return data

data = generate_wave_set(1000, 250)

print('Shape of X is', data['x_train'].shape)
print('Head of X is', data['x_train'][:10])

margin = 0.3
plt.plot(data['support'], data['values'], 'b--', alpha=0.5, label='manifold')
plt.scatter(data['x_train'], data['y_train'], 40, 'g', 'o', alpha=0.8, label='data')
plt.xlim(data['x_train'].min() - margin, data['x_train'].max() + margin)
plt.ylim(data['y_train'].min() - margin, data['y_train'].max() + margin)
plt.legend(loc='upper right', prop={'size': 20})
plt.title('True manifold and noised data')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
# design matrix with a bias column of ones
X = np.array([np.ones(data['x_train'].shape[0]), data['x_train']]).T
# closed-form (normal equation) solution for w
w = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), data['y_train'])
# predictions of the fitted model
y_hat = np.dot(X, w)

margin = 0.3
plt.plot(data['support'], data['values'], 'b--', alpha=0.5, label='manifold')
plt.scatter(data['x_train'], data['y_train'], 40, 'g', 'o', alpha=0.8, label='data')
plt.plot(data['x_train'], y_hat, 'r', alpha=0.8, label='fitted')
plt.xlim(data['x_train'].min() - margin, data['x_train'].max() + margin)
plt.ylim(data['y_train'].min() - margin, data['y_train'].max() + margin)
plt.legend(loc='upper right', prop={'size': 20})
plt.title('Fitted linear regression')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
\Large \begin{array}{rcl} \forall h \in \mathcal{H},\ h\left(x\right) & = & w_0 + w_1 x + w_2 x^2 + \cdots + w_p x^p \\ & = & \sum_{i=0}^p w_i x^i \end{array}
# polynomial degrees p to try
degree_list = [1, 2, 3, 5, 7, 10, 13]

cmap = plt.get_cmap('jet')
colors = [cmap(i) for i in np.linspace(0, 1, len(degree_list))]

margin = 0.3
plt.plot(data['support'], data['values'], 'b--', alpha=0.5, label='manifold')
plt.scatter(data['x_train'], data['y_train'], 40, 'g', 'o', alpha=0.8, label='data')

w_list = []
err = []
for ix, degree in enumerate(degree_list):
    # polynomial design matrix: [1, x, x^2, ..., x^degree]
    dlist = [np.ones(data['x_train'].shape[0])] + \
            list(map(lambda n: data['x_train']**n, range(1, degree + 1)))
    X = np.array(dlist).T
    w = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), data['y_train'])
    w_list.append((degree, w))
    y_hat = np.dot(X, w)
    err.append(np.mean((data['y_train'] - y_hat)**2))
    plt.plot(data['x_train'], y_hat, color=colors[ix], label='poly degree: %i' % degree)

plt.xlim(data['x_train'].min() - margin, data['x_train'].max() + margin)
plt.ylim(data['y_train'].min() - margin, data['y_train'].max() + margin)
plt.legend(loc='upper right', prop={'size': 20})
plt.title('Fitted polynomial regressions')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
| p | train MSE |
|---|---|
| 1 | 0.26704 |
| 2 | 0.22495 |
| 3 | 0.08217 |
| 5 | 0.05862 |
| 7 | 0.05749 |
| 10 | 0.0532 |
| 13 | 5.76155 |
np.linalg.eigvals(np.cov(X[:, 1:].T))

Out[10]:
array([  9.29965299e+17+0.j        ,   4.04567033e+13+0.j        ,
         5.44657111e+09+0.j        ,   3.54104756e+06+0.j        ,
         8.36745166e+03+0.j        ,   6.82745279e+01+0.j        ,
         8.88434986e-01+0.j        ,   2.42827315e-02+0.00830052j,
         2.42827315e-02-0.00830052j,   1.17621840e-03+0.j        ,
         1.72254789e-04+0.j        ,  -5.68384880e-06+0.j        ,
         2.39611454e-07+0.j        ])
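The spread of these eigenvalues can be summarized by a single number, the condition number (a small sketch added here, not from the original post):

# condition number of the design matrix and of the normal-equation matrix;
# very large values mean the inverse in the closed-form solution is numerically unreliable
print('cond(X)     = %e' % np.linalg.cond(X))
print('cond(X^T X) = %e' % np.linalg.cond(np.dot(X.T, X)))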
There is just one remark: you should not actually use the formula `(X^T X)^{-1} X^T` to compute the linear regression coefficients. The numerical problems with such ill-conditioned matrices are well known, and in practice `QR` or `SVD` decompositions are used instead.
That is, a piece of code like the following already gives quite a decent result:

degree = 13
dlist = [np.ones(data['x_train'].shape[0])] + \
        list(map(lambda n: data['x_train']**n, range(1, degree + 1)))
X = np.array(dlist).T

# QR decomposition: X = QR, so the least-squares prediction is Q Q^T y
q, r = np.linalg.qr(X)
y_hat = np.dot(np.dot(q, q.T), data['y_train'])
plt.plot(data['x_train'], y_hat, label='poly degree: %i' % degree)
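For completeness, a minimal sketch of the SVD route (not from the original post): `np.linalg.lstsq` solves the same least-squares problem via SVD and never forms X^T X explicitly.

# SVD-based least squares: numerically stable even for the degree-13 design matrix
w_svd, residuals, rank, sing_vals = np.linalg.lstsq(X, data['y_train'], rcond=None)
y_hat_svd = np.dot(X, w_svd)
print('rank of X:', rank)
print('train MSE:', np.mean((data['y_train'] - y_hat_svd)**2))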
for ix, t in enumerate(w_list):
    degree, w = t
    fig, ax = plt.subplots()
    plt.bar(range(max(degree_list) + 1),
            np.hstack((w, [0]*(max(degree_list) - w.shape[0] + 1))),
            color=colors[ix])
    plt.title('Magnitude of fitted LR parameters for poly:%i' % degree)
    plt.xlabel('degree')
    plt.ylabel('value of w')
    ax.set_xticks(np.array(range(max(degree_list) + 1)) + 0.5)
    ax.set_xticklabels(range(max(degree_list) + 1))
    plt.show()





\Large \mathcal{L}_{reg}\left(X, \vec{y}, \vec{w}\right) = \mathcal{L}\left(X, \vec{y}, \vec{w}\right) + \lambda R\left(\vec{w}\right)
\Large R\left(\vec{w}\right) = \frac{1}{2}\left\|\vec{w}\right\|_2^2 = \frac{1}{2}\sum_{j=1}^m w_j^2 = \frac{1}{2}\vec{w}^T\vec{w}
\Large \mathcal{L}\left(X, \vec{y}, \vec{w}\right) = \frac{1}{2}\left(\vec{y} - X\vec{w}\right)^T\left(\vec{y} - X\vec{w}\right) + \frac{\lambda}{2}\vec{w}^T\vec{w}
\Large \begin{array}{rcl} \frac{\partial\mathcal{L}}{\partial\vec{w}} & = & \frac{\partial}{\partial\vec{w}}\left(\frac{1}{2}\left(\vec{y} - X\vec{w}\right)^T\left(\vec{y} - X\vec{w}\right) + \frac{\lambda}{2}\vec{w}^T\vec{w}\right) \\ & = & \frac{\partial}{\partial\vec{w}}\left(\frac{1}{2}\left(\vec{y}^T\vec{y} - 2\vec{y}^T X\vec{w} + \vec{w}^T X^T X\vec{w}\right) + \frac{\lambda}{2}\vec{w}^T\vec{w}\right) \\ & = & -X^T\vec{y} + X^T X\vec{w} + \lambda\vec{w} \end{array}
\Large \begin{array}{rcl} \frac{\partial\mathcal{L}}{\partial\vec{w}} = 0 & \Leftrightarrow & -X^T\vec{y} + X^T X\vec{w} + \lambda\vec{w} = 0 \\ & \Leftrightarrow & X^T X\vec{w} + \lambda\vec{w} = X^T\vec{y} \\ & \Leftrightarrow & \left(X^T X + \lambda E\right)\vec{w} = X^T\vec{y} \\ & \Leftrightarrow & \vec{w} = \left(X^T X + \lambda E\right)^{-1}X^T\vec{y} \end{array}
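As a sanity check (a sketch not in the original post; it assumes scikit-learn is available), the closed-form ridge solution on the current design matrix X can be compared against `sklearn.linear_model.Ridge`, which minimizes the same objective ||y - Xw||^2 + alpha*||w||^2.

from sklearn.linear_model import Ridge

lmbd = 0.1
# closed-form ridge (the bias column is already inside X, hence fit_intercept=False)
w_closed = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X) + lmbd*np.eye(X.shape[1])), X.T),
                  data['y_train'])

ridge = Ridge(alpha=lmbd, fit_intercept=False, solver='svd')
ridge.fit(X, data['y_train'])

# for a well-conditioned X the two vectors match closely;
# for the ill-conditioned degree-13 design matrix they may drift apart numerically
print('max |w_closed - w_sklearn| =', np.abs(w_closed - ridge.coef_).max())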

# define regularization parameter
lmbd = 0.1

degree_list = [1, 2, 3, 10, 12, 13]
cmap = plt.get_cmap('jet')
colors = [cmap(i) for i in np.linspace(0, 1, len(degree_list))]

margin = 0.3
plt.plot(data['support'], data['values'], 'b--', alpha=0.5, label='manifold')
plt.scatter(data['x_train'], data['y_train'], 40, 'g', 'o', alpha=0.8, label='data')

w_list_l2 = []
err = []
for ix, degree in enumerate(degree_list):
    dlist = [[1]*data['x_train'].shape[0]] + \
            list(map(lambda n: data['x_train']**n, range(1, degree + 1)))
    X = np.array(dlist).T
    # ridge closed form: w = (X^T X + lambda*E)^-1 X^T y
    w = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X) + lmbd*np.eye(X.shape[1])), X.T),
               data['y_train'])
    w_list_l2.append((degree, w))
    y_hat = np.dot(X, w)
    plt.plot(data['x_train'], y_hat, color=colors[ix], label='poly degree: %i' % degree)
    err.append(np.mean((data['y_train'] - y_hat)**2))

plt.xlim(data['x_train'].min() - margin, data['x_train'].max() + margin)
plt.ylim(data['y_train'].min() - margin, data['y_train'].max() + margin)
plt.legend(loc='upper right', prop={'size': 20})
plt.title('Fitted polynomial regressions with L2 reg')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
| p | train MSE |
|---|---|
| 1 | 0.26748 |
| 2 | 0.22546 |
| 3 | 0.08803 |
| 10 | 0.05833 |
| 12 | 0.05585 |
| 13 | 0.05638 |
for ix, t in enumerate(w_list_l2):
    degree, w = t
    fig, ax = plt.subplots()
    plt.bar(range(max(degree_list) + 1),
            np.hstack((w, [0]*(max(degree_list) - w.shape[0] + 1))),
            color=colors[ix])
    plt.title('Magnitude of fitted LR parameters for poly:%i with L2 reg' % degree)
    plt.xlabel('degree')
    plt.ylabel('value of w')
    ax.set_xticks(np.array(range(max(degree_list) + 1)) + 0.5)
    ax.set_xticklabels(range(max(degree_list) + 1))
    plt.show()





\Large R\left(\vec{w}\right) = \left\|\vec{w}\right\|_1 = \sum_{j=1}^m \left|w_j\right|
\Large \mathcal{L}\left(X, \vec{y}, \vec{w}\right) = \frac{1}{2n}\sum_{i=1}^n \left(\vec{x}_i^T\vec{w} - y_i\right)^2 + \lambda\sum_{j=1}^m \left|w_j\right|
\Large \frac{\partial\mathcal{L}}{\partial w_j} = \frac{1}{n}\sum_{i=1}^n \left(\vec{x}_i^T\vec{w} - y_i\right)x_{ij} + \lambda\,\text{sign}(w_j)
\Large \vec{w}_\text{new} := \vec{w} - \alpha\frac{\partial\mathcal{L}}{\partial\vec{w}}
lmbd = 1
degree = 13

dlist = [[1]*data['x_train'].shape[0]] + \
        list(map(lambda n: data['x_train']**n, range(1, degree + 1)))
X = np.array(dlist).T

# mean squared error
def mse(u, v):
    return ((u - v)**2).sum()/u.shape[0]

# initial weights
w = np.array([-1.0] * X.shape[1])
# number of gradient descent iterations
n_iter = 20
# the learning rate has to be tiny, otherwise the loss explodes immediately
lr = 0.00000001

loss = []
for ix in range(n_iter):
    w -= lr*(np.dot(np.dot(X, w) - data['y_train'], X)/X.shape[0] + lmbd*np.sign(w))
    y_hat = np.dot(X, w)
    loss.append(mse(data['y_train'], y_hat))
    print(loss[-1])

1.3051230958e+38
1.21979102398e+58
1.14003816725e+78
1.06549974318e+98
9.95834819687e+117
9.30724755635e+137
8.69871743413e+157
8.12997446782e+177
7.59841727794e+197
7.10161456943e+217
6.63729401109e+237
6.20333184222e+257
5.79774315864e+277
5.41867283397e+297
inf
inf
inf
inf
inf
inf

The loss blows up because the polynomial features live on wildly different scales; the standard fix is to standardize each feature using its per-column mean and standard deviation:

\Large \begin{array}{rcl} \overline{\mu}_{\cdot j} & = & \frac{1}{n}\sum_{i=1}^n x_{ij} \\ \overline{\sigma}_{\cdot j} & = & \sqrt{\frac{1}{n}\sum_{i=1}^n \left(x_{ij} - \overline{\mu}_{\cdot j}\right)^2} \end{array}
\Large \vec{x}_\text{new} = \frac{\vec{x} - \overline{\mu}}{\overline{\sigma}}
lmbd = 1
degree = 13

dlist = [[1]*data['x_train'].shape[0]] + \
        list(map(lambda n: data['x_train']**n, range(1, degree + 1)))
X = np.array(dlist).T

# standardize each feature (column) to zero mean and unit variance
x_mean = X.mean(axis=0)
x_std = X.std(axis=0)
X = (X - x_mean)/x_std
# restore the bias column (its std is zero, so it was destroyed by the scaling)
X[:, 0] = 1.0

w = np.array([-1.0] * X.shape[1])
n_iter = 100
lr = 0.1
loss = []
for ix in range(n_iter):
    w -= lr*(np.dot(np.dot(X, w) - data['y_train'], X)/X.shape[0] + lmbd*np.sign(w))
    y_hat = np.dot(X, w)
    loss.append(mse(data['y_train'], y_hat))

plt.plot(loss)
plt.title('Train error')
plt.xlabel('Iteration')
plt.ylabel('MSE')
plt.show()
degree_list = [1, 2, 3, 10, 12, 13]
cmap = plt.get_cmap('jet')
colors = [cmap(i) for i in np.linspace(0, 1, len(degree_list))]

margin = 0.3
plt.plot(data['support'], data['values'], 'b--', alpha=0.5, label='manifold')
plt.scatter(data['x_train'], data['y_train'], 40, 'g', 'o', alpha=0.8, label='data')

def mse(u, v):
    return ((u - v)**2).sum()/u.shape[0]

def fit_lr_l1(X, y, lmbd, n_iter=100, lr=0.1):
    # gradient descent on the L1-regularized least-squares loss
    w = np.array([-1.0] * X.shape[1])
    loss = []
    for ix_iter in range(n_iter):
        w -= lr*(np.dot(np.dot(X, w) - y, X)/X.shape[0] + lmbd*np.sign(w))
        y_hat = np.dot(X, w)
        loss.append(mse(y, y_hat))
    return w, y_hat, loss

w_list_l1 = []
for ix, degree in enumerate(degree_list):
    dlist = [[1]*data['x_train'].shape[0]] + \
            list(map(lambda n: data['x_train']**n, range(1, degree + 1)))
    X = np.array(dlist).T
    x_mean = X.mean(axis=0)
    x_std = X.std(axis=0)
    X = (X - x_mean)/x_std
    X[:, 0] = 1.0
    w, y_hat, loss = fit_lr_l1(X, data['y_train'], lmbd=0.05)
    w_list_l1.append((degree, w))
    plt.plot(data['x_train'], y_hat, color=colors[ix], label='poly degree: %i' % degree)

plt.xlim(data['x_train'].min() - margin, data['x_train'].max() + margin)
plt.ylim(data['y_train'].min() - margin, data['y_train'].max() + margin)
plt.legend(loc='upper right', prop={'size': 20})
plt.title('Fitted polynomial regressions with L1 reg')
plt.xlabel('x')
plt.ylabel('y')
plt.show()
| p | train MSE |
|---|---|
| 1 | 0.27204 |
| 2 | 0.23794 |
| 3 | 0.24118 |
| 10 | 0.18083 |
| 12 | 0.16069 |
| 13 | 0.15425 |
for ix, t in enumerate(w_list_l1):
    degree, w = t
    fig, ax = plt.subplots()
    plt.bar(range(max(degree_list) + 1),
            np.hstack((w, [0]*(max(degree_list) - w.shape[0] + 1))),
            color=colors[ix])
    plt.title('Magnitude of fitted LR parameters for poly:%i with L1 reg' % degree)
    plt.xlabel('degree')
    plt.ylabel('value of w')
    ax.set_xticks(np.array(range(max(degree_list) + 1)) + 0.5)
    ax.set_xticklabels(range(max(degree_list) + 1))
    plt.show()





\Large \color{green}{p\left(y \mid x\right)} = \dfrac{\color{orange}{p\left(x \mid y\right)}\,\color{blue}{p\left(y\right)}}{\color{red}{p\left(x\right)}}
\Large \theta_\text{ML} = \arg\max_\theta p\left(D \mid \theta\right)
\Large p\left(\theta \mid D\right) \propto p\left(D \mid \theta\right)p\left(\theta\right)
\Large \theta_\text{MAP} = \arg\max_\theta p\left(\theta \mid D\right) = \arg\max_\theta p\left(D \mid \theta\right)p\left(\theta\right)
\Large p(D) = \prod_{i=1}^n p(x_i)
\Large y = \vec{w}^T\vec{x} + \epsilon, \quad \epsilon \sim \mathcal{N}\left(0, \sigma^2\right)
\Large p\left(y \mid \vec{x}, \vec{w}, \sigma^2\right) = \mathcal{N}\left(y \mid \vec{w}^T\vec{x}, \sigma^2\right)
\Large p\left(\vec{y} \mid X, \vec{w}, \sigma^2\right) = \prod_{i=1}^n \mathcal{N}\left(y_i \mid \vec{w}^T\vec{x}_i, \sigma^2\right)
\Large \begin{array}{rcl} \mathcal{L} & = & \ln p\left(\vec{y} \mid X, \vec{w}, \sigma^2\right) \\ & = & \ln \prod_{i=1}^n \mathcal{N}\left(y_i \mid \vec{w}^T\vec{x}_i, \sigma^2\right) \\ & = & \ln \frac{1}{\left(\sigma\sqrt{2\pi}\right)^n} e^{-\frac{1}{2\sigma^2}\sum_{i=1}^n\left(y_i - \vec{w}^T\vec{x}_i\right)^2} \\ & = & -\frac{n}{2}\ln 2\pi\sigma^2 - \frac{1}{2\sigma^2}\sum_{i=1}^n\left(y_i - \vec{w}^T\vec{x}_i\right)^2 \end{array}
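The first term does not depend on $\vec{w}$, so (spelling out this step) maximizing the log-likelihood over the weights is exactly the familiar least-squares problem:

\Large \hat{\vec{w}}_\text{ML} = \arg\max_{\vec{w}} \mathcal{L} = \arg\min_{\vec{w}} \sum_{i=1}^n\left(y_i - \vec{w}^T\vec{x}_i\right)^2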
import itertools as it
from matplotlib import cm

data = generate_wave_set(1000, 100)

X = np.vstack((np.ones(data['x_train'].shape[0]), data['x_train'])).T
w = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), data['y_train'])

w0_support = np.linspace(-3, 3, 1000)
w1_support = np.linspace(-3, 3, 1000)

# create cartesian product of parameters
wx_space = list(it.product(w0_support, w1_support))
w0, w1 = zip(*wx_space)

# calculate MSE on the dataset for each pair of parameters
y = ((data['y_train'][:, np.newaxis] - np.dot(X, np.array(wx_space).T))**2).mean(axis=0)

plt.hexbin(w0, w1, C=y**(0.2), cmap=cm.jet_r, bins=None)
plt.axvline(0, color='black', linestyle='-', label='origin')
plt.axhline(0, color='black', linestyle='-')
plt.axvline(w[0], color='w', linestyle='--', label='ML solution')
plt.axhline(w[1], color='w', linestyle='--')
plt.gca().set_aspect('equal', 'datalim')
plt.title('ML solution')
plt.xlabel('w_0')
plt.ylabel('w_1')
plt.legend(loc='upper left', prop={'size': 20})
plt.show()
\Large p(x \mid \mu, \sigma) = \frac{1}{\sigma\sqrt{2\pi}} e^{-\frac{(x - \mu)^2}{2\sigma^2}}
from scipy.stats import norm

x = np.linspace(-5, 5, 1000)
for scale in np.linspace(0.5, 1.4, 7):
    plt.plot(x, norm.pdf(x, scale=scale), label='scale=%0.2f' % scale)
plt.legend(loc='upper right', prop={'size': 20})
plt.title('Normal distribution with different scale parameter')
plt.show()
\Large p\left(\vec{w} \mid \vec{y}, X, \sigma^2\right) \propto \mathcal{N}\left(\vec{w} \mid 0, \sigma_0^2 E\right)\prod_{i=1}^n \mathcal{N}\left(y_i \mid \vec{w}^T\vec{x}_i, \sigma^2\right)
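Taking the negative logarithm of this posterior (a short derivation spelled out here under the same assumptions) and dropping the terms that do not depend on $\vec{w}$ gives exactly the L2-regularized loss, with the regularization strength fixed by the ratio of the two variances:

\Large \begin{array}{rcl} -\ln p\left(\vec{w} \mid \vec{y}, X, \sigma^2\right) & = & \frac{1}{2\sigma^2}\sum_{i=1}^n\left(y_i - \vec{w}^T\vec{x}_i\right)^2 + \frac{1}{2\sigma_0^2}\vec{w}^T\vec{w} + \text{const} \\ \hat{\vec{w}}_\text{MAP} & = & \arg\min_{\vec{w}}\left(\frac{1}{2}\sum_{i=1}^n\left(y_i - \vec{w}^T\vec{x}_i\right)^2 + \frac{\lambda}{2}\vec{w}^T\vec{w}\right), \quad \lambda = \frac{\sigma^2}{\sigma_0^2} \end{array}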
w = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), data['y_train'])

# solve the L2 problem for different values of lambda
w_l2 = {}
lmbd_space = np.linspace(0.5, 1500, 500)
for lmbd in lmbd_space:
    w_l2[lmbd] = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X) + lmbd*np.eye(X.shape[1])), X.T),
                        data['y_train'])

w0_support = np.linspace(-3, 3, 1000)
w1_support = np.linspace(-3, 3, 1000)
wx_space = list(it.product(w0_support, w1_support))
w0, w1 = zip(*wx_space)
y = ((data['y_train'][:, np.newaxis] - np.dot(X, np.array(wx_space).T))**2).mean(axis=0)

plt.hexbin(w0, w1, C=y**(0.2), cmap=cm.jet_r, bins=None)
plt.axvline(0, color='black', linestyle='-', label='origin')
plt.axhline(0, color='black', linestyle='-')

# plot level sets of the Gaussian prior on the parameters
for i in range(1, 6):
    plt.gcf().gca().add_artist(plt.Circle((0, 0), i*0.3, color='black', linestyle='--', alpha=0.1))

plt.axvline(w[0], color='w', linestyle='--', label='ML solution')
plt.axhline(w[1], color='w', linestyle='--')

# plot MAP solutions
flag = True
for _, w_l2_solution in w_l2.items():
    plt.plot(w_l2_solution[0], w_l2_solution[1], color='c', marker='.', mew=1, alpha=0.5,
             label='MAP L2 solution' if flag else None)
    flag = False

plt.gca().set_aspect('equal', 'datalim')
plt.title('ML and MAP L2 for different values of lambda')
plt.xlabel('w_0')
plt.ylabel('w_1')
plt.legend(loc='upper left', prop={'size': 20})
plt.show()
\Large p(x \mid \mu, \beta) = \frac{1}{2\beta} e^{-\frac{\left|x - \mu\right|}{\beta}}
from scipy.stats import laplace

x = np.linspace(-5, 5, 1000)
for scale in np.linspace(0.5, 1.4, 7):
    plt.plot(x, laplace.pdf(x, scale=scale), label='scale=%0.2f' % scale)
plt.legend(loc='upper right', prop={'size': 20})
plt.title('Laplace distribution with different scale parameter')
plt.show()
\Large p\left(\vec{w} \mid \vec{y}, X, \sigma^2\right) \propto \text{Laplace}\left(\vec{w} \mid 0, \beta\right)\prod_{i=1}^n \mathcal{N}\left(y_i \mid \vec{w}^T\vec{x}_i, \sigma^2\right)
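The same logarithm trick works here (again spelled out under the same assumptions): the negative log of the Laplace prior contributes the sum of absolute values of the weights, so the MAP estimate is the L1-regularized (Lasso-style) problem:

\Large \begin{array}{rcl} -\ln p\left(\vec{w} \mid \vec{y}, X, \sigma^2\right) & = & \frac{1}{2\sigma^2}\sum_{i=1}^n\left(y_i - \vec{w}^T\vec{x}_i\right)^2 + \frac{1}{\beta}\sum_{j=1}^m\left|w_j\right| + \text{const} \\ \hat{\vec{w}}_\text{MAP} & = & \arg\min_{\vec{w}}\left(\frac{1}{2}\sum_{i=1}^n\left(y_i - \vec{w}^T\vec{x}_i\right)^2 + \lambda\sum_{j=1}^m\left|w_j\right|\right), \quad \lambda = \frac{\sigma^2}{\beta} \end{array}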
from tqdm import tqdm

w_l1 = {}
lmbd_space = np.linspace(0.001, 2, 200)
for lmbd in tqdm(lmbd_space):
    w_l1[lmbd] = fit_lr_l1(X, data['y_train'], lmbd, n_iter=10000, lr=0.001)[0]

w0_support = np.linspace(-3, 3, 1000)
w1_support = np.linspace(-3, 3, 1000)
wx_space = list(it.product(w0_support, w1_support))
w0, w1 = zip(*wx_space)
y = ((data['y_train'][:, np.newaxis] - np.dot(X, np.array(wx_space).T))**2).mean(axis=0)

plt.hexbin(w0, w1, C=y**(0.2), cmap=cm.jet_r, bins=None)
plt.axvline(0, color='black', linestyle='-', label='origin')
plt.axhline(0, color='black', linestyle='-')

# helper to draw a rhombus (level set of the Laplace prior)
def plot_rhomb(cx=0, cy=0, r=0.5):
    plt.gcf().gca().add_artist(plt.Rectangle((cx, cy - np.sqrt(2*r**2)), 2*r, 2*r,
                                             angle=45, color='black', linestyle='--', alpha=0.1))

# plot level sets of the Laplace prior on the parameters
for i in range(1, 6):
    plot_rhomb(r=0.2*i)

plt.axvline(w[0], color='w', linestyle='--', label='ML solution')
plt.axhline(w[1], color='w', linestyle='--')

# plot MAP solutions
flag = True
for _, w_l1_solution in w_l1.items():
    plt.plot(w_l1_solution[0], w_l1_solution[1], color='c', marker='.', mew=1, alpha=0.5,
             label='MAP L1 solution' if flag else None)
    flag = False

plt.gca().set_aspect('equal', 'datalim')
plt.title('ML and MAP L1 for different values of lambda')
plt.xlabel('w_0')
plt.ylabel('w_1')
plt.legend(loc='upper left', prop={'size': 20})
plt.show()
\Large p(x \mid \alpha, \beta, \mu) = \frac{\beta}{2\alpha\Gamma\left(\frac{1}{\beta}\right)} e^{-\left(\frac{\left|x - \mu\right|}{\alpha}\right)^\beta}
from scipy.stats import gennorm

x = np.linspace(-5, 5, 1000)
for beta in np.linspace(0, 3, 11):
    if beta == 0:
        continue  # the shape parameter beta must be strictly positive
    plt.plot(x, gennorm.pdf(x, beta=beta), label='beta=%0.2f' % beta)
plt.legend(loc='upper right', prop={'size': 20})
plt.title('Generalized normal distribution with different beta parameter')
plt.show()
\Large \left\|x\right\|_p = \left(\sum_{i=1}^n \left|x_i\right|^p\right)^{\frac{1}{p}}
from functools import reduce

f, ax = plt.subplots(3, 4)
# flatten the 3x4 grid of axes into a single list
ax = reduce(lambda a, b: a + b, ax.tolist())

a_list = np.linspace(0, 2*np.pi, 361)
r_list = np.linspace(0, 1.1, 100)

for ix, p in enumerate(np.linspace(0.25, 3, 12)):
    points = []
    for a in a_list:
        r_inner = []
        for r in r_list:
            # grow the radius until the point leaves the unit Lp ball
            if np.linalg.norm([r*np.cos(a), r*np.sin(a)], p) > 1:
                break
            r_inner.append(r)
        r = max(r_inner)
        points.append([r*np.cos(a), r*np.sin(a)])
    points = np.array(points)
    ax[ix].plot(points[:, 0], points[:, 1])
    ax[ix].set_aspect('equal', 'datalim')
    ax[ix].set_title('Circle in Lp space, p=%0.2f' % p)

plt.show()

Source: https://habr.com/ru/post/322076/