| import gradio as gr |
| from time import time |
| from scipy import sparse |
| from scipy import linalg |
|
|
| from sklearn.datasets import make_regression |
| from sklearn.linear_model import Lasso |
|
|
|
|
def load_dataset():
    """Generate the synthetic regression problem used by the demo.

    Returns:
        A ``(X, X_sp, y)`` tuple: the dense design matrix produced by
        ``make_regression`` (200 samples, 5000 features, seed 0), the same
        matrix wrapped as a SciPy COO sparse matrix, and the target vector.
    """
    dense_features, targets = make_regression(
        n_samples=200,
        n_features=5000,
        random_state=0,
    )
    # Same values in a sparse container, so the sparse code path can be
    # exercised on exactly the same data as the dense one.
    sparse_features = sparse.coo_matrix(dense_features)
    return dense_features, sparse_features, targets
|
|
def compare_lasso_dense():
    """Fit Lasso on the same data held in dense and in sparse format.

    Reads the module-level ``X`` (dense matrix), ``X_sp`` (sparse copy of
    ``X``) and ``y``. Both estimators are configured identically, so the
    reported coefficient distance reflects only the storage format.

    Returns:
        A multi-line report with the wall-clock fit time of each estimator
        (seconds) and the Euclidean norm of the difference between the two
        coefficient vectors.
    """
    # Both models must share one alpha: with different regularisation
    # strengths they solve different problems, and the coefficient distance
    # would no longer tell us whether the two formats agree.
    alpha = 1
    sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
    dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)

    t0 = time()
    sparse_lasso.fit(X_sp, y)
    sparse_elapsed = time() - t0

    t1 = time()
    dense_lasso.fit(X, y)
    dense_elapsed = time() - t1

    # Euclidean distance between the two learned coefficient vectors.
    coeff_diff = linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_)

    return (
        f"Sparse Lasso done in {sparse_elapsed:.3f}s\t\n"
        f"Dense Lasso done in {dense_elapsed:.3f}s\t\n"
        f"Distance between coefficients : {coeff_diff:.2e}\t\n"
    )
|
|
def compare_lasso_sparse():
    """Fit Lasso on a sparsified copy of the data in dense and sparse format.

    Reads the module-level ``X`` and ``y``. A copy of ``X`` has every entry
    below 2.5 set to zero; the result is fitted once as a CSC sparse matrix
    and once as a dense array, with identically configured estimators.
    Progress lines are also printed to stdout.

    Returns:
        A multi-line report with the matrix density (percent of non-zero
        entries), the wall-clock fit time of each estimator (seconds) and
        the Euclidean norm of the difference between the two coefficient
        vectors.
    """
    # Work on a copy so the shared module-level matrix stays untouched.
    Xs = X.copy()
    Xs[Xs < 2.5] = 0.0

    Xs_sp = sparse.coo_matrix(Xs).tocsc()

    # Computed once and reused for both the console line and the report.
    matrix_density = Xs_sp.nnz / float(X.size) * 100
    print(f"Matrix density : {matrix_density:.3f}%")

    # Both models must share one alpha: with different regularisation
    # strengths they solve different problems, and the coefficient distance
    # would no longer tell us whether the two formats agree.
    # NOTE(review): the upstream scikit-learn example appears to use
    # max_iter=10000 for this sparse case; confirm 1000 is enough to converge.
    alpha = 0.1
    sparse_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)
    dense_lasso = Lasso(alpha=alpha, fit_intercept=False, max_iter=1000)

    # Take each timing before printing so console I/O is not counted.
    t0 = time()
    sparse_lasso.fit(Xs_sp, y)
    sparse_elapsed = time() - t0
    print(f"Sparse Lasso done in {sparse_elapsed:.3f}s")

    t1 = time()
    dense_lasso.fit(Xs, y)
    dense_elapsed = time() - t1
    print(f"Dense Lasso done in {dense_elapsed:.3f}s")

    # Euclidean distance between the two learned coefficient vectors.
    coeff_diff = linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_)
    print(f"Distance between coefficients : {coeff_diff:.2e}")

    return (
        f"Matrix density : {matrix_density:.3f}%\t\n"
        f"Sparse Lasso done in {sparse_elapsed:.3f}s\t\n"
        f"Dense Lasso done in {dense_elapsed:.3f}s\t\n"
        f"Distance between coefficients : {coeff_diff:.2e}\t\n"
    )
|
|
|
|
# Build the dataset once at import time; the comparison functions read
# these module-level names instead of regenerating the data on every click.
X, X_sp, y = load_dataset()
| |
| |
|
|
|
|
|
|
# Page heading; rendered as a level-1 Markdown title by the UI.
title = " Lasso on Dense and Sparse data "

# Markdown shown above the dense-data comparison.
info = '''**Comparing the two Lasso implementations on Dense data**
We create a linear regression problem that is suitable for the Lasso, that is to say, with more features than samples.
We then store the data matrix in both dense (the usual) and sparse format, and train a Lasso on each. We compute the
runtime of both and check that they learned the same model by
computing the Euclidean norm of the difference between the coefficients they learned.
Because the data is dense, we expect better runtime with a dense data format.
'''


# Markdown shown above the sparse-data comparison.
info2='''***Comparing the two Lasso implementations on Sparse data***
We make the previous problem sparse by replacing all small values with 0
and run the same comparisons as above. Because the data is now sparse,
we expect the implementation that uses the sparse data format to be faster.
'''


# Closing Markdown. The stray trailing "**" after "improved" was removed:
# it had no matching opener and rendered as broken emphasis.
conclusion = '''**Conclusion**
We show that linear_model.Lasso provides the same results for dense and sparse data and that in the case of sparse data the speed is improved.
'''
# Assemble the page top to bottom: heading, then for each experiment an
# explanation, an output box and a trigger button, then the closing remarks.
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")

    # --- Dense-data experiment ---
    gr.Markdown(info)
    dense_report = gr.Textbox(value="", label="Dense Lasso comparison")
    dense_button = gr.Button(value="Dense Lasso comparison")
    # The handler takes no inputs; its returned string fills the textbox.
    dense_button.click(compare_lasso_dense, outputs=[dense_report])

    # --- Sparse-data experiment ---
    gr.Markdown(info2)
    sparse_report = gr.Textbox(value="", label="Sparse Lasso comparison")
    sparse_button = gr.Button(value="Sparse Lasso comparison")
    sparse_button.click(compare_lasso_sparse, outputs=[sparse_report])

    gr.Markdown(conclusion)


# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|
|