% Hw05 Linear Regression
%
% (Hosting-site page chrome left over from text extraction; not part of the document:
%  "Download as txt, pdf, or txt" / "You are on page 1 of 29")

\documentclass[11pt]{article}

\usepackage[breakable]{tcolorbox}
\usepackage{parskip} % Stop auto-indenting (to mimic markdown behaviour)

% Basic figure setup, for now with no caption control since it's done
% automatically by Pandoc (which extracts ![](path) syntax from Markdown).
\usepackage{graphicx}
% Maintain compatibility with old templates. Remove in nbconvert 6.0
\let\Oldincludegraphics\includegraphics
% Ensure that by default, figures have no caption (until we provide a
% proper Figure object with a Caption API and a way to capture that
% in the conversion process - todo).
\usepackage{caption}
\DeclareCaptionFormat{nocaption}{}
\captionsetup{format=nocaption,aboveskip=0pt,belowskip=0pt}

% float provides the H placement specifier; make it the default for figures.
\usepackage{float}
\floatplacement{figure}{H} % forces figures to be placed at the correct location
\usepackage{xcolor} % Allow colors to be defined
\usepackage{enumerate} % Needed for markdown enumerations to work
\usepackage{geometry} % Used to adjust the document margins
\usepackage{amsmath} % Equations
\usepackage{amssymb} % Equations
\usepackage{textcomp} % defines textquotesingle
% Hack from http://tex.stackexchange.com/a/47451/13684:
\AtBeginDocument{%
\def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code
}
\usepackage{upquote} % Upright quotes for verbatim code
\usepackage{eurosym} % defines \euro

% Engine-dependent font and input-encoding setup.
\usepackage{iftex}
\ifPDFTeX
    % pdfTeX: T1 font encoding, then alphabeta if it is installed,
    % otherwise fall back to ucs + utf8x input encoding.
    \usepackage[T1]{fontenc}
    \IfFileExists{alphabeta.sty}{
        \usepackage{alphabeta}
    }{
        \usepackage[mathletters]{ucs}
        \usepackage[utf8x]{inputenc}
    }
\else
    % Any other engine: fontspec and unicode-math.
    \usepackage{fontspec}
    \usepackage{unicode-math}
\fi

\usepackage{fancyvrb} % verbatim replacement that allows latex


\usepackage{grffile} % extends the file name processing of package graphics
% to support a larger range
% Compatibility shim for grffile. It sits between \makeatletter and
% \makeatother because the macro names involved contain @.
\makeatletter % fix for old versions of grffile with XeLaTeX
% \@ifpackagelater selects the first branch when the loaded grffile is dated
% 2019/11/01 or later, and the second branch otherwise.
\@ifpackagelater{grffile}{2019/11/01}
{
% Do nothing on new versions
}
{
% Older grffile: redefine \Gread@@xetex. If a file named "<\Gin@base>.bb"
% exists it is read via \Gread@eps; otherwise the call is forwarded to
% \Gread@@xetex@aux with the original argument.
\def\Gread@@xetex#1{%
\IfFileExists{"\Gin@base".bb}%
{\Gread@eps{\Gin@base.bb}}%
{\Gread@@xetex@aux#1}%
}
}
\makeatother
\usepackage[Export]{adjustbox} % Used to constrain images to a maximum size
\adjustboxset{max size={0.9\linewidth}{0.9\paperheight}}

% The hyperref package gives us a pdf with properly built


% internal navigation ('pdf bookmarks' for the table of contents,
% internal cross-reference links, web links for URLs, etc.)
\usepackage{hyperref}
% The default LaTeX title has an obnoxious amount of whitespace. By default,
% titling removes some of it. It also provides customization options.
\usepackage{titling}
\usepackage{longtable} % longtable support required by pandoc >1.10
\usepackage{booktabs} % table support for pandoc > 1.12.2
\usepackage{array} % table support for pandoc >= 2.11.3
\usepackage{calc} % table minipage width calculation for pandoc >= 2.11.1
\usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment)
\usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout)
% normalem makes italics be italics, not underlines
\usepackage{soul} % strikethrough (\st) support for pandoc >= 3.0.0
\usepackage{mathrsfs}

% Colors for the hyperref package


\definecolor{urlcolor}{rgb}{0,.145,.698}
\definecolor{linkcolor}{rgb}{.71,0.21,0.01}
\definecolor{citecolor}{rgb}{.12,.54,.11}

% ANSI colors
\definecolor{ansi-black}{HTML}{3E424D}
\definecolor{ansi-black-intense}{HTML}{282C36}
\definecolor{ansi-red}{HTML}{E75C58}
\definecolor{ansi-red-intense}{HTML}{B22B31}
\definecolor{ansi-green}{HTML}{00A250}
\definecolor{ansi-green-intense}{HTML}{007427}
\definecolor{ansi-yellow}{HTML}{DDB62B}
\definecolor{ansi-yellow-intense}{HTML}{B27D12}
\definecolor{ansi-blue}{HTML}{208FFB}
\definecolor{ansi-blue-intense}{HTML}{0065CA}
\definecolor{ansi-magenta}{HTML}{D160C4}
\definecolor{ansi-magenta-intense}{HTML}{A03196}
\definecolor{ansi-cyan}{HTML}{60C6C8}
\definecolor{ansi-cyan-intense}{HTML}{258F8F}
\definecolor{ansi-white}{HTML}{C5C1B4}
\definecolor{ansi-white-intense}{HTML}{A1A6B2}
\definecolor{ansi-default-inverse-fg}{HTML}{FFFFFF}
\definecolor{ansi-default-inverse-bg}{HTML}{000000}

% common color for the border for error outputs.


\definecolor{outerrorbackground}{HTML}{FFDFDF}

% commands and environments needed by pandoc snippets


% extracted from the output of `pandoc -s`
% \tightlist sets \itemsep and \parskip to 0pt; \providecommand defines it
% only if it is not already defined.
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
% Highlighting is a fancyvrb Verbatim environment in which \, { and } keep
% their command meaning, so the *Tok macros below can be used inside it.
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
% Shaded is an empty wrapper environment (no begin or end code).
\newenvironment{Shaded}{}{}
% Token macros: each takes one argument and typesets it in a fixed RGB colour,
% some additionally in bold or italic. \RegionMarkerTok and \NormalTok only
% group their argument. The inner {#1} braces keep the argument in its own
% group.
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
\newcommand{\RegionMarkerTok}[1]{{#1}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\NormalTok}[1]{{#1}}

% Additional commands for more recent versions of Pandoc
% Each macro takes one argument and typesets it in a fixed RGB colour, some
% additionally in bold and/or italic; \ImportTok, \BuiltInTok and
% \ExtensionTok only group their argument. Every definition is kept on a
% single line so that no control sequence is split across a line break.
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}}
\newcommand{\ImportTok}[1]{{#1}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}}
\newcommand{\BuiltInTok}[1]{{#1}}
\newcommand{\ExtensionTok}[1]{{#1}}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}

% Define a nice break command that doesn't care if a line doesn't already
% exist.
% \br expands to a \fill-wide \hspace* followed by \\* (the starred form of
% the line-break command).
\def\br{\hspace*{\fill} \\* }
% Math Jax compatibility definitions
% \gt and \lt expand to the literal characters > and <.
\def\gt{>}
\def\lt{<}
% Save the original logo macros as \Oldtex / \Oldlatex, then redefine \TeX
% and \LaTeX so the logos are always wrapped in \textrm.
\let\Oldtex\TeX
\let\Oldlatex\LaTeX
\renewcommand{\TeX}{\textrm{\Oldtex}}
\renewcommand{\LaTeX}{\textrm{\Oldlatex}}
% Document parameters
% Document title
\title{hw05\_linear\_regression}
% Pygments definitions
\makeatletter
% \PY@reset: set the six style hooks \PY@it, \PY@bf, \PY@ul, \PY@tc, \PY@bc
% and \PY@ff to \relax, so that a token starts with no styling.
\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax%
\let\PY@ul=\relax \let\PY@tc=\relax%
\let\PY@bc=\relax \let\PY@ff=\relax}
% \PY@tok{<name>}: execute the control sequence named PY@tok@<name>.
\def\PY@tok#1{\csname PY@tok@#1\endcsname}
% \PY@toks: consume a '+'-terminated item; if it is the \relax sentinel, stop,
% otherwise run \PY@tok on it and recurse on the next item.
\def\PY@toks#1+{\ifx\relax#1\empty\else%
\PY@tok{#1}\expandafter\PY@toks\fi}
% \PY@do{<text>}: apply the hooks to <text>, nested from the outside in as
% bc, tc, ul, it, bf, ff.
\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{%
\PY@it{\PY@bf{\PY@ff{#1}}}}}}}
% \PY{<names>}{<text>}: entry point. Resets the hooks, runs the setter for
% each '+'-separated name in <names>, then typesets <text> through \PY@do.
\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}}

\@namedef{PY@tok@w}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\@namedef{PY@tok@c}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.24,0.48,0.48}
{##1}}}
\@namedef{PY@tok@cp}{\def\PY@tc##1{\textcolor[rgb]{0.61,0.40,0.00}{##1}}}
\@namedef{PY@tok@k}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}
{##1}}}
\@namedef{PY@tok@kp}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kt}{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\@namedef{PY@tok@o}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@ow}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.67,0.13,1.00}{##1}}}
\@namedef{PY@tok@nb}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@nf}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@nc}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@nn}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@ne}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.80,0.25,0.22}{##1}}}
\@namedef{PY@tok@nv}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@no}{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\@namedef{PY@tok@nl}{\def\PY@tc##1{\textcolor[rgb]{0.46,0.46,0.00}{##1}}}
\@namedef{PY@tok@ni}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.44,0.44,0.44}{##1}}}
\@namedef{PY@tok@na}{\def\PY@tc##1{\textcolor[rgb]{0.41,0.47,0.13}{##1}}}
\@namedef{PY@tok@nt}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@nd}{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\@namedef{PY@tok@s}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sd}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@si}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.64,0.35,0.47}{##1}}}
\@namedef{PY@tok@se}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.67,0.36,0.12}{##1}}}
\@namedef{PY@tok@sr}{\def\PY@tc##1{\textcolor[rgb]{0.64,0.35,0.47}{##1}}}
\@namedef{PY@tok@ss}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@sx}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@m}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@gh}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.00,0.50}{##1}}}
\@namedef{PY@tok@gu}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.50,0.00,0.50}{##1}}}
\@namedef{PY@tok@gd}{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\@namedef{PY@tok@gi}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.52,0.00}{##1}}}
\@namedef{PY@tok@gr}{\def\PY@tc##1{\textcolor[rgb]{0.89,0.00,0.00}{##1}}}
\@namedef{PY@tok@ge}{\let\PY@it=\textit}
\@namedef{PY@tok@gs}{\let\PY@bf=\textbf}
\@namedef{PY@tok@gp}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.00,0.50}{##1}}}
\@namedef{PY@tok@go}{\def\PY@tc##1{\textcolor[rgb]{0.44,0.44,0.44}{##1}}}
\@namedef{PY@tok@gt}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
% Error token: sets the \PY@bc hook to draw an \fcolorbox (red frame, white
% fill) around the text, with \fboxsep set to minus \fboxrule.
\@namedef{PY@tok@err}{\def\PY@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}}
\@namedef{PY@tok@kc}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kd}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kn}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kr}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@bp}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@fm}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@vc}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vg}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vi}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vm}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@sa}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sb}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sc}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@dl}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@s2}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sh}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@s1}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@mb}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mf}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mh}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mi}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@il}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mo}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@ch}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\@namedef{PY@tok@cm}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\@namedef{PY@tok@cpf}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\@namedef{PY@tok@c1}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\@namedef{PY@tok@cs}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}

% Escape macros used in the highlighted listings. Each of the following
% expands to \char`\<c>, i.e. the character with the code of <c> in the
% current font.
\def\PYZbs{\char`\\}
\def\PYZus{\char`\_}
\def\PYZob{\char`\{}
\def\PYZcb{\char`\}}
\def\PYZca{\char`\^}
\def\PYZam{\char`\&}
\def\PYZlt{\char`\<}
\def\PYZgt{\char`\>}
\def\PYZsh{\char`\#}
\def\PYZpc{\char`\%}
\def\PYZdl{\char`\$}
\def\PYZhy{\char`\-}
% NOTE: the preamble also redefines \PYZsq to \textquotesingle inside an
% \AtBeginDocument hook, so this definition is replaced at \begin{document}.
\def\PYZsq{\char`\'}
\def\PYZdq{\char`\"}
\def\PYZti{\char`\~}
% for compatibility with earlier versions
% These three expand to the plain characters @, [ and ].
\def\PYZat{@}
\def\PYZlb{[}
\def\PYZrb{]}
\makeatother

% For linebreaks inside Verbatim environment from package fancyvrb.


\makeatletter
% Boxes that hold the pre-typeset continuation symbol and visible space.
\newbox\Wrappedcontinuationbox
\newbox\Wrappedvisiblespacebox
% Red visible-space glyph shown at the end of a line broken at a space.
\newcommand*\Wrappedvisiblespace {\textcolor{red}{\textvisiblespace}}
% Red tiny hook arrow, left-overlapping (\llap), shown at the start of a
% continuation line.
\newcommand*\Wrappedcontinuationsymbol {\textcolor{red}{\llap{\tiny$\m@th\hookrightarrow$}}}
% Indentation of continuation lines.
\newcommand*\Wrappedcontinuationindent {3ex }
% Material inserted after a break: the indent, then a copy of the symbol box.
\newcommand*\Wrappedafterbreak {\kern\Wrappedcontinuationindent\copy\Wrappedcontinuationbox}
% Take advantage of the already applied Pygments mark-up to insert
% potential linebreaks for TeX processing.
% {, <, #, %, $, ' and ": go to next line.
% _, }, ^, &, >, - and ~: stay at end of broken line.
% Use of \textquotesingle for straight quote.
% Each definition is a \discretionary{pre-break}{post-break}{no-break}.
% NOTE(review): the escape macros defined in this preamble use the \PYZ..
% prefix (e.g. \PYZus), while the names redefined here are \PYGZ..;
% presumably these redefinitions have no effect on the listings -- confirm
% before renaming.
\newcommand*\Wrappedbreaksatspecials {%
    \def\PYGZus{\discretionary{\char`\_}{\Wrappedafterbreak}{\char`\_}}%
    \def\PYGZob{\discretionary{}{\Wrappedafterbreak\char`\{}{\char`\{}}%
    \def\PYGZcb{\discretionary{\char`\}}{\Wrappedafterbreak}{\char`\}}}%
    \def\PYGZca{\discretionary{\char`\^}{\Wrappedafterbreak}{\char`\^}}%
    \def\PYGZam{\discretionary{\char`\&}{\Wrappedafterbreak}{\char`\&}}%
    \def\PYGZlt{\discretionary{}{\Wrappedafterbreak\char`\<}{\char`\<}}%
    \def\PYGZgt{\discretionary{\char`\>}{\Wrappedafterbreak}{\char`\>}}%
    \def\PYGZsh{\discretionary{}{\Wrappedafterbreak\char`\#}{\char`\#}}%
    \def\PYGZpc{\discretionary{}{\Wrappedafterbreak\char`\%}{\char`\%}}%
    \def\PYGZdl{\discretionary{}{\Wrappedafterbreak\char`\$}{\char`\$}}%
    \def\PYGZhy{\discretionary{\char`\-}{\Wrappedafterbreak}{\char`\-}}%
    \def\PYGZsq{\discretionary{}{\Wrappedafterbreak\textquotesingle}{\textquotesingle}}%
    \def\PYGZdq{\discretionary{}{\Wrappedafterbreak\char`\"}{\char`\"}}%
    \def\PYGZti{\discretionary{\char`\~}{\Wrappedafterbreak}{\char`\~}}%
}
% Some characters . , ; : ? ! / are not pygmentized.
% This macro makes them "active" and they will insert potential linebreaks
% For each character, the \lccode/\lowercase trick turns the ~ in "\def~"
% into that character as an active token, which is then defined as a
% \discretionary that keeps the character at the end of the broken line and
% starts the next line with \Wrappedafterbreak. The \catcode lines then make
% the characters active, and the last line restores the \lccode of ~.
\newcommand*\Wrappedbreaksatpunct {%
    \lccode`\~`\.\lowercase{\def~}{\discretionary{\hbox{\char`\.}}{\Wrappedafterbreak}{\hbox{\char`\.}}}%
    \lccode`\~`\,\lowercase{\def~}{\discretionary{\hbox{\char`\,}}{\Wrappedafterbreak}{\hbox{\char`\,}}}%
    \lccode`\~`\;\lowercase{\def~}{\discretionary{\hbox{\char`\;}}{\Wrappedafterbreak}{\hbox{\char`\;}}}%
    \lccode`\~`\:\lowercase{\def~}{\discretionary{\hbox{\char`\:}}{\Wrappedafterbreak}{\hbox{\char`\:}}}%
    \lccode`\~`\?\lowercase{\def~}{\discretionary{\hbox{\char`\?}}{\Wrappedafterbreak}{\hbox{\char`\?}}}%
    \lccode`\~`\!\lowercase{\def~}{\discretionary{\hbox{\char`\!}}{\Wrappedafterbreak}{\hbox{\char`\!}}}%
    \lccode`\~`\/\lowercase{\def~}{\discretionary{\hbox{\char`\/}}{\Wrappedafterbreak}{\hbox{\char`\/}}}%
    \catcode`\.\active
    \catcode`\,\active
    \catcode`\;\active
    \catcode`\:\active
    \catcode`\?\active
    \catcode`\!\active
    \catcode`\/\active
    \lccode`\~`\~
}
\makeatother

% Wrap fancyvrb's Verbatim so that long lines can break.
% The original \Verbatim is saved as \OriginalVerbatim; the replacement takes
% one optional argument (default "1"), then:
%   1. typesets the continuation symbol and visible space into their boxes;
%   2. redefines \FancyVerbFormatLine to set each line ragged-right in a
%      \vtop of width \linewidth with hyphenation penalties set to zero;
%   3. redefines \FV@Space as a breakable space (see the comment below);
%   4. runs \Wrappedbreaksatspecials;
%   5. calls \OriginalVerbatim with the caller's options plus
%      codes*=\Wrappedbreaksatpunct.
\let\OriginalVerbatim=\Verbatim
\makeatletter
\renewcommand{\Verbatim}[1][1]{%
%\parskip\z@skip
\sbox\Wrappedcontinuationbox {\Wrappedcontinuationsymbol}%
\sbox\Wrappedvisiblespacebox {\FV@SetupFont\Wrappedvisiblespace}%
\def\FancyVerbFormatLine ##1{\hsize\linewidth
\vtop{\raggedright\hyphenpenalty\z@\exhyphenpenalty\z@
\doublehyphendemerits\z@\finalhyphendemerits\z@
\strut ##1\strut}%
}%
% If the linebreak is at a space, the latter will be displayed as visible
% space at end of first line, and a continuation symbol starts next line.
% Stretch/shrink are however usually zero for typewriter font.
\def\FV@Space {%
\nobreak\hskip\z@ plus\fontdimen3\font minus\fontdimen4\font
\discretionary{\copy\Wrappedvisiblespacebox}{\Wrappedafterbreak}
{\kern\fontdimen2\font}%
}%

% Allow breaks at special characters using \PYG... macros.


\Wrappedbreaksatspecials
% Breaks at punctuation characters . , ; : ? ! and / need catcode=\active
\OriginalVerbatim[#1,codes*=\Wrappedbreaksatpunct]%
}
\makeatother

% Exact colors from NB


\definecolor{incolor}{HTML}{303F9F}
\definecolor{outcolor}{HTML}{D84315}
\definecolor{cellborder}{HTML}{CFCFCF}
\definecolor{cellbackground}{HTML}{F7F7F7}

% prompt
% \boxspacing: two kerns, by tcolorbox's internal left-rule width and box
% separation (\kvtcb@left@rule, \kvtcb@boxsep); the @ in those names is why
% the definition sits between \makeatletter and \makeatother.
\makeatletter
\newcommand{\boxspacing}{\kern\kvtcb@left@rule\kern\kvtcb@boxsep}
\makeatother
% \prompt{#1}{#2}{#3}{#4}: typesets "[#3]:" in colour #2 and typewriter font,
% followed by a 3pt space and #4, left-overlapping (\llap) its position, then
% moves back up by one \baselineskip. #1 is accepted but not used in the body.
\newcommand{\prompt}[4]{
{\ttfamily\llap{{\color{#2}[#3]:\hspace{3pt}#4}}\vspace{-\baselineskip}}
}

% Prevent overflowing lines due to hard-to-break entities


\sloppy
% Setup hyperref package
\hypersetup{
breaklinks=true, % so long urls are correctly broken across lines
colorlinks=true,
urlcolor=urlcolor,
linkcolor=linkcolor,
citecolor=citecolor,
}
% Slightly bigger margins than the latex defaults

\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}

\begin{document}

\maketitle

About the homework: In some tasks, in addition to writing code, answers
to questions and conclusions are required (there are special Markdown
cells marked with \textbf{Answer}).

The ability to analyze the results of experiments is an important skill.
Therefore, answers carry more weight than the code: the code accounts
for 30\% of the task grade, while answers to questions account for 70\%.

\textbf{Reminder:} Plagiarism is prohibited. Use of AI without
mentioning how and in which parts it was used is also prohibited. If
plagiarism or dishonest use of AI is discovered, the assignment will be
graded 0.

% Notebook input cell 1: library imports and warning filter.
\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{k+kn}{import} \PY{n+nn}{matplotlib}\PY{n+nn}{.}\PY{n+nn}{pyplot} \PY{k}{as} \PY{n+nn}{plt}
\PY{k+kn}{import} \PY{n+nn}{numpy} \PY{k}{as} \PY{n+nn}{np}
\PY{k+kn}{import} \PY{n+nn}{pandas} \PY{k}{as} \PY{n+nn}{pd}
\PY{k+kn}{import} \PY{n+nn}{seaborn} \PY{k}{as} \PY{n+nn}{sns}
\PY{k+kn}{from} \PY{n+nn}{sklearn}\PY{n+nn}{.}\PY{n+nn}{model\PYZus{}selection} \PY{k+kn}{import} \PY{n}{train\PYZus{}test\PYZus{}split}
\PY{k+kn}{from} \PY{n+nn}{sklearn}\PY{n+nn}{.}\PY{n+nn}{preprocessing} \PY{k+kn}{import} \PY{n}{OneHotEncoder}
\PY{k+kn}{from} \PY{n+nn}{sklearn}\PY{n+nn}{.}\PY{n+nn}{linear\PYZus{}model} \PY{k+kn}{import} \PY{n}{LinearRegression}
\PY{k+kn}{from} \PY{n+nn}{sklearn}\PY{n+nn}{.}\PY{n+nn}{metrics} \PY{k+kn}{import} \PY{n}{mean\PYZus{}squared\PYZus{}error}
\PY{k+kn}{from} \PY{n+nn}{sklearn}\PY{n+nn}{.}\PY{n+nn}{preprocessing} \PY{k+kn}{import} \PY{n}{StandardScaler}
\PY{k+kn}{from} \PY{n+nn}{sklearn}\PY{n+nn}{.}\PY{n+nn}{linear\PYZus{}model} \PY{k+kn}{import} \PY{n}{Lasso}
\PY{k+kn}{from} \PY{n+nn}{sklearn}\PY{n+nn}{.}\PY{n+nn}{linear\PYZus{}model} \PY{k+kn}{import} \PY{n}{Ridge}
\PY{k+kn}{from} \PY{n+nn}{sklearn}\PY{n+nn}{.}\PY{n+nn}{model\PYZus{}selection} \PY{k+kn}{import} \PY{n}{GridSearchCV}
\PY{k+kn}{import} \PY{n+nn}{warnings}

\PY{n}{warnings}\PY{o}{.}\PY{n}{filterwarnings}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{ignore}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{category}\PY{o}{=}\PY{n+ne}{FutureWarning}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

Let's work with data about students.

% Notebook input cell 2: shell command that downloads students.csv.
\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{o}{!}wget\PY{+w}{ }https://raw.githubusercontent.com/Majid\PYZhy{}Sohrabi/DAM\PYZhy{}2024/refs/heads/main/07\PYZhy{}trees/students.csv
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]
'wget' is not recognized as an internal or external command,
operable program or batch file.
\end{Verbatim}

% Notebook input cell 3: load students.csv and show a random sample of 4 rows.
\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{df} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}{read\PYZus{}csv}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{students.csv}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}

\PY{n}{df}\PY{o}{.}\PY{n}{sample}\PY{p}{(}\PY{l+m+mi}{4}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

% Output of cell 3: the four sampled rows, printed in five column groups.
% Column alignment is reconstructed (right-justified columns, two-space
% separator); the extracted text had its whitespace collapsed.
\begin{tcolorbox}[breakable, size=fbox, boxrule=.5pt, pad at break*=1mm, opacityfill=0]
\prompt{Out}{outcolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
      ID  Performance Index  Previous Scores (out of 10)  \textbackslash{}
964  965               70.0                            7
15    16               65.0                            7
933  934               90.0                            8
785  786               85.0                            9

     Sample Question Papers Practiced  Hours Studied  \textbackslash{}
964                                 1              6
15                                  9              6
933                                 9              9
785                                 5              6

     Hours Prepared for Exams  Hours Free time  Sleep Hours  Hours Not Sleep  \textbackslash{}
964                         3               10            5               19
15                          3                8            6               18
933                         4                4            7               17
785                         3                9            6               18

     Extracurricular Activities  Has a car  Sex    Height     Weight        BMI  \textbackslash{}
964                          No         No    f  1.484778  74.765670  33.914032
15                          Yes        NaN    m  1.672624  71.629860  25.603391
933                         Yes        NaN    f  1.623859  57.967983  21.983232
785                          No         No    f  1.618163  82.568565  31.533342

     Eye color  Nails length  Favorite Subject  Least Favorite Subject
964      brown      0.007216               NaN                 History
15       brown      0.003618             Drama            Data Science
933       blue      0.002448               Art                 Biology
785      brown      0.007026  Machine Learning                   Drama
\end{Verbatim}
\end{tcolorbox}

We will predict their performance index.

% Notebook input cell 4: split df into features X and target y.
\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{X} \PY{o}{=} \PY{n}{df}\PY{o}{.}\PY{n}{drop}\PY{p}{(}\PY{n}{columns}\PY{o}{=}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Performance Index}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{)}
\PY{n}{y} \PY{o}{=} \PY{n}{df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Performance Index}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}
\end{Verbatim}
\end{tcolorbox}

\section{Task 1 (2 points)}\label{task-1-2-points}

Let's try to form a hypothesis about which features will be the most useful.

For every feature \(x^j\):

\begin{itemize}
\tightlist
\item
Draw a plot of \(x^j\) against the target (a scatter plot if the feature
is numerical, a box plot if the feature is categorical).
\item
Compute the correlation between \(x^j\) and the target if the feature is numerical.
\end{itemize}

Draw conclusions: which features seem to be the most important? Which
do not seem useful? Is this logical?

% Notebook input cell 5: analyze_features, which plots each feature against
% the target and prints the correlation for numerical features.
% The Python indentation is reconstructed from the control flow; the
% extracted text had all leading whitespace stripped.
\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{k}{def} \PY{n+nf}{analyze\PYZus{}features}\PY{p}{(}\PY{n}{X}\PY{p}{,} \PY{n}{y}\PY{p}{)}\PY{p}{:}
    \PY{k}{for} \PY{n}{column} \PY{o+ow}{in} \PY{n}{X}\PY{o}{.}\PY{n}{columns}\PY{p}{:}
        \PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+se}{\PYZbs{}n}\PY{l+s+s2}{Analyzing feature: }\PY{l+s+si}{\PYZob{}}\PY{n}{column}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}

        \PY{k}{if} \PY{n}{X}\PY{p}{[}\PY{n}{column}\PY{p}{]}\PY{o}{.}\PY{n}{dtype} \PY{o+ow}{in} \PY{p}{[}\PY{n}{np}\PY{o}{.}\PY{n}{float64}\PY{p}{,} \PY{n}{np}\PY{o}{.}\PY{n}{int64}\PY{p}{]}\PY{p}{:}
            \PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\PY{l+m+mi}{8}\PY{p}{,} \PY{l+m+mi}{5}\PY{p}{)}\PY{p}{)}
            \PY{n}{sns}\PY{o}{.}\PY{n}{scatterplot}\PY{p}{(}\PY{n}{x}\PY{o}{=}\PY{n}{X}\PY{p}{[}\PY{n}{column}\PY{p}{]}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{n}{y}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Scatter Plot: }\PY{l+s+si}{\PYZob{}}\PY{n}{column}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{ vs Performance Index}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{n}{column}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Performance Index}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}

            \PY{n}{correlation} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{corrcoef}\PY{p}{(}\PY{n}{X}\PY{p}{[}\PY{n}{column}\PY{p}{]}\PY{p}{,} \PY{n}{y}\PY{p}{)}\PY{p}{[}\PY{l+m+mi}{0}\PY{p}{,} \PY{l+m+mi}{1}\PY{p}{]}
            \PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Correlation with Performance Index: }\PY{l+s+si}{\PYZob{}}\PY{n}{correlation}\PY{l+s+si}{:}\PY{l+s+s2}{.2f}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
        \PY{k}{else}\PY{p}{:}
            \PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\PY{l+m+mi}{8}\PY{p}{,} \PY{l+m+mi}{5}\PY{p}{)}\PY{p}{)}
            \PY{n}{sns}\PY{o}{.}\PY{n}{boxplot}\PY{p}{(}\PY{n}{x}\PY{o}{=}\PY{n}{X}\PY{p}{[}\PY{n}{column}\PY{p}{]}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{n}{y}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Box Plot: }\PY{l+s+si}{\PYZob{}}\PY{n}{column}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{ vs Performance Index}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{n}{column}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Performance Index}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
            \PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}

\PY{n}{analyze\PYZus{}features}\PY{p}{(}\PY{n}{X}\PY{p}{,} \PY{n}{y}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]

Analyzing feature: ID
\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_1.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: -0.03

Analyzing feature: Previous Scores (out of 10)


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_3.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.86

Analyzing feature: Sample Question Papers Practiced


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_5.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.01

Analyzing feature: Hours Studied


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_7.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.32

Analyzing feature: Hours Prepared for Exams


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_9.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.30

Analyzing feature: Hours Free time


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_11.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: -0.24

Analyzing feature: Sleep Hours


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_13.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.05

Analyzing feature: Hours Not Sleep
\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_15.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: -0.05

Analyzing feature: Extracurricular Activities


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_17.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]

Analyzing feature: Has a car


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_19.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]

Analyzing feature: Sex


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_21.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]

Analyzing feature: Height


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_23.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.05

Analyzing feature: Weight


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_25.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: nan

Analyzing feature: BMI


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_27.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: nan

Analyzing feature: Eye color


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_29.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]

Analyzing feature: Nails length


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_31.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.04

Analyzing feature: Favorite Subject


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_33.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]

Analyzing feature: Least Favorite Subject


\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_35.png}
\end{center}
{ \hspace*{\fill} \\}

\textbf{Answer} \#YOUR ANSWER

\section{Task 2 (3 points)}\label{task-2-3-points}

Split the data into train and test sets. Set
\texttt{test\_size=0.3,\ random\_state=0}.

% Notebook input cell 6: train/test split (test_size=0.3, random_state=0)
% and printout of the resulting shapes.
\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{X\PYZus{}train}\PY{p}{,} \PY{n}{X\PYZus{}test}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{p}{,} \PY{n}{y\PYZus{}test} \PY{o}{=} \PY{n}{train\PYZus{}test\PYZus{}split}\PY{p}{(}\PY{n}{X}\PY{p}{,} \PY{n}{y}\PY{p}{,} \PY{n}{test\PYZus{}size}\PY{o}{=}\PY{l+m+mf}{0.3}\PY{p}{,} \PY{n}{random\PYZus{}state}\PY{o}{=}\PY{l+m+mi}{0}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{X\PYZus{}train shape: }\PY{l+s+si}{\PYZob{}}\PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{shape}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{X\PYZus{}test shape: }\PY{l+s+si}{\PYZob{}}\PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}{shape}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{y\PYZus{}train shape: }\PY{l+s+si}{\PYZob{}}\PY{n}{y\PYZus{}train}\PY{o}{.}\PY{n}{shape}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{y\PYZus{}test shape: }\PY{l+s+si}{\PYZob{}}\PY{n}{y\PYZus{}test}\PY{o}{.}\PY{n}{shape}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]
X\_train shape: (700, 18)
X\_test shape: (300, 18)
y\_train shape: (700,)
y\_test shape: (300,)
\end{Verbatim}

Preprocess the data:

\begin{itemize}
\tightlist
\item
Show the number of missing values in every column, then replace missing
values with the median computed on the train data (numerical features)
or with `Not Given' (categorical features).
\item
Encode the categorical features with a one-hot encoder.
\end{itemize}

\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{7}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{X\PYZus{}train}\PY{p}{,} \PY{n}{X\PYZus{}test}\PY{p}{,} \PY{n}{y\
PYZus{}train}\PY{p}{,} \PY{n}{y\PYZus{}test} \PY{o}{=} \PY{n}{train\PYZus{}test\
PYZus{}split}\PY{p}{(}\PY{n}{X}\PY{p}{,} \PY{n}{y}\PY{p}{,} \PY{n}{test\
PYZus{}size}\PY{o}{=}\PY{l+m+mf}{0.2}\PY{p}{,} \PY{n}{random\PYZus{}state}\PY{o}
{=}\PY{l+m+mi}{42}\PY{p}{)}

\PY{n}{missing\PYZus{}values} \PY{o}{=} \PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}


{isnull}\PY{p}{(}\PY{p}{)}\PY{o}{.}\PY{n}{sum}\PY{p}{(}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Missing values in each
column:}\PY{l+s+se}{\PYZbs{}n}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{missing\
PYZus{}values}\PY{p}{)}

\PY{n}{values} \PY{o}{=} \PY{p}{\PYZob{}}\PY{p}{\PYZcb{}}

\PY{k}{for} \PY{n}{col} \PY{o+ow}{in} \PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}


{columns}\PY{p}{:}
\PY{k}{if} \PY{n}{X\PYZus{}train}\PY{p}{[}\PY{n}{col}\PY{p}{]}\PY{o}{.}\PY{n}
{dtype} \PY{o+ow}{in} \PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{float64}\PY{l+s+s1}
{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{int64}\PY{l+s+s1}{\PYZsq{}}\
PY{p}{]}\PY{p}{:}
\PY{n}{median\PYZus{}value} \PY{o}{=} \PY{n}{X\PYZus{}train}\PY{p}{[}\PY{n}
{col}\PY{p}{]}\PY{o}{.}\PY{n}{median}\PY{p}{(}\PY{p}{)}
\PY{n}{values}\PY{p}{[}\PY{n}{col}\PY{p}{]} \PY{o}{=} \PY{n}{median\
PYZus{}value}
\PY{k}{else}\PY{p}{:}
\PY{n}{values}\PY{p}{[}\PY{n}{col}\PY{p}{]} \PY{o}{=} \PY{l+s+s1}{\
PYZsq{}}\PY{l+s+s1}{Not Given}\PY{l+s+s1}{\PYZsq{}}

\PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{fillna}\PY{p}{(}\PY{n}{values}\PY{p}{,} \
PY{n}{inplace}\PY{o}{=}\PY{k+kc}{True}\PY{p}{)}
\PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}{fillna}\PY{p}{(}\PY{n}{values}\PY{p}{,} \PY{n}
{inplace}\PY{o}{=}\PY{k+kc}{True}\PY{p}{)}

\PY{n}{categorical\PYZus{}features} \PY{o}{=} \PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}


{select\PYZus{}dtypes}\PY{p}{(}\PY{n}{exclude}\PY{o}{=}\PY{p}{[}\PY{l+s+s1}{\
PYZsq{}}\PY{l+s+s1}{int64}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{float64}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{)}\PY{o}{.}\PY{n}{columns}\
PY{o}{.}\PY{n}{tolist}\PY{p}{(}\PY{p}{)}

\PY{n}{encoder} \PY{o}{=} \PY{n}{OneHotEncoder}\PY{p}{(}\PY{n}{handle\


PYZus{}unknown}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{ignore}\PY{l+s+s1}{\
PYZsq{}}\PY{p}{,} \PY{n}{sparse}\PY{o}{=}\PY{k+kc}{False}\PY{p}{)}

\PY{n}{X\PYZus{}train\PYZus{}encoded} \PY{o}{=} \PY{n}{encoder}\PY{o}{.}\PY{n}{fit\


PYZus{}transform}\PY{p}{(}\PY{n}{X\PYZus{}train}\PY{p}{[}\PY{n}{categorical\
PYZus{}features}\PY{p}{]}\PY{p}{)}
\PY{n}{X\PYZus{}test\PYZus{}encoded} \PY{o}{=} \PY{n}{encoder}\PY{o}{.}\PY{n}
{transform}\PY{p}{(}\PY{n}{X\PYZus{}test}\PY{p}{[}\PY{n}{categorical\
PYZus{}features}\PY{p}{]}\PY{p}{)}

\PY{n}{encoded\PYZus{}columns} \PY{o}{=} \PY{n}{encoder}\PY{o}{.}\PY{n}{get\


PYZus{}feature\PYZus{}names\PYZus{}out}\PY{p}{(}\PY{n}{categorical\
PYZus{}features}\PY{p}{)}
\PY{n}{X\PYZus{}train\PYZus{}encoded\PYZus{}df} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}
{DataFrame}\PY{p}{(}\PY{n}{X\PYZus{}train\PYZus{}encoded}\PY{p}{,} \PY{n}{columns}\
PY{o}{=}\PY{n}{encoded\PYZus{}columns}\PY{p}{,} \PY{n}{index}\PY{o}{=}\PY{n}{X\
PYZus{}train}\PY{o}{.}\PY{n}{index}\PY{p}{)}
\PY{n}{X\PYZus{}test\PYZus{}encoded\PYZus{}df} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}
{DataFrame}\PY{p}{(}\PY{n}{X\PYZus{}test\PYZus{}encoded}\PY{p}{,} \PY{n}{columns}\
PY{o}{=}\PY{n}{encoded\PYZus{}columns}\PY{p}{,} \PY{n}{index}\PY{o}{=}\PY{n}{X\
PYZus{}test}\PY{o}{.}\PY{n}{index}\PY{p}{)}

\PY{n}{X\PYZus{}train} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}{concat}\PY{p}{(}\PY{p}


{[}\PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{drop}\PY{p}{(}\PY{n}{columns}\PY{o}{=}\
PY{n}{categorical\PYZus{}features}\PY{p}{)}\PY{p}{,} \PY{n}{X\PYZus{}train\
PYZus{}encoded\PYZus{}df}\PY{p}{]}\PY{p}{,} \PY{n}{axis}\PY{o}{=}\PY{l+m+mi}{1}\
PY{p}{)}
\PY{n}{X\PYZus{}test} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}{concat}\PY{p}{(}\PY{p}
{[}\PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}{drop}\PY{p}{(}\PY{n}{columns}\PY{o}{=}\
PY{n}{categorical\PYZus{}features}\PY{p}{)}\PY{p}{,} \PY{n}{X\PYZus{}test\
PYZus{}encoded\PYZus{}df}\PY{p}{]}\PY{p}{,} \PY{n}{axis}\PY{o}{=}\PY{l+m+mi}{1}\
PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Processed training data


shape:}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{shape}\
PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Processed test data
shape:}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}{shape}\
PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]
Missing values in each column:
ID 0
Previous Scores (out of 10) 0
Sample Question Papers Practiced 0
Hours Studied 0
Hours Prepared for Exams 0
Hours Free time 0
Sleep Hours 0
Hours Not Sleep 0
Extracurricular Activities 0
Has a car 66
Sex 0
Height 0
Weight 0
BMI 0
Eye color 26
Nails length 0
Favorite Subject 95
Least Favorite Subject 86
dtype: int64
Processed training data shape: (800, 44)
Processed test data shape: (200, 44)
\end{Verbatim}

Train linear regression without regularization and compute the MSE on
the train and test data.

\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{8}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{model} \PY{o}{=} \PY{n}{LinearRegression}\PY{p}{(}\PY{p}{)}

\PY{n}{model}\PY{o}{.}\PY{n}{fit}\PY{p}{(}\PY{n}{X\PYZus{}train}\PY{p}{,} \PY{n}{y\
PYZus{}train}\PY{p}{)}

\PY{n}{y\PYZus{}train\PYZus{}pred} \PY{o}{=} \PY{n}{model}\PY{o}{.}\PY{n}{predict}\


PY{p}{(}\PY{n}{X\PYZus{}train}\PY{p}{)}

\PY{n}{y\PYZus{}test\PYZus{}pred} \PY{o}{=} \PY{n}{model}\PY{o}{.}\PY{n}{predict}\


PY{p}{(}\PY{n}{X\PYZus{}test}\PY{p}{)}

\PY{n}{mse\PYZus{}train} \PY{o}{=} \PY{n}{mean\PYZus{}squared\PYZus{}error}\PY{p}


{(}\PY{n}{y\PYZus{}train}\PY{p}{,} \PY{n}{y\PYZus{}train\PYZus{}pred}\PY{p}{)}
\PY{n}{mse\PYZus{}test} \PY{o}{=} \PY{n}{mean\PYZus{}squared\PYZus{}error}\PY{p}
{(}\PY{n}{y\PYZus{}test}\PY{p}{,} \PY{n}{y\PYZus{}test\PYZus{}pred}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Mean Squared Error


(Train):}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{mse\PYZus{}train}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Mean Squared Error
(Test):}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{mse\PYZus{}test}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]
Mean Squared Error (Train): 33.41315001919449
Mean Squared Error (Test): 35.01524646715072
\end{Verbatim}

\section{Task 3 (3 points)}\label{task-3-3-points}

Investigate the coefficients of the model. Why are they so big/small?
Can the absolute value of the coefficient be used as a measure of
importance of the feature? Why/why not?

\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{9}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{coefficients} \PY{o}{=} \PY{n}{model}\PY{o}{.}\PY{n}{coef\PYZus{}}
\PY{n}{features} \PY{o}{=} \PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{columns}

\PY{n}{coef\PYZus{}df} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}{DataFrame}\PY{p}{(}\


PY{p}{\PYZob{}}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \PY{n}
{features}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \
PY{n}{coefficients}
\PY{p}{\PYZcb{}}\PY{p}{)}\PY{o}{.}\PY{n}{sort\PYZus{}values}\PY{p}{(}\PY{n}{by}\
PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,}
\PY{n}{key}\PY{o}{=}\PY{n+nb}{abs}\PY{p}{,} \PY{n}{ascending}\PY{o}{=}\PY{k+kc}
{False}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{n}{coef\PYZus{}df}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{12}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{barplot}\PY{p}{(}\PY{n}{data}\PY{o}{=}\PY{n}{coef\
PYZus{}df}\PY{p}{,} \PY{n}{x}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature
Coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xticks}\PY{p}{(}\PY{n}{rotation}\PY{o}{=}\PY{l+m+mi}
{45}\PY{p}{,} \PY{n}{ha}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{right}\PY{l+s+s1}
{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]
Feature Coefficient
11 Nails length 75.753324
1 Previous Scores (out of 10) 8.819939
8 Height 4.329989
3 Hours Studied 2.815551
28 Favorite Subject\_Data Science 2.512030
20 Eye color\_blue 2.293019
33 Favorite Subject\_Not Given -0.962370
34 Favorite Subject\_PE -0.915451
19 Eye color\_Not Given -0.760727
24 Eye color\_hazel -0.725891
31 Favorite Subject\_Machine Learning -0.723537
27 Favorite Subject\_Biology 0.634076
29 Favorite Subject\_Drama -0.616945
42 Least Favorite Subject\_Not Given -0.570334
23 Eye color\_green -0.553014
35 Least Favorite Subject\_Algebra 0.485285
12 Extracurricular Activities\_No -0.443115
13 Extracurricular Activities\_Yes 0.443115
10 BMI 0.427057
16 Has a car\_Yes 0.424814
30 Favorite Subject\_History 0.390032
14 Has a car\_No -0.389230
18 Sex\_m 0.388419
17 Sex\_f -0.388419
36 Least Favorite Subject\_Art 0.312078
39 Least Favorite Subject\_Drama 0.261711
21 Eye color\_brown -0.249533
37 Least Favorite Subject\_Biology -0.211375
32 Favorite Subject\_Music -0.199302
9 Weight -0.176340
38 Least Favorite Subject\_Data Science -0.142514
25 Favorite Subject\_Algebra -0.129790
41 Least Favorite Subject\_Machine Learning -0.099963
6 Sleep Hours 0.097429
7 Hours Not Sleep -0.097429
2 Sample Question Papers Practiced 0.052788
43 Least Favorite Subject\_PE -0.043824
15 Has a car\_Not Given -0.035584
4 Hours Prepared for Exams -0.020286
5 Hours Free time -0.019722
26 Favorite Subject\_Art 0.011258
40 Least Favorite Subject\_History 0.008937
22 Eye color\_gray -0.003854
0 ID 0.001049
\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_21_1.png}
\end{center}
{ \hspace*{\fill} \\}

\textbf{Answer} \#YOUR ANSWER

Draw conclusions about the model and important features and explain if
it is logical. If it cannot be done right now, tackle the problem.

\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{10}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{scaler} \PY{o}{=} \PY{n}{StandardScaler}\PY{p}{(}\PY{p}{)}
\PY{n}{X\PYZus{}train\PYZus{}scaled} \PY{o}{=} \PY{n}{scaler}\PY{o}{.}\PY{n}{fit\
PYZus{}transform}\PY{p}{(}\PY{n}{X\PYZus{}train}\PY{p}{)}
\PY{n}{X\PYZus{}test\PYZus{}scaled} \PY{o}{=} \PY{n}{scaler}\PY{o}{.}\PY{n}
{transform}\PY{p}{(}\PY{n}{X\PYZus{}test}\PY{p}{)}

\PY{n}{model\PYZus{}scaled} \PY{o}{=} \PY{n}{LinearRegression}\PY{p}{(}\PY{p}{)}


\PY{n}{model\PYZus{}scaled}\PY{o}{.}\PY{n}{fit}\PY{p}{(}\PY{n}{X\PYZus{}train\
PYZus{}scaled}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{p}{)}

\PY{n}{y\PYZus{}train\PYZus{}pred\PYZus{}scaled} \PY{o}{=} \PY{n}{model\


PYZus{}scaled}\PY{o}{.}\PY{n}{predict}\PY{p}{(}\PY{n}{X\PYZus{}train\
PYZus{}scaled}\PY{p}{)}
\PY{n}{y\PYZus{}test\PYZus{}pred\PYZus{}scaled} \PY{o}{=} \PY{n}{model\
PYZus{}scaled}\PY{o}{.}\PY{n}{predict}\PY{p}{(}\PY{n}{X\PYZus{}test\PYZus{}scaled}\
PY{p}{)}

\PY{n}{mse\PYZus{}train\PYZus{}scaled} \PY{o}{=} \PY{n}{mean\PYZus{}squared\


PYZus{}error}\PY{p}{(}\PY{n}{y\PYZus{}train}\PY{p}{,} \PY{n}{y\PYZus{}train\
PYZus{}pred\PYZus{}scaled}\PY{p}{)}
\PY{n}{mse\PYZus{}test\PYZus{}scaled} \PY{o}{=} \PY{n}{mean\PYZus{}squared\
PYZus{}error}\PY{p}{(}\PY{n}{y\PYZus{}test}\PY{p}{,} \PY{n}{y\PYZus{}test\
PYZus{}pred\PYZus{}scaled}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{MSE on Scaled Data


(Train):}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{mse\PYZus{}train\PYZus{}scaled}\
PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{MSE on Scaled Data
(Test):}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{mse\PYZus{}test\PYZus{}scaled}\PY{p}
{)}

\PY{n}{scaled\PYZus{}coefficients} \PY{o}{=} \PY{n}{model\PYZus{}scaled}\PY{o}{.}\


PY{n}{coef\PYZus{}}
\PY{n}{features} \PY{o}{=} \PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{columns}

\PY{n}{scaled\PYZus{}coef\PYZus{}df} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}


{DataFrame}\PY{p}{(}\PY{p}{\PYZob{}}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \PY{n}
{features}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \
PY{n}{scaled\PYZus{}coefficients}
\PY{p}{\PYZcb{}}\PY{p}{)}\PY{o}{.}\PY{n}{sort\PYZus{}values}\PY{p}{(}\PY{n}{by}\
PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,}
\PY{n}{key}\PY{o}{=}\PY{n+nb}{abs}\PY{p}{,} \PY{n}{ascending}\PY{o}{=}\PY{k+kc}
{False}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Feature Importance
(Scaled Data):}\PY{l+s+se}{\PYZbs{}n}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{scaled\
PYZus{}coef\PYZus{}df}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{12}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{barplot}\PY{p}{(}\PY{n}{data}\PY{o}{=}\PY{n}{scaled\
PYZus{}coef\PYZus{}df}\PY{p}{,} \PY{n}{x}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature
Coefficients (Standardized Data)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xticks}\PY{p}{(}\PY{n}{rotation}\PY{o}{=}\PY{l+m+mi}
{45}\PY{p}{,} \PY{n}{ha}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{right}\PY{l+s+s1}
{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]
MSE on Scaled Data (Train): 34.35796646723679
MSE on Scaled Data (Test): 35.92801779595547
Feature Importance (Scaled Data):
Feature Coefficient
21 Eye color\_brown 6.325281e+14
22 Eye color\_gray 5.495992e+14
23 Eye color\_green 3.918485e+14
24 Eye color\_hazel 3.173848e+14
18 Sex\_m 2.668008e+14
17 Sex\_f 2.668008e+14
20 Eye color\_blue 2.478994e+14
14 Has a car\_No 2.340483e+14
7 Hours Not Sleep -2.307394e+14
6 Sleep Hours -2.307394e+14
19 Eye color\_Not Given 2.243246e+14
26 Favorite Subject\_Art -2.233820e+14
32 Favorite Subject\_Music -1.946857e+14
33 Favorite Subject\_Not Given -1.929421e+14
16 Has a car\_Yes 1.920359e+14
34 Favorite Subject\_PE -1.828381e+14
31 Favorite Subject\_Machine Learning -1.779310e+14
29 Favorite Subject\_Drama -1.728044e+14
25 Favorite Subject\_Algebra -1.674379e+14
30 Favorite Subject\_History -1.606477e+14
27 Favorite Subject\_Biology -1.582915e+14
15 Has a car\_Not Given 1.577493e+14
28 Favorite Subject\_Data Science -1.345094e+14
43 Least Favorite Subject\_PE -5.697428e+13
37 Least Favorite Subject\_Biology -5.625066e+13
36 Least Favorite Subject\_Art -5.344811e+13
39 Least Favorite Subject\_Drama -5.344811e+13
38 Least Favorite Subject\_Data Science -5.318087e+13
35 Least Favorite Subject\_Algebra -5.318087e+13
42 Least Favorite Subject\_Not Given -5.291141e+13
41 Least Favorite Subject\_Machine Learning -5.236573e+13
40 Least Favorite Subject\_History -5.096033e+13
13 Extracurricular Activities\_Yes 8.938172e+12
12 Extracurricular Activities\_No 8.938172e+12
1 Previous Scores (out of 10) 1.623020e+01
3 Hours Studied 6.757968e+00
10 BMI -1.453314e+00
8 Height -1.175614e+00
9 Weight 8.328092e-01
0 ID 3.044399e-01
5 Hours Free time -2.577608e-01
2 Sample Question Papers Practiced 1.949572e-01
4 Hours Prepared for Exams 1.259778e-01
11 Nails length -9.631292e-02
\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_24_1.png}
\end{center}
{ \hspace*{\fill} \\}

\textbf{Answer} \#YOUR ANSWER

\section{Task 3 (3 points)}\label{task-3-3-points-1}

For different values of the \texttt{alpha} hyperparameter, fit linear
regression with \(L_1\) regularization (\texttt{Lasso} model; if there
are warnings, set \texttt{tol=0.1}).

Choose the grid in such a way that the smallest alpha results in 0--5
zero coefficients and the biggest alpha makes all coefficients zero.

For every model, save the number of zero coefficients and the MSE on
train and test data.

\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{11}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{alphas} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{logspace}\PY{p}{(}\PY{o}{\
PYZhy{}}\PY{l+m+mi}{4}\PY{p}{,} \PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{10}\PY{p}{)} \
PY{c+c1}{\PYZsh{} Test alpha values from 10\PYZca{}\PYZhy{}4 to 10\PYZca{}1}

\PY{n}{results} \PY{o}{=} \PY{p}{[}\PY{p}{]}

\PY{k}{for} \PY{n}{alpha} \PY{o+ow}{in} \PY{n}{alphas}\PY{p}{:}


\PY{n}{lasso\PYZus{}model} \PY{o}{=} \PY{n}{Lasso}\PY{p}{(}\PY{n}{alpha}\PY{o}
{=}\PY{n}{alpha}\PY{p}{,} \PY{n}{tol}\PY{o}{=}\PY{l+m+mf}{0.1}\PY{p}{,} \PY{n}
{random\PYZus{}state}\PY{o}{=}\PY{l+m+mi}{42}\PY{p}{)}

\PY{n}{lasso\PYZus{}model}\PY{o}{.}\PY{n}{fit}\PY{p}{(}\PY{n}{X\PYZus{}train\
PYZus{}scaled}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{p}{)}

\PY{n}{zero\PYZus{}coefficients} \PY{o}{=} \PY{n}{np}\PY{o}{.}\PY{n}{sum}\PY{p}


{(}\PY{n}{lasso\PYZus{}model}\PY{o}{.}\PY{n}{coef\PYZus{}} \PY{o}{==} \PY{l+m+mi}
{0}\PY{p}{)}

\PY{n}{y\PYZus{}train\PYZus{}pred\PYZus{}lasso} \PY{o}{=} \PY{n}{lasso\


PYZus{}model}\PY{o}{.}\PY{n}{predict}\PY{p}{(}\PY{n}{X\PYZus{}train\PYZus{}scaled}\
PY{p}{)}
\PY{n}{y\PYZus{}test\PYZus{}pred\PYZus{}lasso} \PY{o}{=} \PY{n}{lasso\
PYZus{}model}\PY{o}{.}\PY{n}{predict}\PY{p}{(}\PY{n}{X\PYZus{}test\PYZus{}scaled}\
PY{p}{)}

\PY{n}{mse\PYZus{}train\PYZus{}lasso} \PY{o}{=} \PY{n}{mean\PYZus{}squared\


PYZus{}error}\PY{p}{(}\PY{n}{y\PYZus{}train}\PY{p}{,} \PY{n}{y\PYZus{}train\
PYZus{}pred\PYZus{}lasso}\PY{p}{)}
\PY{n}{mse\PYZus{}test\PYZus{}lasso} \PY{o}{=} \PY{n}{mean\PYZus{}squared\
PYZus{}error}\PY{p}{(}\PY{n}{y\PYZus{}test}\PY{p}{,} \PY{n}{y\PYZus{}test\
PYZus{}pred\PYZus{}lasso}\PY{p}{)}

\PY{n}{results}\PY{o}{.}\PY{n}{append}\PY{p}{(}\PY{p}{\PYZob{}}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \
PY{n}{alpha}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{zero\PYZus{}coefficients}\PY{l+s+s1}{\
PYZsq{}}\PY{p}{:} \PY{n}{zero\PYZus{}coefficients}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}train}\PY{l+s+s1}{\PYZsq{}}\
PY{p}{:} \PY{n}{mse\PYZus{}train\PYZus{}lasso}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}test}\PY{l+s+s1}{\PYZsq{}}\
PY{p}{:} \PY{n}{mse\PYZus{}test\PYZus{}lasso}
\PY{p}{\PYZcb{}}\PY{p}{)}

\PY{n}{results\PYZus{}df} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}{DataFrame}\PY{p}{(}\


PY{n}{results}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{12}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{2}\
PY{p}{,} \PY{l+m+mi}{1}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{zero\
PYZus{}coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{marker}\PY{o}
{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{d}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha
(log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Number
of Zero Coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Zero
Coefficients vs Alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{2}\
PY{p}{,} \PY{l+m+mi}{2}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\
PYZus{}train}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{label}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Train MSE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}
{marker}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{d}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}test}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{label}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{Test MSE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{marker}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{d}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha
(log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Mean
Squared Error}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{legend}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{MSE vs
Alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{Verbatim}[commandchars=\\\{\}]
alpha zero\_coefficients mse\_train mse\_test
0 0.000100 1 33.460394 34.933565
1 0.000359 1 33.460624 34.932772
2 0.001292 2 33.461489 34.929961
3 0.004642 2 33.465088 34.922845
4 0.016681 6 33.481958 34.914884
5 0.059948 10 33.606476 35.039053
6 0.215443 30 34.317761 35.629144
7 0.774264 41 36.453629 37.818053
8 2.782559 42 51.849926 52.343639
9 10.000000 43 188.207827 187.842796
\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_28_1.png}
\end{center}
{ \hspace*{\fill} \\}

Draw two plots side by side: the number of zero coefficients and the MSE
(both train and test on one plot). Draw conclusions: compare the MSE of
Lasso and the initial linear regression, and investigate the importance
of features in the new model (which are non-zero?).

\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{12}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{14}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{2}\
PY{p}{,} \PY{l+m+mi}{1}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{zero\
PYZus{}coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{marker}\PY{o}
{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{s}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{color}\
PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{black}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,}\PY{n}
{label}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Zero Coefficients}\PY{l+s+s1}{\
PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha
(log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Number
of Zero Coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Zero
Coefficients vs Alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{legend}\PY{p}{(}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{2}\
PY{p}{,} \PY{l+m+mi}{2}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\
PYZus{}train}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{label}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Train MSE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}
{marker}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{s}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}test}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{label}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{Test MSE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{marker}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{\PYZca{}}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha
(log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Mean
Squared Error}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{legend}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{MSE vs
Alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}

\PY{n}{best\PYZus{}model\PYZus{}index} \PY{o}{=} \PY{n}{results\PYZus{}df}\PY{p}


{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}test}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\
PY{o}{.}\PY{n}{idxmin}\PY{p}{(}\PY{p}{)}
\PY{n}{best\PYZus{}alpha} \PY{o}{=} \PY{n}{results\PYZus{}df}\PY{o}{.}\PY{n}{loc}\
PY{p}{[}\PY{n}{best\PYZus{}model\PYZus{}index}\PY{p}{,} \PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}

\PY{n}{best\PYZus{}lasso\PYZus{}model} \PY{o}{=} \PY{n}{Lasso}\PY{p}{(}\PY{n}


{alpha}\PY{o}{=}\PY{n}{best\PYZus{}alpha}\PY{p}{,} \PY{n}{tol}\PY{o}{=}\PY{l+m+mf}
{0.1}\PY{p}{,} \PY{n}{random\PYZus{}state}\PY{o}{=}\PY{l+m+mi}{42}\PY{p}{)}
\PY{n}{best\PYZus{}lasso\PYZus{}model}\PY{o}{.}\PY{n}{fit}\PY{p}{(}\PY{n}{X\
PYZus{}train\PYZus{}scaled}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{p}{)}

\PY{n}{non\PYZus{}zero\PYZus{}features} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}


{DataFrame}\PY{p}{(}\PY{p}{\PYZob{}}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \PY{n}
{features}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \
PY{n}{best\PYZus{}lasso\PYZus{}model}\PY{o}{.}\PY{n}{coef\PYZus{}}
\PY{p}{\PYZcb{}}\PY{p}{)}\PY{o}{.}\PY{n}{query}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\
PY{l+s+s2}{Coefficient != 0}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}\PY{o}{.}\PY{n}{sort\
PYZus{}values}\PY{p}{(}\PY{n}{by}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{key}\PY{o}{=}\PY{n+nb}{abs}\
PY{p}{,} \PY{n}{ascending}\PY{o}{=}\PY{k+kc}{False}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Non\PYZhy{}Zero
Coefficients in the Best Lasso Model:}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{non\PYZus{}zero\PYZus{}features}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{10}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{barplot}\PY{p}{(}\PY{n}{data}\PY{o}{=}\PY{n}{non\
PYZus{}zero\PYZus{}features}\PY{p}{,} \PY{n}{x}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{l+s+s1}
{\PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{orient}\PY{o}
{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{h}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Non\
PYZhy{}Zero Coefficients in Best Lasso Model}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Coefficient Value}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_30_0.png}
\end{center}
{ \hspace*{\fill} \\}

\begin{Verbatim}[commandchars=\\\{\}]
Non-Zero Coefficients in the Best Lasso Model:
Feature Coefficient
1 Previous Scores (out of 10) 1.608457e+01
3 Hours Studied 7.066704e+00
28 Favorite Subject\_Data Science 6.560467e-01
20 Eye color\_blue 4.739343e-01
8 Height -4.367537e-01
12 Extracurricular Activities\_No -4.173775e-01
14 Has a car\_No -2.813193e-01
6 Sleep Hours 2.718085e-01
27 Favorite Subject\_Biology 2.579085e-01
0 ID 2.450867e-01
17 Sex\_f -2.358383e-01
30 Favorite Subject\_History 2.185682e-01
11 Nails length 1.903384e-01
9 Weight -1.700829e-01
33 Favorite Subject\_Not Given -1.643066e-01
34 Favorite Subject\_PE -1.598870e-01
35 Least Favorite Subject\_Algebra 1.401083e-01
42 Least Favorite Subject\_Not Given -1.366785e-01
5 Hours Free time -1.256126e-01
2 Sample Question Papers Practiced 1.245590e-01
26 Favorite Subject\_Art 1.130772e-01
36 Least Favorite Subject\_Art 9.661969e-02
22 Eye color\_gray 9.209113e-02
24 Eye color\_hazel -9.055465e-02
39 Least Favorite Subject\_Drama 8.245955e-02
16 Has a car\_Yes 7.531293e-02
31 Favorite Subject\_Machine Learning -6.733776e-02
23 Eye color\_green -6.458070e-02
15 Has a car\_Not Given -5.750441e-02
19 Eye color\_Not Given -5.274793e-02
32 Favorite Subject\_Music 5.083011e-02
29 Favorite Subject\_Drama -4.350958e-02
37 Least Favorite Subject\_Biology -3.196163e-02
25 Favorite Subject\_Algebra 1.900236e-02
40 Least Favorite Subject\_History 1.515391e-02
4 Hours Prepared for Exams 6.509990e-03
38 Least Favorite Subject\_Data Science -3.606735e-03
13 Extracurricular Activities\_Yes 7.327472e-17
\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_30_2.png}
\end{center}
{ \hspace*{\fill} \\}
\textbf{Answer} \#YOUR ANSWER

\section{Task 4 (2 points)}\label{task-4-2-points}

Now, use \(L_2\) regularization. Tune the hyperparameters of the
\texttt{Ridge} model using cross-validation. Report the best MSE on test
data.

\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{13}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{params} \PY{o}{=} \PY{p}{\PYZob{}}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \PY{n}{np}\PY{o}{.}\PY{n}{logspace}\PY{p}{(}\PY{o}{\
PYZhy{}}\PY{l+m+mi}{5}\PY{p}{,} \PY{l+m+mi}{5}\PY{p}{,} \PY{l+m+mi}{50}\PY{p}{)}\
PY{p}{\PYZcb{}}

\PY{n}{ridge\PYZus{}model} \PY{o}{=} \PY{n}{Ridge}\PY{p}{(}\PY{p}{)}

\PY{n}{grid\PYZus{}search} \PY{o}{=} \PY{n}{GridSearchCV}\PY{p}{(}\PY{n}{estimator}\PY{o}{=}\PY{n}{ridge\PYZus{}model}\PY{p}{,} \PY{n}{param\PYZus{}grid}\PY{o}{=}\PY{n}{params}\PY{p}{,} \PY{n}{scoring}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{neg\PYZus{}mean\PYZus{}squared\PYZus{}error}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{cv}\PY{o}{=}\PY{l+m+mi}{5}\PY{p}{,} \PY{n}{verbose}\PY{o}{=}\PY{l+m+mi}{1}\PY{p}{)}

\PY{n}{grid\PYZus{}search}\PY{o}{.}\PY{n}{fit}\PY{p}{(}\PY{n}{X\PYZus{}train\PYZus{}scaled}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{p}{)}

\PY{n}{best\PYZus{}ridge\PYZus{}model} \PY{o}{=} \PY{n}{grid\PYZus{}search}\PY{o}{.}\PY{n}{best\PYZus{}estimator\PYZus{}}
\PY{n}{best\PYZus{}alpha} \PY{o}{=} \PY{n}{grid\PYZus{}search}\PY{o}{.}\PY{n}{best\PYZus{}params\PYZus{}}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}

\PY{n}{y\PYZus{}test\PYZus{}pred\PYZus{}ridge} \PY{o}{=} \PY{n}{best\PYZus{}ridge\PYZus{}model}\PY{o}{.}\PY{n}{predict}\PY{p}{(}\PY{n}{X\PYZus{}test\PYZus{}scaled}\PY{p}{)}

\PY{n}{mse\PYZus{}test\PYZus{}ridge} \PY{o}{=} \PY{n}{mean\PYZus{}squared\PYZus{}error}\PY{p}{(}\PY{n}{y\PYZus{}test}\PY{p}{,} \PY{n}{y\PYZus{}test\PYZus{}pred\PYZus{}ridge}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Best alpha: }\PY{l+s+si}{\PYZob{}}\PY{n}{best\PYZus{}alpha}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Best Test MSE (Ridge): }\PY{l+s+si}{\PYZob{}}\PY{n}{mse\PYZus{}test\PYZus{}ridge}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}

\PY{n}{cv\PYZus{}results} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}{DataFrame}\PY{p}{(}\PY{n}{grid\PYZus{}search}\PY{o}{.}\PY{n}{cv\PYZus{}results\PYZus{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\PY{l+m+mi}{10}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{cv\PYZus{}results}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{param\PYZus{}alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{o}{\PYZhy{}}\PY{n}{cv\PYZus{}results}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mean\PYZus{}test\PYZus{}score}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{marker}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{d}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{color}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{green}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha (log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Mean Cross\PYZhy{}Validated MSE (negative)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Ridge Cross\PYZhy{}Validation Results}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Best alpha: 2.0235896477251556
Best Test MSE (Ridge): 34.976535731861425
\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_34_1.png}
\end{center}
{ \hspace*{\fill} \\}

Investigate coefficients: how many zeros? Which features are important?

\begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{14}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{ridge\PYZus{}coefficients} \PY{o}{=} \PY{n}{best\PYZus{}ridge\PYZus{}model}\PY{o}{.}\PY{n}{coef\PYZus{}}

\PY{n}{ridge\PYZus{}coef\PYZus{}df} \PY{o}{=} \PY{n}{pd}\PY{o}{.}\PY{n}{DataFrame}\PY{p}{(}\PY{p}{\PYZob{}}
    \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \PY{n}{features}\PY{p}{,}
    \PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \PY{n}{ridge\PYZus{}coefficients}
\PY{p}{\PYZcb{}}\PY{p}{)}\PY{o}{.}\PY{n}{sort\PYZus{}values}\PY{p}{(}\PY{n}{by}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{key}\PY{o}{=}\PY{n+nb}{abs}\PY{p}{,} \PY{n}{ascending}\PY{o}{=}\PY{k+kc}{False}\PY{p}{)}

\PY{n}{near\PYZus{}zero\PYZus{}count} \PY{o}{=} \PY{p}{(}\PY{n}{np}\PY{o}{.}\PY{n}{abs}\PY{p}{(}\PY{n}{ridge\PYZus{}coefficients}\PY{p}{)} \PY{o}{\PYZlt{}} \PY{l+m+mf}{1e\PYZhy{}6}\PY{p}{)}\PY{o}{.}\PY{n}{sum}\PY{p}{(}\PY{p}{)}

\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Number of coefficients close to zero: }\PY{l+s+si}{\PYZob{}}\PY{n}{near\PYZus{}zero\PYZus{}count}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Most Important Features in Ridge Regression:}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{ridge\PYZus{}coef\PYZus{}df}\PY{o}{.}\PY{n}{head}\PY{p}{(}\PY{p}{)}\PY{p}{)}

\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\PY{l+m+mi}{12}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{barplot}\PY{p}{(}\PY{n}{data}\PY{o}{=}\PY{n}{ridge\PYZus{}coef\PYZus{}df}\PY{p}{,} \PY{n}{x}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{orient}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{h}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Ridge Regression Coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Coefficient Value}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{Verbatim}[commandchars=\\\{\}]
Number of coefficients close to zero: 0
Most Important Features in Ridge Regression:
Feature Coefficient
1 Previous Scores (out of 10) 16.132697
3 Hours Studied 6.988131
9 Weight -1.094029
10 BMI 1.079102
28 Favorite Subject\_Data Science 0.604166
\end{Verbatim}

\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_36_1.png}
\end{center}
{ \hspace*{\fill} \\}

\textbf{Answer} \#YOUR ANSWER

% Add a bibliography block to the postdoc

\end{document}

You might also like