Hw05 Linear Regression
Hw05 Linear Regression
Hw05 Linear Regression
\usepackage[breakable]{tcolorbox}
\usepackage{parskip} % Stop auto-indenting (to mimic markdown behaviour)
% Basic figure setup, for now with no caption control since it's done
% automatically by Pandoc (which extracts ![](path) syntax from Markdown).
\usepackage{graphicx}
% Maintain compatibility with old templates. Remove in nbconvert 6.0
\let\Oldincludegraphics\includegraphics
% Ensure that by default, figures have no caption (until we provide a
% proper Figure object with a Caption API and a way to capture that
% in the conversion process - todo).
\usepackage{caption}
\DeclareCaptionFormat{nocaption}{}
\captionsetup{format=nocaption,aboveskip=0pt,belowskip=0pt}
\usepackage{float}
\floatplacement{figure}{H} % forces figures to be placed at the correct
location
\usepackage{xcolor} % Allow colors to be defined
\usepackage{enumerate} % Needed for markdown enumerations to work
\usepackage{geometry} % Used to adjust the document margins
\usepackage{amsmath} % Equations
\usepackage{amssymb} % Equations
\usepackage{textcomp} % defines textquotesingle
% Hack from http://tex.stackexchange.com/a/47451/13684:
\AtBeginDocument{%
\def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code
}
\usepackage{upquote} % Upright quotes for verbatim code
\usepackage{eurosym} % defines \euro
\usepackage{iftex}
\ifPDFTeX
\usepackage[T1]{fontenc}
\IfFileExists{alphabeta.sty}{
\usepackage{alphabeta}
}{
\usepackage[mathletters]{ucs}
\usepackage[utf8x]{inputenc}
}
\else
\usepackage{fontspec}
\usepackage{unicode-math}
\fi
% ANSI colors
\definecolor{ansi-black}{HTML}{3E424D}
\definecolor{ansi-black-intense}{HTML}{282C36}
\definecolor{ansi-red}{HTML}{E75C58}
\definecolor{ansi-red-intense}{HTML}{B22B31}
\definecolor{ansi-green}{HTML}{00A250}
\definecolor{ansi-green-intense}{HTML}{007427}
\definecolor{ansi-yellow}{HTML}{DDB62B}
\definecolor{ansi-yellow-intense}{HTML}{B27D12}
\definecolor{ansi-blue}{HTML}{208FFB}
\definecolor{ansi-blue-intense}{HTML}{0065CA}
\definecolor{ansi-magenta}{HTML}{D160C4}
\definecolor{ansi-magenta-intense}{HTML}{A03196}
\definecolor{ansi-cyan}{HTML}{60C6C8}
\definecolor{ansi-cyan-intense}{HTML}{258F8F}
\definecolor{ansi-white}{HTML}{C5C1B4}
\definecolor{ansi-white-intense}{HTML}{A1A6B2}
\definecolor{ansi-default-inverse-fg}{HTML}{FFFFFF}
\definecolor{ansi-default-inverse-bg}{HTML}{000000}
% Define a nice break command that doesn't care if a line doesn't already
% exist.
\def\br{\hspace*{\fill} \\* }
% Math Jax compatibility definitions
\def\gt{>}
\def\lt{<}
\let\Oldtex\TeX
\let\Oldlatex\LaTeX
\renewcommand{\TeX}{\textrm{\Oldtex}}
\renewcommand{\LaTeX}{\textrm{\Oldlatex}}
% Document parameters
% Document title
\title{hw05\_linear\_regression}
% Pygments definitions
\makeatletter
\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax%
\let\PY@ul=\relax \let\PY@tc=\relax%
\let\PY@bc=\relax \let\PY@ff=\relax}
\def\PY@tok#1{\csname PY@tok@#1\endcsname}
\def\PY@toks#1+{\ifx\relax#1\empty\else%
\PY@tok{#1}\expandafter\PY@toks\fi}
\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{%
\PY@it{\PY@bf{\PY@ff{#1}}}}}}}
\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}}
\@namedef{PY@tok@w}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\@namedef{PY@tok@c}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.24,0.48,0.48}
{##1}}}
\@namedef{PY@tok@cp}{\def\PY@tc##1{\textcolor[rgb]{0.61,0.40,0.00}{##1}}}
\@namedef{PY@tok@k}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}
{##1}}}
\@namedef{PY@tok@kp}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kt}{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\@namedef{PY@tok@o}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@ow}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.67,0.13,1.00}{##1}}}
\@namedef{PY@tok@nb}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@nf}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@nc}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@nn}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@ne}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.80,0.25,0.22}{##1}}}
\@namedef{PY@tok@nv}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@no}{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\@namedef{PY@tok@nl}{\def\PY@tc##1{\textcolor[rgb]{0.46,0.46,0.00}{##1}}}
\@namedef{PY@tok@ni}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.44,0.44,0.44}{##1}}}
\@namedef{PY@tok@na}{\def\PY@tc##1{\textcolor[rgb]{0.41,0.47,0.13}{##1}}}
\@namedef{PY@tok@nt}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@nd}{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\@namedef{PY@tok@s}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sd}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@si}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.64,0.35,0.47}{##1}}}
\@namedef{PY@tok@se}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.67,0.36,0.12}{##1}}}
\@namedef{PY@tok@sr}{\def\PY@tc##1{\textcolor[rgb]{0.64,0.35,0.47}{##1}}}
\@namedef{PY@tok@ss}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@sx}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@m}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@gh}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.00,0.50}{##1}}}
\@namedef{PY@tok@gu}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.50,0.00,0.50}{##1}}}
\@namedef{PY@tok@gd}{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\@namedef{PY@tok@gi}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.52,0.00}{##1}}}
\@namedef{PY@tok@gr}{\def\PY@tc##1{\textcolor[rgb]{0.89,0.00,0.00}{##1}}}
\@namedef{PY@tok@ge}{\let\PY@it=\textit}
\@namedef{PY@tok@gs}{\let\PY@bf=\textbf}
\@namedef{PY@tok@gp}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.00,0.50}{##1}}}
\@namedef{PY@tok@go}{\def\PY@tc##1{\textcolor[rgb]{0.44,0.44,0.44}{##1}}}
\@namedef{PY@tok@gt}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
\@namedef{PY@tok@err}{\def\PY@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}\
fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}}
\@namedef{PY@tok@kc}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kd}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kn}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kr}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]
{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@bp}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@fm}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@vc}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vg}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vi}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vm}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@sa}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sb}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sc}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@dl}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@s2}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sh}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@s1}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@mb}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mf}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mh}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mi}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@il}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mo}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@ch}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\@namedef{PY@tok@cm}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\@namedef{PY@tok@cpf}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\@namedef{PY@tok@c1}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\@namedef{PY@tok@cs}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]
{0.24,0.48,0.48}{##1}}}
\def\PYZbs{\char`\\}
\def\PYZus{\char`\_}
\def\PYZob{\char`\{}
\def\PYZcb{\char`\}}
\def\PYZca{\char`\^}
\def\PYZam{\char`\&}
\def\PYZlt{\char`\<}
\def\PYZgt{\char`\>}
\def\PYZsh{\char`\#}
\def\PYZpc{\char`\%}
\def\PYZdl{\char`\$}
\def\PYZhy{\char`\-}
\def\PYZsq{\char`\'}
\def\PYZdq{\char`\"}
\def\PYZti{\char`\~}
% for compatibility with earlier versions
\def\PYZat{@}
\def\PYZlb{[}
\def\PYZrb{]}
\makeatother
\let\OriginalVerbatim=\Verbatim
\makeatletter
\renewcommand{\Verbatim}[1][1]{%
%\parskip\z@skip
\sbox\Wrappedcontinuationbox {\Wrappedcontinuationsymbol}%
\sbox\Wrappedvisiblespacebox {\FV@SetupFont\Wrappedvisiblespace}%
\def\FancyVerbFormatLine ##1{\hsize\linewidth
\vtop{\raggedright\hyphenpenalty\z@\exhyphenpenalty\z@
\doublehyphendemerits\z@\finalhyphendemerits\z@
\strut ##1\strut}%
}%
% If the linebreak is at a space, the latter will be displayed as visible
% space at end of first line, and a continuation symbol starts next line.
% Stretch/shrink are however usually zero for typewriter font.
\def\FV@Space {%
\nobreak\hskip\z@ plus\fontdimen3\font minus\fontdimen4\font
\discretionary{\copy\Wrappedvisiblespacebox}{\Wrappedafterbreak}
{\kern\fontdimen2\font}%
}%
% prompt
\makeatletter
\newcommand{\boxspacing}{\kern\kvtcb@left@rule\kern\kvtcb@boxsep}
\makeatother
\newcommand{\prompt}[4]{
{\ttfamily\llap{{\color{#2}[#3]:\hspace{3pt}#4}}\vspace{-\baselineskip}}
}
\geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
\begin{document}
\maketitle
\PY{n}{warnings}\PY{o}{.}\PY{n}{filterwarnings}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\
PY{l+s+s2}{ignore}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{category}\PY{o}{=}\PY{n+ne}
{FutureWarning}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{Verbatim}[commandchars=\\\{\}]
'wget' is not recognized as an internal or external command,
operable program or batch file.
\end{Verbatim}
\PY{n}{df}\PY{o}{.}\PY{n}{sample}\PY{p}{(}\PY{l+m+mi}{4}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
Hours Prepared for Exams Hours Free time Sleep Hours Hours Not Sleep \
textbackslash{}
964 3 10 5 19
15 3 8 6 18
933 4 4 7 17
785 3 9 6 18
\section{Task 1 (2 points)}\label{task-1-2-points}
Let's try to propose hypothesis, which features will be the most useful.
\begin{itemize}
\tightlist
\item
Draw a plot of \(x^j\) and target (scatter plot, if feature is
numerical and boxplot if the feature is categorical)
\item
Compute correlation of \(x^j\) and target if the feature is numerical.
\end{itemize}
\PY{k}{if} \PY{n}{X}\PY{p}{[}\PY{n}{column}\PY{p}{]}\PY{o}{.}\PY{n}
{dtype} \PY{o+ow}{in} \PY{p}{[}\PY{n}{np}\PY{o}{.}\PY{n}{float64}\PY{p}{,} \PY{n}
{np}\PY{o}{.}\PY{n}{int64}\PY{p}{]}\PY{p}{:}
\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\
PY{p}{(}\PY{l+m+mi}{8}\PY{p}{,} \PY{l+m+mi}{5}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{scatterplot}\PY{p}{(}\PY{n}{x}\PY{o}{=}\
PY{n}{X}\PY{p}{[}\PY{n}{column}\PY{p}{]}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{n}{y}\PY{p}
{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\
PYZdq{}}\PY{l+s+s2}{Scatter Plot: }\PY{l+s+si}{\PYZob{}}\PY{n}{column}\PY{l+s+si}{\
PYZcb{}}\PY{l+s+s2}{ vs Performance Index}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{n}{column}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\
PY{l+s+s2}{Performance Index}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\PY{n}{analyze\PYZus{}features}\PY{p}{(}\PY{n}{X}\PY{p}{,} \PY{n}{y}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{Verbatim}[commandchars=\\\{\}]
Analyzing feature: ID
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_1.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: -0.03
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_3.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.86
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_5.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.01
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_7.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.32
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_9.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.30
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_11.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: -0.24
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_13.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.05
Analyzing feature: Hours Not Sleep
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_15.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: -0.05
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_17.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_19.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_21.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_23.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.05
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_25.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: nan
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_27.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: nan
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_29.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_31.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Correlation with Performance Index: 0.04
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_33.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_10_35.png}
\end{center}
{ \hspace*{\fill} \\}
\section{Task 2 (3 points)}\label{task-2-3-points}
Split data into train and test. Set
\texttt{test\_size=0.3,\ random\_state=0}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{X\
PYZus{}train shape: }\PY{l+s+si}{\PYZob{}}\PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}
{shape}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{X\
PYZus{}test shape: }\PY{l+s+si}{\PYZob{}}\PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}
{shape}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{y\
PYZus{}train shape: }\PY{l+s+si}{\PYZob{}}\PY{n}{y\PYZus{}train}\PY{o}{.}\PY{n}
{shape}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{y\
PYZus{}test shape: }\PY{l+s+si}{\PYZob{}}\PY{n}{y\PYZus{}test}\PY{o}{.}\PY{n}
{shape}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{Verbatim}[commandchars=\\\{\}]
X\_train shape: (700, 18)
X\_test shape: (300, 18)
y\_train shape: (700,)
y\_test shape: (300,)
\end{Verbatim}
\begin{itemize}
\tightlist
\item
Depict number of missing values in every column, replace missing
values with median, computed on train data (numerical features) or
with `Not Given' (categorical features)
\item
Encode the categorical with one-hot-encoder
\end{itemize}
\PY{n}{X\PYZus{}train}\PY{o}{.}\PY{n}{fillna}\PY{p}{(}\PY{n}{values}\PY{p}{,} \
PY{n}{inplace}\PY{o}{=}\PY{k+kc}{True}\PY{p}{)}
\PY{n}{X\PYZus{}test}\PY{o}{.}\PY{n}{fillna}\PY{p}{(}\PY{n}{values}\PY{p}{,} \PY{n}
{inplace}\PY{o}{=}\PY{k+kc}{True}\PY{p}{)}
\begin{Verbatim}[commandchars=\\\{\}]
Missing values in each column:
ID 0
Previous Scores (out of 10) 0
Sample Question Papers Practiced 0
Hours Studied 0
Hours Prepared for Exams 0
Hours Free time 0
Sleep Hours 0
Hours Not Sleep 0
Extracurricular Activities 0
Has a car 66
Sex 0
Height 0
Weight 0
BMI 0
Eye color 26
Nails length 0
Favorite Subject 95
Least Favorite Subject 86
dtype: int64
Processed training data shape: (800, 44)
Processed test data shape: (200, 44)
\end{Verbatim}
\PY{n}{model}\PY{o}{.}\PY{n}{fit}\PY{p}{(}\PY{n}{X\PYZus{}train}\PY{p}{,} \PY{n}{y\
PYZus{}train}\PY{p}{)}
\begin{Verbatim}[commandchars=\\\{\}]
Mean Squared Error (Train): 33.41315001919449
Mean Squared Error (Test): 35.01524646715072
\end{Verbatim}
\section{Task 3 (3 points)}\label{task-3-3-points}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{coef\PYZus{}df}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{12}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{barplot}\PY{p}{(}\PY{n}{data}\PY{o}{=}\PY{n}{coef\
PYZus{}df}\PY{p}{,} \PY{n}{x}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature
Coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xticks}\PY{p}{(}\PY{n}{rotation}\PY{o}{=}\PY{l+m+mi}
{45}\PY{p}{,} \PY{n}{ha}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{right}\PY{l+s+s1}
{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{Verbatim}[commandchars=\\\{\}]
Feature Coefficient
11 Nails length 75.753324
1 Previous Scores (out of 10) 8.819939
8 Height 4.329989
3 Hours Studied 2.815551
28 Favorite Subject\_Data Science 2.512030
20 Eye color\_blue 2.293019
33 Favorite Subject\_Not Given -0.962370
34 Favorite Subject\_PE -0.915451
19 Eye color\_Not Given -0.760727
24 Eye color\_hazel -0.725891
31 Favorite Subject\_Machine Learning -0.723537
27 Favorite Subject\_Biology 0.634076
29 Favorite Subject\_Drama -0.616945
42 Least Favorite Subject\_Not Given -0.570334
23 Eye color\_green -0.553014
35 Least Favorite Subject\_Algebra 0.485285
12 Extracurricular Activities\_No -0.443115
13 Extracurricular Activities\_Yes 0.443115
10 BMI 0.427057
16 Has a car\_Yes 0.424814
30 Favorite Subject\_History 0.390032
14 Has a car\_No -0.389230
18 Sex\_m 0.388419
17 Sex\_f -0.388419
36 Least Favorite Subject\_Art 0.312078
39 Least Favorite Subject\_Drama 0.261711
21 Eye color\_brown -0.249533
37 Least Favorite Subject\_Biology -0.211375
32 Favorite Subject\_Music -0.199302
9 Weight -0.176340
38 Least Favorite Subject\_Data Science -0.142514
25 Favorite Subject\_Algebra -0.129790
41 Least Favorite Subject\_Machine Learning -0.099963
6 Sleep Hours 0.097429
7 Hours Not Sleep -0.097429
2 Sample Question Papers Practiced 0.052788
43 Least Favorite Subject\_PE -0.043824
15 Has a car\_Not Given -0.035584
4 Hours Prepared for Exams -0.020286
5 Hours Free time -0.019722
26 Favorite Subject\_Art 0.011258
40 Least Favorite Subject\_History 0.008937
22 Eye color\_gray -0.003854
0 ID 0.001049
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_21_1.png}
\end{center}
{ \hspace*{\fill} \\}
Draw conclusions about the model and important features and explain if
it is logical. If it cannot be done right now, tackle the problem.
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Feature Importance
(Scaled Data):}\PY{l+s+se}{\PYZbs{}n}\PY{l+s+s2}{\PYZdq{}}\PY{p}{,} \PY{n}{scaled\
PYZus{}coef\PYZus{}df}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{12}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{barplot}\PY{p}{(}\PY{n}{data}\PY{o}{=}\PY{n}{scaled\
PYZus{}coef\PYZus{}df}\PY{p}{,} \PY{n}{x}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Feature
Coefficients (Standardized Data)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xticks}\PY{p}{(}\PY{n}{rotation}\PY{o}{=}\PY{l+m+mi}
{45}\PY{p}{,} \PY{n}{ha}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{right}\PY{l+s+s1}
{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{Verbatim}[commandchars=\\\{\}]
MSE on Scaled Data (Train): 34.35796646723679
MSE on Scaled Data (Test): 35.92801779595547
Feature Importance (Scaled Data):
Feature Coefficient
21 Eye color\_brown 6.325281e+14
22 Eye color\_gray 5.495992e+14
23 Eye color\_green 3.918485e+14
24 Eye color\_hazel 3.173848e+14
18 Sex\_m 2.668008e+14
17 Sex\_f 2.668008e+14
20 Eye color\_blue 2.478994e+14
14 Has a car\_No 2.340483e+14
7 Hours Not Sleep -2.307394e+14
6 Sleep Hours -2.307394e+14
19 Eye color\_Not Given 2.243246e+14
26 Favorite Subject\_Art -2.233820e+14
32 Favorite Subject\_Music -1.946857e+14
33 Favorite Subject\_Not Given -1.929421e+14
16 Has a car\_Yes 1.920359e+14
34 Favorite Subject\_PE -1.828381e+14
31 Favorite Subject\_Machine Learning -1.779310e+14
29 Favorite Subject\_Drama -1.728044e+14
25 Favorite Subject\_Algebra -1.674379e+14
30 Favorite Subject\_History -1.606477e+14
27 Favorite Subject\_Biology -1.582915e+14
15 Has a car\_Not Given 1.577493e+14
28 Favorite Subject\_Data Science -1.345094e+14
43 Least Favorite Subject\_PE -5.697428e+13
37 Least Favorite Subject\_Biology -5.625066e+13
36 Least Favorite Subject\_Art -5.344811e+13
39 Least Favorite Subject\_Drama -5.344811e+13
38 Least Favorite Subject\_Data Science -5.318087e+13
35 Least Favorite Subject\_Algebra -5.318087e+13
42 Least Favorite Subject\_Not Given -5.291141e+13
41 Least Favorite Subject\_Machine Learning -5.236573e+13
40 Least Favorite Subject\_History -5.096033e+13
13 Extracurricular Activities\_Yes 8.938172e+12
12 Extracurricular Activities\_No 8.938172e+12
1 Previous Scores (out of 10) 1.623020e+01
3 Hours Studied 6.757968e+00
10 BMI -1.453314e+00
8 Height -1.175614e+00
9 Weight 8.328092e-01
0 ID 3.044399e-01
5 Hours Free time -2.577608e-01
2 Sample Question Papers Practiced 1.949572e-01
4 Hours Prepared for Exams 1.259778e-01
11 Nails length -9.631292e-02
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_24_1.png}
\end{center}
{ \hspace*{\fill} \\}
\section{Task 3 (3 points)}\label{task-3-3-points}
Choose the grid in such way that the smallest alpha will result in 0-5
zero coefficent and for biggest - all coefficient will be 0.
For every model save number of zero coefficients and MSE on train and
test data
\PY{n}{lasso\PYZus{}model}\PY{o}{.}\PY{n}{fit}\PY{p}{(}\PY{n}{X\PYZus{}train\
PYZus{}scaled}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{p}{)}
\PY{n}{results}\PY{o}{.}\PY{n}{append}\PY{p}{(}\PY{p}{\PYZob{}}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{:} \
PY{n}{alpha}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{zero\PYZus{}coefficients}\PY{l+s+s1}{\
PYZsq{}}\PY{p}{:} \PY{n}{zero\PYZus{}coefficients}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}train}\PY{l+s+s1}{\PYZsq{}}\
PY{p}{:} \PY{n}{mse\PYZus{}train\PYZus{}lasso}\PY{p}{,}
\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}test}\PY{l+s+s1}{\PYZsq{}}\
PY{p}{:} \PY{n}{mse\PYZus{}test\PYZus{}lasso}
\PY{p}{\PYZcb{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{12}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{2}\
PY{p}{,} \PY{l+m+mi}{1}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{zero\
PYZus{}coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{marker}\PY{o}
{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{d}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha
(log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Number
of Zero Coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Zero
Coefficients vs Alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{2}\
PY{p}{,} \PY{l+m+mi}{2}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\
PYZus{}train}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{label}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Train MSE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}
{marker}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{d}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}test}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{label}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{Test MSE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{marker}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{d}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha
(log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Mean
Squared Error}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{legend}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{MSE vs
Alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{Verbatim}[commandchars=\\\{\}]
alpha zero\_coefficients mse\_train mse\_test
0 0.000100 1 33.460394 34.933565
1 0.000359 1 33.460624 34.932772
2 0.001292 2 33.461489 34.929961
3 0.004642 2 33.465088 34.922845
4 0.016681 6 33.481958 34.914884
5 0.059948 10 33.606476 35.039053
6 0.215443 30 34.317761 35.629144
7 0.774264 41 36.453629 37.818053
8 2.782559 42 51.849926 52.343639
9 10.000000 43 188.207827 187.842796
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_28_1.png}
\end{center}
{ \hspace*{\fill} \\}
Draw 2 plots near each other: number of zero coefficients and MSE (both
train and test on one plot). Draw conclusions: compare the MSE of Lasso
and initial linear regression, investigate the importance of features in
the new model (which are non-zero?).
\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{2}\
PY{p}{,} \PY{l+m+mi}{1}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{zero\
PYZus{}coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{marker}\PY{o}
{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{s}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{color}\
PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{black}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,}\PY{n}
{label}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Zero Coefficients}\PY{l+s+s1}{\
PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha
(log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Number
of Zero Coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Zero
Coefficients vs Alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{legend}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{subplot}\PY{p}{(}\PY{l+m+mi}{1}\PY{p}{,} \PY{l+m+mi}{2}\
PY{p}{,} \PY{l+m+mi}{2}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\
PYZus{}train}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{label}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Train MSE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}
{marker}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{s}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{plot}\PY{p}{(}\PY{n}{results\PYZus{}df}\PY{p}{[}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \
PY{n}{results\PYZus{}df}\PY{p}{[}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{mse\PYZus{}test}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{]}\PY{p}{,} \PY{n}{label}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{Test MSE}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{marker}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{\PYZca{}}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xscale}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{log}\
PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Alpha
(log scale)}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Mean
Squared Error}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{legend}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{MSE vs
Alpha}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Non\PYZhy{}Zero
Coefficients in the Best Lasso Model:}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{non\PYZus{}zero\PYZus{}features}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{10}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{barplot}\PY{p}{(}\PY{n}{data}\PY{o}{=}\PY{n}{non\
PYZus{}zero\PYZus{}features}\PY{p}{,} \PY{n}{x}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\
PY{l+s+s1}{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{l+s+s1}
{\PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{orient}\PY{o}
{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{h}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Non\
PYZhy{}Zero Coefficients in Best Lasso Model}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Coefficient Value}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_30_0.png}
\end{center}
{ \hspace*{\fill} \\}
\begin{Verbatim}[commandchars=\\\{\}]
Non-Zero Coefficients in the Best Lasso Model:
Feature Coefficient
1 Previous Scores (out of 10) 1.608457e+01
3 Hours Studied 7.066704e+00
28 Favorite Subject\_Data Science 6.560467e-01
20 Eye color\_blue 4.739343e-01
8 Height -4.367537e-01
12 Extracurricular Activities\_No -4.173775e-01
14 Has a car\_No -2.813193e-01
6 Sleep Hours 2.718085e-01
27 Favorite Subject\_Biology 2.579085e-01
0 ID 2.450867e-01
17 Sex\_f -2.358383e-01
30 Favorite Subject\_History 2.185682e-01
11 Nails length 1.903384e-01
9 Weight -1.700829e-01
33 Favorite Subject\_Not Given -1.643066e-01
34 Favorite Subject\_PE -1.598870e-01
35 Least Favorite Subject\_Algebra 1.401083e-01
42 Least Favorite Subject\_Not Given -1.366785e-01
5 Hours Free time -1.256126e-01
2 Sample Question Papers Practiced 1.245590e-01
26 Favorite Subject\_Art 1.130772e-01
36 Least Favorite Subject\_Art 9.661969e-02
22 Eye color\_gray 9.209113e-02
24 Eye color\_hazel -9.055465e-02
39 Least Favorite Subject\_Drama 8.245955e-02
16 Has a car\_Yes 7.531293e-02
31 Favorite Subject\_Machine Learning -6.733776e-02
23 Eye color\_green -6.458070e-02
15 Has a car\_Not Given -5.750441e-02
19 Eye color\_Not Given -5.274793e-02
32 Favorite Subject\_Music 5.083011e-02
29 Favorite Subject\_Drama -4.350958e-02
37 Least Favorite Subject\_Biology -3.196163e-02
25 Favorite Subject\_Algebra 1.900236e-02
40 Least Favorite Subject\_History 1.515391e-02
4 Hours Prepared for Exams 6.509990e-03
38 Least Favorite Subject\_Data Science -3.606735e-03
13 Extracurricular Activities\_Yes 7.327472e-17
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_30_2.png}
\end{center}
{ \hspace*{\fill} \\}
\textbf{Answer} \#YOUR ANSWER
\section{Task 4 (2 points)}\label{task-4-2-points}
\PY{n}{grid\PYZus{}search}\PY{o}{.}\PY{n}{fit}\PY{p}{(}\PY{n}{X\PYZus{}train\
PYZus{}scaled}\PY{p}{,} \PY{n}{y\PYZus{}train}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Best alpha:
}\PY{l+s+si}{\PYZob{}}\PY{n}{best\PYZus{}alpha}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\
PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Best Test
MSE (Ridge): }\PY{l+s+si}{\PYZob{}}\PY{n}{mse\PYZus{}test\PYZus{}ridge}\PY{l+s+si}
{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\begin{Verbatim}[commandchars=\\\{\}]
Fitting 5 folds for each of 50 candidates, totalling 250 fits
Best alpha: 2.0235896477251556
Best Test MSE (Ridge): 34.976535731861425
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_34_1.png}
\end{center}
{ \hspace*{\fill} \\}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+sa}{f}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Number of
coefficients close to zero: }\PY{l+s+si}{\PYZob{}}\PY{n}{near\PYZus{}zero\
PYZus{}count}\PY{l+s+si}{\PYZcb{}}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{l+s+s2}{\PYZdq{}}\PY{l+s+s2}{Most Important Features
in Ridge Regression:}\PY{l+s+s2}{\PYZdq{}}\PY{p}{)}
\PY{n+nb}{print}\PY{p}{(}\PY{n}{ridge\PYZus{}coef\PYZus{}df}\PY{o}{.}\PY{n}{head}\
PY{p}{(}\PY{p}{)}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{figure}\PY{p}{(}\PY{n}{figsize}\PY{o}{=}\PY{p}{(}\
PY{l+m+mi}{12}\PY{p}{,} \PY{l+m+mi}{6}\PY{p}{)}\PY{p}{)}
\PY{n}{sns}\PY{o}{.}\PY{n}{barplot}\PY{p}{(}\PY{n}{data}\PY{o}{=}\PY{n}{ridge\
PYZus{}coef\PYZus{}df}\PY{p}{,} \PY{n}{x}\PY{o}{=}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Coefficient}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{y}\PY{o}{=}\PY{l+s+s1}{\
PYZsq{}}\PY{l+s+s1}{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{,} \PY{n}{orient}\PY{o}{=}\
PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{h}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{title}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}{Ridge
Regression Coefficients}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{xlabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Coefficient Value}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{ylabel}\PY{p}{(}\PY{l+s+s1}{\PYZsq{}}\PY{l+s+s1}
{Feature}\PY{l+s+s1}{\PYZsq{}}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{tight\PYZus{}layout}\PY{p}{(}\PY{p}{)}
\PY{n}{plt}\PY{o}{.}\PY{n}{show}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
\begin{Verbatim}[commandchars=\\\{\}]
Number of coefficients close to zero: 0
Most Important Features in Ridge Regression:
Feature Coefficient
1 Previous Scores (out of 10) 16.132697
3 Hours Studied 6.988131
9 Weight -1.094029
10 BMI 1.079102
28 Favorite Subject\_Data Science 0.604166
\end{Verbatim}
\begin{center}
\adjustimage{max size={0.9\linewidth}{0.9\paperheight}}{output_36_1.png}
\end{center}
{ \hspace*{\fill} \\}
\end{document}