%%HTML
<script src="require.js"></script>
import pandas as pd
# Load the A/B test data from a local pickle file into a DataFrame.
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
ab_df = pd.read_pickle("AB_test_data.pkl")
# Preview the first rows to check the columns and their values.
ab_df.head()
| | dias | fin_semana | click_a | click_b | conversion_a | conversion_b |
---|---|---|---|---|---|---|
0 | 0 | 1 | 1254 | 780 | 126 | 88 |
1 | 1 | 0 | 1147 | 1011 | 116 | 96 |
2 | 2 | 0 | 678 | 1040 | 67 | 105 |
3 | 3 | 0 | 968 | 932 | 94 | 89 |
4 | 4 | 0 | 899 | 735 | 93 | 71 |
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
ab_df.describe()
| | dias | fin_semana | click_a | click_b | conversion_a | conversion_b |
---|---|---|---|---|---|---|
count | 180.000000 | 180.000000 | 180.000000 | 180.000000 | 180.000000 | 180.000000 |
mean | 89.500000 | 0.272222 | 990.883333 | 1001.366667 | 98.033333 | 102.472222 |
std | 52.105662 | 0.446345 | 187.278369 | 194.521125 | 19.092805 | 21.424006 |
min | 0.000000 | 0.000000 | 542.000000 | 602.000000 | 51.000000 | 59.000000 |
25% | 44.750000 | 0.000000 | 867.000000 | 876.750000 | 86.000000 | 88.000000 |
50% | 89.500000 | 0.000000 | 973.000000 | 979.000000 | 96.000000 | 99.500000 |
75% | 134.250000 | 1.000000 | 1106.250000 | 1143.500000 | 110.000000 | 116.250000 |
max | 179.000000 | 1.000000 | 1449.000000 | 1448.000000 | 148.000000 | 159.000000 |
# Overall conversion rate of each campaign over the whole experiment:
# total conversions divided by total clicks.
# The columns are selected by name (not by position) so the result does not
# depend on the column order of ab_df.
cols = ["click_a", "click_b", "conversion_a", "conversion_b"]
totals = ab_df[cols].sum()
# Positional list in the same order as `cols`, kept in case other cells
# still index the totals by position.
total = totals.tolist()

overall_cr_a = totals["conversion_a"] / totals["click_a"]
overall_cr_b = totals["conversion_b"] / totals["click_b"]
print(f"Tasa de conversión campaña A:{overall_cr_a: 0.4f}")
print(f"Tasa de conversión campaña B:{overall_cr_b: 0.4f}")
Tasa de conversión campaña A: 0.0989
Tasa de conversión campaña B: 0.1023
# The figures above are totals over the 180 days during which both options
# were tested. A natural follow-up question is what these accumulated values
# converge to as the days go by.
cols = ab_df.columns[2:6]
for source_col in cols:
    # Running total of each click / conversion column, stored as "accu_<col>".
    ab_df[f"accu_{source_col}"] = ab_df[source_col].cumsum()

# Running conversion rate: cumulative conversions over cumulative clicks.
ab_df["accu_cr_a"] = ab_df["accu_conversion_a"].div(ab_df["accu_click_a"])
ab_df["accu_cr_b"] = ab_df["accu_conversion_b"].div(ab_df["accu_click_b"])
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "notebook"

# Line chart of the cumulative conversion rate of both campaigns per day.
fig = px.line(ab_df, x="dias", y=["accu_cr_a", "accu_cr_b"])
# Reset the per-trace hover template (controls how the hover information is
# displayed) and set the marker size in a single pass over the traces.
fig.update_traces(marker_size=5, hovertemplate=None)
fig.update_xaxes(title_text="Días")
fig.update_yaxes(title_text="Conversion rate")

# All layout settings in one call. hovermode is set once to its final value
# ("x unified") instead of being set to "x" and overridden afterwards.
fig.update_layout(
    font_color="green",
    legend_title_font_color="green",
    showlegend=True,
    title={
        "text": "Conversion rate: resultados de 180 días de experimento",
        "y": 0.95,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
        "font": {"family": "Times New Roman", "color": "black"},
    },
    yaxis={"zeroline": False},
    # Spike settings were added to draw the vertical line under the cursor.
    hovermode="x unified",
    spikedistance=-1,
    xaxis={
        "zeroline": False,
        "showspikes": True,
        "spikemode": "across",
        "spikesnap": "cursor",
        "showline": True,
        "showgrid": True,
    },
)
# To export the chart as a standalone page, call fig.write_html("conversion.html").
fig.show()
import scipy as sp
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
import scipy.stats as stats
# As an example, run a Student's t-test on the daily conversion rates.
ab_df["cr_a"] = ab_df["conversion_a"].div(ab_df["click_a"])
ab_df["cr_b"] = ab_df["conversion_b"].div(ab_df["click_b"])

# Two-sample test between the two daily-rate columns, with equal_var=True.
ttest_result = stats.ttest_ind(ab_df["cr_a"], ab_df["cr_b"], equal_var=True)
t_value, t_test_p = ttest_result.statistic, ttest_result.pvalue

print(f" Varlor del estadístico t: {t_value}")
print(f" P-valor: {t_test_p}")
Varlor del estadístico t: -6.858392067428916
P-valor: 3.0682058663362284e-11
# Alternative: regress the daily conversion rate on the campaign type.
# numpy stays imported here in case other cells rely on `np`.
import numpy as np

# Long format: melt stacks cr_a and cr_b into a "value" column, with the
# originating column name stored in "variable".
ols_df = ab_df[["dias", "cr_a", "cr_b"]].melt(id_vars="dias")
# Dummy regressor: 1 for campaign B rows, 0 for campaign A rows.
ols_df["Impacto_Camp_B"] = (ols_df["variable"] == "cr_b").astype(int)
# Explicit constant column for the intercept (campaign A baseline). A scalar
# broadcast yields a plain 1-D float column, avoiding the assignment of a
# 2-D (n, 1) array to a single column.
ols_df["intercept_CA"] = 1.0

reg = sm.OLS(
    endog=ols_df["value"],
    exog=ols_df[["intercept_CA", "Impacto_Camp_B"]],
)
results = reg.fit()
print(results.summary())
                            OLS Regression Results
==============================================================================
Dep. Variable:                  value   R-squared:                       0.116
Model:                            OLS   Adj. R-squared:                  0.114
Method:                 Least Squares   F-statistic:                     47.04
Date:                Fri, 16 Sep 2022   Prob (F-statistic):           3.07e-11
Time:                        13:09:02   Log-Likelihood:                 1430.4
No. Observations:                 360   AIC:                            -2857.
Df Residuals:                     358   BIC:                            -2849.
Df Model:                           1
Covariance Type:            nonrobust
==================================================================================
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
intercept_CA       0.0989      0.000    290.723      0.000       0.098       0.100
Impacto_Camp_B     0.0033      0.000      6.858      0.000       0.002       0.004
==============================================================================
Omnibus:                       17.073   Durbin-Watson:                   2.028
Prob(Omnibus):                  0.000   Jarque-Bera (JB):               18.139
Skew:                           0.528   Prob(JB):                     0.000115
Kurtosis:                       3.304   Cond. No.                         2.62
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.