
# Simple linear regression: model `tip` as a linear function of `total_bill`.
X = df[['total_bill']].values  # double brackets select a DataFrame, so X stays 2-D (n_samples, 1)
y = df['tip'].values
model = LinearRegression()
model.fit(X, y)
# Replacement fields must be wrapped in braces; without them the f-string
# prints the expression text verbatim instead of the fitted values.
print(f"Slope: {model.coef_[0]:.3f}, Intercept: {model.intercept_:.3f}")
# score() is evaluated on the same data used for fitting, so this is the in-sample R².
print(f"R²: {model.score(X, y):.3f}")
# Quick look at the data: first rows, column summary, and descriptive statistics.
# NOTE(review): the head() and describe() results are neither printed nor
# assigned — presumably they rely on notebook cell display; confirm.
df.head()
df.info()
# Request the tail percentiles (1%, 5%, 95%, 99%) in addition to the quartiles and median.
df.describe(percentiles=[.01, .05, .25, .5, .75, .95, .99])
Antes de modelar, debemos sentir los datos. La estadística descriptiva resume la historia central de una variable.
from statsmodels.stats.proportion import proportion_confint
# Wilson confidence interval for the proportion of rows where sex == 'Male'.
successes = (df['sex'] == 'Male').sum()
n = len(df)
# alpha=0.05 requests a 95% interval; ci_prop is a (lower, upper) pair.
ci_prop = proportion_confint(successes, n, alpha=0.05, method='wilson')
# The original f-string had no braces and printed the literal text "ci_prop";
# interpolate both bounds, rounded to three decimals like the other reports.
print(f"Proportion of males CI: ({ci_prop[0]:.3f}, {ci_prop[1]:.3f})")
# Draw 1000 values from a normal distribution (mean 0, standard deviation 1)
# and run a normality test on them.
sample = np.random.normal(0.0, 1.0, size=1000)
# A p-value above 0.05 means normality is not rejected at the 5% level.
stats.normaltest(sample)
# Logistic regression: classify whether a tip is above the median tip.
# Binary target: 1 when the tip is strictly greater than the median, else 0.
df['high_tip'] = (df['tip'] > df['tip'].median()).astype(int)
X = df[['total_bill', 'size']].values
y = df['high_tip'].values
log_model = LogisticRegression()
log_model.fit(X, y)
# Braces are required for interpolation; without them the f-string prints the
# expression text. score() is evaluated on the training data itself.
print(f"Accuracy: {log_model.score(X, y):.3f}")