我试图对一个相当大的数据集做一个线性回归,在这里我可以得到每个系数的p值。当数据集较小时,这相当简单,但当我在实际数据集上使用它时,一切都会中断。这段代码复制了玩具数据集的问题。我可以做线性回归/
sklearn
statsmodels
对于这个任务,特别是b/c,它处理分类数据的方式。
如何使statsmodels在更复杂的数据集上运行线性模型?
使用OLS、GLM和MixedLM也会发生这种情况。
I even tried setting my recursion limit higher but it did not work...
有几篇文章讨论了这个主题,但是没有一篇涉及到产生递归错误的数据集:
Find p-value (significance) in scikit-learn LinearRegression
https://datascience.stackexchange.com/questions/15398/how-to-get-p-value-and-confident-interval-in-logisticregression-with-sklearn
# Make dataset
from sklearn.datasets import make_regression
import numpy as np
import pandas as pd
X, y = make_regression(n_features = 4000)
X = pd.DataFrame(X,
index=[*map(lambda i:f"sample_{i}", range(X.shape[0]))],
columns=[*map(lambda j:f"attr_{j}", range(X.shape[1]))],
)
y = pd.Series(y,index=X.index)
# X.iloc[:5,:5]
# attr_0 attr_1 attr_2 attr_3 attr_4
# sample_0 -2.077675 -0.222409 -0.782709 1.265239 1.606933
# sample_1 0.040124 -1.427598 -0.595388 0.403271 2.098169
# sample_2 -0.864165 0.465151 0.636452 -0.127071 -0.405423
# sample_3 -1.725911 0.148566 0.343320 -0.351172 1.755546
# sample_4 0.695828 1.313974 1.149156 1.846968 -0.009125
# Import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
data = X.copy()
data["y"] = y
formula = "y ~ " + " + ".join(X.columns)
model = smf.ols(formula=formula, data=data).fit()
# ---------------------------------------------------------------------------
# RecursionError Traceback (most recent call last)
# <ipython-input-11-4479099d07d7> in <module>()
# 24 data["y"] = y
# 25 formula = "y ~ " + " + ".join(X.columns)
# ---> 26 model = smf.ols(formula=formula, data=data)
# ...
# ~/anaconda/envs/python3/lib/python3.6/site-packages/patsy/desc.py in eval(self, tree, require_evalexpr)
# 398 "'%s' operator" % (tree.type,),
# 399 tree.token)
# --> 400 result = self._evaluators[key](self, tree)
# 401 if require_evalexpr and not isinstance(result, IntermediateExpr):
# 402 if isinstance(result, ModelDesc):
# RecursionError: maximum recursion depth exceeded
# https://pastebin.com/JhmqPKp4
另外,我试着修改一些代码
sklearn公司
但我也犯了同样的错误:
# Sklearn method
# https://gist.github.com/rspeare/77061e6e317896be29c6de9a85db301d
from sklearn.linear_model import LinearRegression
class LinearRegression:
"""
Wrapper Class for Logistic Regression which has the usual sklearn instance
in an attribute self.model, and pvalues, z scores and estimated
errors for each coefficient in
self.z_scores
self.p_values
self.sigma_estimates
as well as the negative hessian of the log Likelihood (Fisher information)
self.F_ij
"""
def __init__(self,*args,**kwargs):#,**kwargs):
self.model = LinearRegression(*args,**kwargs)#,**args)
def fit(self,X,y):
self.model.fit(X,y)
#### Get p-values for the fitted model ####
denom = (2.0*(1.0+np.cosh(self.model.decision_function(X))))
F_ij = np.dot((X/denom[:,None]).T,X) ## Fisher Information Matrix
Cramer_Rao = np.linalg.inv(F_ij) ## Inverse Information Matrix
sigma_estimates = np.array([np.sqrt(Cramer_Rao[i,i]) for i in range(Cramer_Rao.shape[0])]) # sigma for each coefficient
z_scores = self.model.coef_[0]/sigma_estimates # z-score for eaach model coefficient
p_values = [stat.norm.sf(abs(x))*2 for x in z_scores] ### two tailed test for p-values
self.z_scores = z_scores
self.p_values = p_values
self.sigma_estimates = sigma_estimates
self.F_ij = F_iJ
model = LinearRegression().fit(X,y)
# RecursionError Traceback (most recent call last)
# <ipython-input-18-6f8d228c181e> in <module>()
# 35 self.F_ij = F_iJ
# 36
# ---> 37 model = LinearRegression().fit(X,y)
# <ipython-input-18-6f8d228c181e> in __init__(self, *args, **kwargs)
# 18
# 19 def __init__(self,*args,**kwargs):#,**kwargs):
# ---> 20 self.model = LinearRegression(*args,**kwargs)#,**args)
# 21
# 22 def fit(self,X,y):
# ... last 1 frames repeated, from the frame below ...
# <ipython-input-18-6f8d228c181e> in __init__(self, *args, **kwargs)
# 18
# 19 def __init__(self,*args,**kwargs):#,**kwargs):
# ---> 20 self.model = LinearRegression(*args,**kwargs)#,**args)
# 21
# 22 def fit(self,X,y):
# RecursionError: maximum recursion depth exceeded