    # Toy dataset: ten samples of an 'age' feature and a 'target' label.
    example_data = {
        'age': [37, 37, 27, 22, 32, 22, 42, 32, 37, 22],
        'target': [0, 0, 2, 0, 0, 0, 0, 0, 2, 0],
    }
    # Tabular view of the same data; each dict key becomes a column.
    example_df = pd.DataFrame(example_data)




    AttributeError                            Traceback (most recent call last)
    <ipython-input-248-c4a80b9f6e55> in <module>()
    ----> 1 ldc(x=example_data['age'],y=example_data['target'])
    <ipython-input-231-78a770305f24> in ldc(x, y, f, k, s, n)
         29         return np.median(values)
    ---> 31     if len(x.shape) == 1: x = x.values.reshape((-1, 1))
         32     if len(y.shape) == 1: y = y.values.reshape((-1, 1))
    AttributeError: 'list' object has no attribute 'shape'


    Implements the Randomized Dependence Coefficient
    David Lopez-Paz, Philipp Hennig, Bernhard Schoelkopf
    import numpy as np
    from scipy.stats import rankdata
    def rdc(x, y, f=np.sin, k=20, s=1/6., n=1):
        """
        Compute the Randomized Dependence Coefficient (RDC) of x and y.

        x,y: array-likes (lists, numpy arrays, pandas Series/DataFrames),
             1-D or 2-D
             If 1-D, size (samples,)
             If 2-D, size (samples, variables)
        f:   function to use for random projection
        k:   number of random projections to use
        s:   scale parameter
        n:   number of times to compute the RDC and
             return the median (for stability)

        Returns the square root of the largest eigenvalue found for the
        largest number of projections whose eigenvalues are all real and
        lie in [0, 1].  With n > 1 the median over the runs that did not
        fail with LinAlgError is returned (NaN if every run failed).

        Raises numpy.linalg.LinAlgError when n == 1 and a covariance block
        cannot be inverted, or when no number of projections gives valid
        eigenvalues.

        According to the paper, the coefficient should be relatively
        insensitive to the settings of the f, k, and s parameters.
        """
        if n > 1:
            values = []
            for _ in range(n):
                try:
                    values.append(rdc(x, y, f, k, s, 1))
                except np.linalg.LinAlgError:
                    # Best effort: a run with a singular covariance block
                    # is skipped rather than aborting the whole estimate.
                    pass
            return np.median(values)

        # Accept any array-like (the original `.values` access failed for
        # plain lists and numpy arrays) and promote 1-D input to a column.
        x = np.asarray(x)
        y = np.asarray(y)
        if x.ndim == 1:
            x = x.reshape((-1, 1))
        if y.ndim == 1:
            y = y.reshape((-1, 1))

        # Copula Transformation
        cx = np.column_stack(
            [rankdata(xc, method='ordinal') for xc in x.T]) / float(x.size)
        cy = np.column_stack(
            [rankdata(yc, method='ordinal') for yc in y.T]) / float(y.size)

        # Add a vector of ones so that w.x + b is just a dot product
        O = np.ones(cx.shape[0])
        X = np.column_stack([cx, O])
        Y = np.column_stack([cy, O])

        # Random linear projections
        Rx = (s / X.shape[1]) * np.random.randn(X.shape[1], k)
        Ry = (s / Y.shape[1]) * np.random.randn(Y.shape[1], k)
        X = np.dot(X, Rx)
        Y = np.dot(Y, Ry)

        # Apply non-linear function to random projections
        fX = f(X)
        fY = f(Y)

        # Compute full covariance matrix
        C = np.cov(np.hstack([fX, fY]).T)

        # Due to numerical issues, if k is too large,
        # then rank(fX) < k or rank(fY) < k, so we need
        # to find the largest k such that the eigenvalues
        # (canonical correlations) are real-valued
        k0 = k
        lb = 1
        ub = k
        while True:
            # Compute canonical correlations
            Cxx = C[:k, :k]
            Cyy = C[k0:k0 + k, k0:k0 + k]
            Cxy = C[:k, k0:k0 + k]
            Cyx = C[k0:k0 + k, :k]
            eigs = np.linalg.eigvals(
                np.dot(np.dot(np.linalg.inv(Cxx), Cxy),
                       np.dot(np.linalg.inv(Cyy), Cyx)))

            # Binary search if k is too large
            if not (np.all(np.isreal(eigs)) and
                    0 <= np.min(eigs) and
                    np.max(eigs) <= 1):
                ub -= 1
                if ub < lb:
                    # Even a single projection was rejected; stop instead
                    # of indexing empty covariance blocks.
                    raise np.linalg.LinAlgError(
                        "no number of projections yields valid eigenvalues")
                # Floor division keeps k an int so it stays a valid slice
                # bound.
                k = (ub + lb) // 2
                # Retry with the smaller k; a rejected k must not become
                # the new lower bound.
                continue
            if lb == ub:
                break
            lb = k
            if ub == lb + 1:
                k = ub
            else:
                k = (ub + lb) // 2

        return np.sqrt(np.max(eigs))
    你在下一行误用了 example_data,而不是 example_df;应传入 example_df['age'] 和 example_df['target']。


