我想了解scikit-learn中各种ML类的源代码。比如,如果我想检查交叉验证模块的类KFold,我该如何检查它?。其他人也是如此。
感谢
您可以在他们的github:上查看它
https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/cross_validation.py
你可以下载它,但你不必
KFold看起来像这个
class KFold(_BaseKFold):
"""K-Folds cross validation iterator.
Provides train/test indices to split data in train test sets. Split
dataset into k consecutive folds (without shuffling).
Each fold is then used a validation set once while the k - 1 remaining
fold form the training set.
Parameters
----------
n : int
Total number of elements.
n_folds : int, default=3
Number of folds. Must be at least 2.
shuffle : boolean, optional
Whether to shuffle the data before splitting into batches.
random_state : None, int or RandomState
Pseudo-random number generator state used for random
sampling. If None, use default numpy RNG for shuffling
Examples
--------
>>> from sklearn import cross_validation
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([1, 2, 3, 4])
>>> kf = cross_validation.KFold(4, n_folds=2)
>>> len(kf)
2
>>> print(kf) # doctest: +NORMALIZE_WHITESPACE
sklearn.cross_validation.KFold(n=4, n_folds=2, shuffle=False,
random_state=None)
>>> for train_index, test_index in kf:
... print("TRAIN:", train_index, "TEST:", test_index)
... X_train, X_test = X[train_index], X[test_index]
... y_train, y_test = y[train_index], y[test_index]
TRAIN: [2 3] TEST: [0 1]
TRAIN: [0 1] TEST: [2 3]
Notes
-----
The first n % n_folds folds have size n // n_folds + 1, other folds have
size n // n_folds.
See also
--------
StratifiedKFold: take label information into account to avoid building
folds with imbalanced class distributions (for binary or multiclass
classification tasks).
"""
def __init__(self, n, n_folds=3, shuffle=False,
random_state=None):
super(KFold, self).__init__(n, n_folds, shuffle, random_state)
self.idxs = np.arange(n)
if shuffle:
rng = check_random_state(self.random_state)
rng.shuffle(self.idxs)
def _iter_test_indices(self):
n = self.n
n_folds = self.n_folds
fold_sizes = (n // n_folds) * np.ones(n_folds, dtype=np.int)
fold_sizes[:n % n_folds] += 1
current = 0
for fold_size in fold_sizes:
start, stop = current, current + fold_size
yield self.idxs[start:stop]
current = stop
def __repr__(self):
return '%s.%s(n=%i, n_folds=%i, shuffle=%s, random_state=%s)' % (
self.__class__.__module__,
self.__class__.__name__,
self.n,
self.n_folds,
self.shuffle,
self.random_state,
)
def __len__(self):
return self.n_folds