diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ced2b3d07575886000a81dbf5c6c39897d6240ac --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Mooc Scikit-Learn Model + +This repository is used to initialize the learners' environment by downloading notebooks and datasets from https://github.com/INRIA/scikit-learn-mooc. This is used to get a reset URL and to filter out solution notebooks. + +To manually update (using the JavaScript console): + +```js +const baseUrl = "https://github.com/INRIA/scikit-learn-mooc/raw/master/" +let datasets, notebooks; + +fetch('https://api.github.com/repos/INRIA/scikit-learn-mooc/contents/datasets') +.then(function(response) { + return response.json() +}).then(function(data) { + datasets = data.map(item => item.path) + return fetch('https://api.github.com/repos/INRIA/scikit-learn-mooc/contents/notebooks') +}).then(function(response) { + return response.json() +}).then(function(data) { + // Filter out solutions + notebooks = data.map(item => item.path).filter(item => item.indexOf('sol') === -1) + console.log(JSON.stringify({baseUrl, notebooks, datasets}, null, 2)) +}); +``` diff --git a/notebooks.json b/notebooks.json index 8da95487850266f3bdc91bd102e88cbfdeb3a728..346235f66f29493d27c1d13c825d8f3172028f02 100644 --- a/notebooks.json +++ b/notebooks.json @@ -1,36 +1,79 @@ { - "baseUrl": "https://github.com/INRIA/scikit-learn-mooc/raw/master/", - "notebooks": [ - "notebooks/01_tabular_data_exploration.ipynb", - "notebooks/02_numerical_pipeline.ipynb", - "notebooks/02_numerical_pipeline_ex_01.ipynb", - "notebooks/02_numerical_pipeline_scaling.ipynb", - "notebooks/03_categorical_pipeline.ipynb", - "notebooks/03_categorical_pipeline_column_transformer.ipynb", - "notebooks/03_categorical_pipeline_ex_01.ipynb", - "notebooks/03_categorical_pipeline_ex_02.ipynb", - "notebooks/04_parameter_tuning.ipynb", - "notebooks/04_parameter_tuning_ex_01.ipynb", - "notebooks/04_parameter_tuning_ex_02.ipynb", - 
"notebooks/04_parameter_tuning_search.ipynb", - "notebooks/cross_validation.ipynb", - "notebooks/dev_features_importance.ipynb", - "notebooks/ensemble.ipynb", - "notebooks/feature_selection.ipynb", - "notebooks/linear_models.ipynb", - "notebooks/metrics.ipynb", - "notebooks/trees.ipynb" - ], - "datasets": [ - "datasets/adult-census-numeric-all.csv", - "datasets/adult-census-numeric-test.csv", - "datasets/adult-census-numeric.csv", - "datasets/adult-census.csv", - "datasets/blood_transfusion.csv", - "datasets/cps_85_wages.csv", - "datasets/house_prices.csv", - "datasets/penguins.csv", - "datasets/penguins_classification.csv", - "datasets/penguins_regression.csv" - ] -} \ No newline at end of file + "baseUrl": "https://github.com/INRIA/scikit-learn-mooc/raw/master/", + "notebooks": [ + "notebooks/01_tabular_data_exploration.ipynb", + "notebooks/02_numerical_pipeline_ex_01.ipynb", + "notebooks/02_numerical_pipeline_hands_on.ipynb", + "notebooks/02_numerical_pipeline_introduction.ipynb", + "notebooks/02_numerical_pipeline_scaling.ipynb", + "notebooks/03_categorical_pipeline.ipynb", + "notebooks/03_categorical_pipeline_column_transformer.ipynb", + "notebooks/03_categorical_pipeline_ex_01.ipynb", + "notebooks/03_categorical_pipeline_ex_02.ipynb", + "notebooks/04_parameter_tuning.ipynb", + "notebooks/04_parameter_tuning_ex_01.ipynb", + "notebooks/04_parameter_tuning_ex_02.ipynb", + "notebooks/04_parameter_tuning_search.ipynb", + "notebooks/cross_validation_baseline.ipynb", + "notebooks/cross_validation_ex_01.ipynb", + "notebooks/cross_validation_ex_02.ipynb", + "notebooks/cross_validation_ex_03.ipynb", + "notebooks/cross_validation_ex_04.ipynb", + "notebooks/cross_validation_ex_05.ipynb", + "notebooks/cross_validation_grouping.ipynb", + "notebooks/cross_validation_nested.ipynb", + "notebooks/cross_validation_stratification.ipynb", + "notebooks/cross_validation_time.ipynb", + "notebooks/cross_validation_train_test.ipynb", + "notebooks/dev_features_importance.ipynb", + 
"notebooks/ensemble_adaboost.ipynb", + "notebooks/ensemble_bagging.ipynb", + "notebooks/ensemble_ex_01.ipynb", + "notebooks/ensemble_ex_02.ipynb", + "notebooks/ensemble_ex_03.ipynb", + "notebooks/ensemble_ex_04.ipynb", + "notebooks/ensemble_ex_05.ipynb", + "notebooks/ensemble_gradient_boosting.ipynb", + "notebooks/ensemble_hist_gradient_boosting.ipynb", + "notebooks/ensemble_hyperparameters.ipynb", + "notebooks/ensemble_introduction.ipynb", + "notebooks/ensemble_random_forest.ipynb", + "notebooks/feature_selection_ex_01.ipynb", + "notebooks/feature_selection_introduction.ipynb", + "notebooks/feature_selection_limitation_model.ipynb", + "notebooks/linear_models_ex_01.ipynb", + "notebooks/linear_models_ex_02.ipynb", + "notebooks/linear_models_ex_03.ipynb", + "notebooks/linear_models_ex_04.ipynb", + "notebooks/linear_models_ex_05.ipynb", + "notebooks/linear_models_regularization.ipynb", + "notebooks/linear_regression_in_sklearn.ipynb", + "notebooks/linear_regression_non_linear_link.ipynb", + "notebooks/linear_regression_without_sklearn.ipynb", + "notebooks/logistic_regression.ipynb", + "notebooks/logistic_regression_non_linear.ipynb", + "notebooks/metrics_classification.ipynb", + "notebooks/metrics_ex_01.ipynb", + "notebooks/metrics_ex_02.ipynb", + "notebooks/metrics_regression.ipynb", + "notebooks/trees_classification.ipynb", + "notebooks/trees_dataset.ipynb", + "notebooks/trees_ex_01.ipynb", + "notebooks/trees_ex_02.ipynb", + "notebooks/trees_hyperparameters.ipynb", + "notebooks/trees_regression.ipynb" + ], + "datasets": [ + "datasets/README.md", + "datasets/adult-census-numeric-all.csv", + "datasets/adult-census-numeric-test.csv", + "datasets/adult-census-numeric.csv", + "datasets/adult-census.csv", + "datasets/blood_transfusion.csv", + "datasets/cps_85_wages.csv", + "datasets/house_prices.csv", + "datasets/penguins.csv", + "datasets/penguins_classification.csv", + "datasets/penguins_regression.csv" + ] +}