Cheat Sheet Tutorial

PyCaret is an open source, low-code machine learning library in Python that lets you go from preparing your data to deploying your model within minutes, in your choice of notebook environment.

Installing PyCaret

# install pycaret
pip install pycaret
# install the full version of pycaret
pip install pycaret[full]
# install the pycaret time series module
pip install pycaret-ts-alpha

PyCaret on GPU

# uninstall the CPU build of lightgbm
pip uninstall lightgbm -y
# install the GPU build of lightgbm
pip install lightgbm --install-option=--gpu --install-option="--opencl-include-dir=/usr/local/include/" --install-option="--opencl-library=/usr/local/cuda/lib64/libOpenCL.so"
Run PyCaret on a Docker Container

FROM python:3.7-slim
WORKDIR /app
ADD . /app
RUN apt-get update && apt-get install -y libgomp1
RUN pip install --trusted-host pypi.python.org -r requirements.txt
CMD pytest # replace this with your entry point
PyCaret Tutorials

Classification
Binary classification (Beginner)
Binary classification (Intermediate)
Multiclass classification (Beginner)

Regression
Regression (Beginner)
Regression (Intermediate)

Clustering
Clustering (Beginner)

Anomaly Detection
Anomaly detection (Beginner)

Natural Language Processing
NLP (Beginner)
NLP (Intermediate)

Association Rule Mining
Association Rule Mining (Beginner)

Time Series
Time series and forecasting (Beginner)
Loading Data

# loading data from pycaret's repository
from pycaret.datasets import get_data
data = get_data('dataset_name')

# loading data using pandas
import pandas as pd
df = pd.read_csv(r'dir/file_name.csv')
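For a quick sanity check, here is a minimal sketch of the repository path; the 'juice' dataset name is an assumption for illustration, and get_data('index') prints the full list of bundled datasets.

# a minimal sketch, assuming the bundled 'juice' dataset
from pycaret.datasets import get_data
index = get_data('index')    # lists all datasets in pycaret's repository
df = get_data('juice')       # loads a dataset and shows its first rows
print(df.shape)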
Supervised Learning

Regression and Classification

# set up environment
from pycaret.regression import *
from pycaret.classification import *
clf1 = setup(data=df, target='column')

# create and evaluate model
compare_models()
model = create_model(*)
model_tuned = tune_model(model)
ens_model = ensemble_model(model, method=***)
blender = blend_models(top3)
stacker = stack_models(top3)
plot_model(model, plot=**)
evaluate_model(model)
interpret_model(model)
calibrate_model(model)   (1)
optimize_threshold(model)   (1)

# make predictions
df1 = predict_model(model=model, data=df)

# model deployment
final_model = finalize_model(model)
save_model(model, 'saved_model')
model_loaded = load_model('saved_model')
deploy_model(model=model, model_name='model_final',
             platform='aws',
             authentication={'bucket': 'S3-bucket-name'})

# utils
pull(), models(), get_metrics(), add_metric(),
remove_metric(), get_logs(), get_config(), set_config(),
save_config(), load_config(), get_leaderboard()

* model (regression): 'lr', 'lasso', 'ridge', 'en', 'lar', 'llar',
  'omp', 'br', 'ard', 'par', 'ransac', 'tr', 'huber', 'kr', 'svm',
  'knn', 'dt', 'rf', 'et', 'gbr', 'mlp', 'xgboost', 'lightgbm',
  'catboost'
* model (classification): 'lr', 'knn', 'nb', 'dt', 'svm', 'rbfsvm',
  'gpc', 'mlp', 'ridge', 'rf', 'qda', 'ada', 'gbc', 'lda', 'et',
  'xgboost', 'lightgbm', 'catboost'
** plot = 'residuals_interactive', 'residuals', 'error', 'cooks',
  'rfe', 'learning', 'vc', 'manifold', 'feature', 'feature_all',
  'parameter', 'tree'
*** method = 'bagging', 'boosting'
(1) classification only
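Chained together, the calls above give a short end-to-end run. The sketch below is illustrative, not canonical: the 'juice' dataset and its 'Purchase' target column are assumptions taken from PyCaret's own tutorials.

# a minimal end-to-end classification sketch (dataset and target are illustrative)
from pycaret.datasets import get_data
from pycaret.classification import *
df = get_data('juice')
clf1 = setup(data=df, target='Purchase',   # 'Purchase' is the label column
             session_id=123, silent=True)  # silent=True skips the dtype confirmation prompt
best = compare_models()                    # trains and ranks all models by CV score
tuned = tune_model(best)                   # random-grid hyperparameter search
final = finalize_model(tuned)              # refits on the full dataset
save_model(final, 'saved_model')           # writes saved_model.pkl to disk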
Time Series Analysis

# set up environment
from pycaret.time_series import *
exp = setup(data=df, fh=12)

# create and evaluate model
compare_models()
model = create_model(*)
model_tuned = tune_model(model)
blender = blend_models(top3)
plot_model(model, plot=**)
final_model = finalize_model(model)

# make predictions
pred_holdout = predict_model(model)
pred_unseen = predict_model(finalize_model(model), fh=24)

# model deployment
save_model(model, 'saved_model')
model_loaded = load_model('saved_model')
deploy_model(model=model, model_name='model_final',
             platform='aws',
             authentication={'bucket': 'S3-bucket-name'})

# utils
pull(), models(), get_metrics(), add_metric(),
remove_metric(), get_logs(), get_config(), set_config(),
save_config(), load_config()

* model = 'naive', 'grand_means', 'snaive', 'polytrend', 'arima',
  'exp_smooth', 'ets', 'theta', 'tbats', 'bats', 'prophet',
  'lr_cds_dt', 'en_cds_dt', 'ridge_cds_dt', 'lasso_cds_dt',
  'lar_cds_dt', 'llar_cds_dt', 'br_cds_dt', 'huber_cds_dt',
  'par_cds_dt', 'omp_cds_dt', 'knn_cds_dt', 'dt_cds_dt',
  'rf_cds_dt', 'et_cds_dt', 'gbr_cds_dt', 'ada_cds_dt',
  'lightgbm_cds_dt'
** plot = 'ts', 'cv', 'acf', 'pacf', 'decomp_stl', 'diagnostics',
  'forecast', 'insample', 'residuals', 'train_test_split',
  'decomp_classical'
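A minimal forecasting run might look as follows; the 'airline' dataset (monthly passenger counts) is an assumption borrowed from PyCaret's time series examples.

# a minimal time series sketch, assuming the bundled 'airline' dataset
from pycaret.datasets import get_data
from pycaret.time_series import *
y = get_data('airline')                    # univariate monthly series
exp = setup(data=y, fh=12, fold=3, session_id=123)
best = compare_models()                    # cross-validated comparison of forecasters
final = finalize_model(best)               # refit on the full series
pred_unseen = predict_model(final, fh=24)  # forecast 24 periods beyond the data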
Unsupervised Learning

Clustering

# set up environment
from pycaret.clustering import *
clf1 = setup(data=df)

# create and evaluate model
model = create_model(*)
model_df = assign_model(model)
plot_model(model, plot=**)
evaluate_model(model)
model_tuned = tune_model(model=model,
                         supervised_target='column_name')

# make predictions
df1 = predict_model(model=model, data=df)

# model deployment
save_model(model, 'saved_model')
model_loaded = load_model('saved_model')
deploy_model(model=model, model_name='model_final',
             platform='aws',
             authentication={'bucket': 'S3-bucket-name'})

# utils
pull(), models(), get_metrics(), add_metric(),
remove_metric(), get_logs(), get_config(), set_config(),
save_config(), load_config(), get_clusters()

* model = 'kmeans', 'ap', 'meanshift', 'sc', 'hclust', 'dbscan',
  'optics', 'birch', 'kmodes'
** plot = 'cluster', 'tsne', 'elbow', 'silhouette', 'distance',
  'distribution'
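As a concrete run, a sketch using the 'jewellery' dataset (an assumption taken from PyCaret's clustering tutorial):

# a minimal clustering sketch, assuming the bundled 'jewellery' dataset
from pycaret.datasets import get_data
from pycaret.clustering import *
df = get_data('jewellery')
clu1 = setup(data=df, session_id=123, silent=True)
kmeans = create_model('kmeans')            # k-means with 4 clusters by default
df_clusters = assign_model(kmeans)         # appends a 'Cluster' label column
plot_model(kmeans, plot='elbow')           # elbow plot to choose the cluster count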
Anomaly Detection

# set up environment
from pycaret.anomaly import *
clf1 = setup(data=df)

# create and evaluate model
model = create_model(*)
model_df = assign_model(model)
plot_model(model, plot=**)
evaluate_model(model)
model_tuned = tune_model(model=model,
                         supervised_target='column')

# make predictions
df1 = predict_model(model=model, data=df)

# model deployment
save_model(model, 'saved_model')
model_loaded = load_model('saved_model')
deploy_model(model=model, model_name='model_final',
             platform='aws',
             authentication={'bucket': 'S3-bucket-name'})

# utils
pull(), models(), get_logs(), get_config(), set_config()

* model = 'abod', 'cluster', 'histogram', 'knn', 'lof', 'svm',
  'pca', 'mcd', 'sod', 'sos'
** plot = 'tsne', 'umap'
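A sketch of the same flow for outlier detection; the 'anomaly' dataset name is an assumption from PyCaret's tutorial, and 'knn' is one of the detectors listed above.

# a minimal anomaly detection sketch, assuming the bundled 'anomaly' dataset
from pycaret.datasets import get_data
from pycaret.anomaly import *
df = get_data('anomaly')
ano1 = setup(data=df, session_id=123, silent=True)
knn = create_model('knn')                  # k-nearest-neighbours detector
df_scores = assign_model(knn)              # appends 'Anomaly' and 'Anomaly_Score' columns
plot_model(knn, plot='tsne')               # t-SNE view of flagged points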
Natural Language Processing

# set up environment
from pycaret.nlp import *
clf1 = setup(data=df, target='column')

# create and evaluate model
model = create_model(*)
model_df = assign_model(model)
plot_model(model, plot=**)
evaluate_model(model)
model_tuned = tune_model(model=model,
                         supervised_target='column')

# model deployment
save_model(model, 'saved_model')
model_loaded = load_model('saved_model')

# utils
pull(), models(), get_logs(), get_config(), set_config(),
get_topics()

* model = 'lda', 'lsi', 'hdp', 'rp', 'nmf'
** plot = 'frequency', 'distribution', 'bigram', 'trigram',
  'sentiment', 'pos', 'tsne', 'topic_model', 'topic_distribution',
  'wordcloud', 'umap'
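A short topic-modelling run; the 'kiva' dataset and its 'en' text column are assumptions from PyCaret's NLP tutorial.

# a minimal topic-modelling sketch, assuming the bundled 'kiva' dataset
from pycaret.datasets import get_data
from pycaret.nlp import *
df = get_data('kiva')
nlp1 = setup(data=df, target='en', session_id=123)  # 'en' holds the raw text
lda = create_model('lda')                  # latent Dirichlet allocation, 4 topics by default
df_topics = assign_model(lda)              # appends topic weights and the dominant topic
plot_model(lda, plot='topic_model')        # interactive topic visualization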
Association Rule

# set up environment
from pycaret.arules import *
clf1 = setup(data=df, transaction_id='column',
             item_id='column')

# create and evaluate model
model = create_model()
plot_model(model, plot='2d')
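A short market-basket run; the 'france' dataset and its 'InvoiceNo'/'Description' columns are assumptions from PyCaret's association rules tutorial.

# a minimal association-rules sketch, assuming the bundled 'france' dataset
from pycaret.datasets import get_data
from pycaret.arules import *
df = get_data('france')
aru1 = setup(data=df, transaction_id='InvoiceNo', item_id='Description')
rules = create_model()                     # apriori with default support and confidence
plot_model(rules, plot='2d')               # support vs confidence scatter of rules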
Other Resources

PyCaret GitHub
PyCaret Slack
Example Notebooks made by contributors
Blog tutorials
Documentation 'The detailed API docs of PyCaret'
Video Tutorials
Discussions 'Have questions?'
Changelog 'Changes and version history'
Roadmap of PyCaret
Parameters of setup() and its default values (pycaret.org)

Clustering and Anomaly Detection

data, preprocess = True, imputation_type = 'simple',
iterative_imputation_iters = 5, categorical_features = None,
categorical_imputation = 'mode',
categorical_iterative_imputer = 'lightgbm', ordinal_features = None,
high_cardinality_features = None,
high_cardinality_method = 'frequency', numeric_features = None,
numeric_imputation = 'mean', numeric_iterative_imputer = 'lightgbm',
date_features = None, ignore_features = None, normalize = False,
normalize_method = 'zscore', transformation = False,
transformation_method = 'yeo-johnson',
handle_unknown_categorical = True,
unknown_categorical_method = 'least_frequent', pca = False,
pca_method = 'linear', pca_components = None,
ignore_low_variance = False, combine_rare_levels = False,
rare_level_threshold = 0.1, bin_numeric_features = None,
remove_multicollinearity = False, multicollinearity_threshold = 0.9,
remove_perfect_collinearity = False, group_features = None,
group_names = None, n_jobs = -1, use_gpu = False,
custom_pipeline = None, html = True, session_id = None,
system_log = True, log_experiment = False, experiment_name = None,
log_plots = False, log_profile = False, log_data = False,
silent = False, verbose = True, profile = False,
profile_kwargs = None

Regression & Classification

data = DataFrame, target = 'column_name', train_size = 0.7,
test_data = None, preprocess = True, imputation_type = 'simple',
iterative_imputation_iters = 5, categorical_features = None,
categorical_imputation = 'constant',
categorical_iterative_imputer = 'lightgbm', ordinal_features = None,
high_cardinality_features = None,
high_cardinality_method = 'frequency', numeric_features = None,
numeric_imputation = 'mean', numeric_iterative_imputer = 'lightgbm',
date_features = None, ignore_features = None, normalize = False,
normalize_method = 'zscore', transformation = False,
transformation_method = 'yeo-johnson',
handle_unknown_categorical = True,
unknown_categorical_method = 'least_frequent', pca = False,
pca_method = 'linear', pca_components = None,
ignore_low_variance = False, combine_rare_levels = False,
rare_level_threshold = 0.1, bin_numeric_features = None,
remove_outliers = False, outliers_threshold = 0.05,
remove_multicollinearity = False, multicollinearity_threshold = 0.9,
remove_perfect_collinearity = True, create_clusters = False,
cluster_iter = 20, polynomial_features = False,
polynomial_degree = 2, trigonometry_features = False,
polynomial_threshold = 0.1, group_features = None,
group_names = None, feature_selection = False,
feature_selection_threshold = 0.8,
feature_selection_method = 'classic', feature_interaction = False,
feature_ratio = False, interaction_threshold = 0.01,
transform_target = False, transform_target_method = 'box-cox',
data_split_shuffle = True, data_split_stratify = False,
fold_strategy = 'kfold', fold = 10, fold_shuffle = False,
fold_groups = None, n_jobs = -1, use_gpu = False,
custom_pipeline = None, html = True, session_id = None,
log_experiment = False, experiment_name = None, log_plots = False,
log_profile = False, log_data = False, silent = False,
verbose = True, profile = False, profile_kwargs = None

Time Series

data = [Series, DataFrame], preprocess = True,
imputation_type = 'simple', fold_strategy = 'expanding', fold = 3,
fh = 1, seasonal_period = None, enforce_pi = False, n_jobs = -1,
use_gpu = False, custom_pipeline = None, html = True,
session_id = None, system_log = True, log_experiment = False,
experiment_name = None, log_plots = False, log_profile = False,
log_data = False, verbose = True, profile = False,
profile_kwargs = None

Association Rule

data, transaction_id = 'column_name', item_id = 'column_name',
ignore_items = None, session_id = None

NLP

data, target = 'column_name', custom_stopwords = None, html = True,
session_id = None, log_experiment = False, experiment_name = None,
log_plots = False, log_data = False, verbose = True

Color code: required / optional (in the original sheet, colors mark
which parameters are required versus optional).
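Most of these defaults can be left alone. As a sketch of overriding a few common ones in a classification experiment (the data frame and 'label' column are illustrative):

# a sketch of overriding common setup() defaults (column name is illustrative)
from pycaret.classification import *
clf1 = setup(
    data=df,
    target='label',       # required: the column to predict
    train_size=0.8,       # hold out 20% instead of the default 30%
    normalize=True,       # z-score scaling of numeric features
    fold=5,               # 5-fold instead of the default 10-fold CV
    session_id=123,       # fixes the random seed for reproducibility
)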
