Intro to MLOps with vetiver
You can deploy a vetiver model to:
Posit’s pro products, like Connect
AWS SageMaker (R only, for now)
A public or private cloud, using Docker
Containerized environments for your code
Start with a trained and versioned model
Along with a Dockerfile, you need two supporting files:
requirements.txt or renv.lock
app.py or plumber.R
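vetiver can scaffold all of these files for you. A minimal sketch in Python, assuming a model already pinned to Posit Connect (the pin name here is hypothetical):

import pins
import vetiver

b = pins.board_connect(allow_pickle_read=True)
# writes Dockerfile, app.py, and vetiver_requirements.txt to the working directory
vetiver.prepare_docker(b, "user.name/seattle-housing")

For a Python model, the generated Dockerfile looks like this: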
# Generated by the vetiver package; edit with care
# start with python base image
FROM python:3.11
# create directory in container for vetiver files
WORKDIR /vetiver
# copy and install requirements
COPY vetiver_requirements.txt /vetiver/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /vetiver/requirements.txt
# copy app file
COPY app.py /vetiver/app/app.py
# expose port
EXPOSE 8080
# run vetiver API
CMD ["uvicorn", "app.app:api", "--host", "0.0.0.0", "--port", "8080"]
# Generated by the vetiver package; edit with care
FROM rocker/r-ver:4.4.0
ENV RENV_CONFIG_REPOS_OVERRIDE https://packagemanager.rstudio.com/cran/latest
RUN apt-get update -qq && apt-get install -y --no-install-recommends \
libcurl4-openssl-dev \
libicu-dev \
libsodium-dev \
libssl-dev \
make \
zlib1g-dev \
&& apt-get clean
COPY vetiver_renv.lock renv.lock
RUN Rscript -e "install.packages('renv')"
RUN Rscript -e "renv::restore()"
COPY plumber.R /opt/ml/plumber.R
EXPOSE 8080
ENTRYPOINT ["R", "-e", "pr <- plumber::plumb('/opt/ml/plumber.R'); pr$run(host = '0.0.0.0', port = 8080)"]
import pandas as pd
import numpy as np
from sklearn import model_selection, ensemble

housing = pd.read_parquet('../data/housing.parquet')

np.random.seed(123)
# model log10(price) so the target is less skewed
X, y = housing[["bedrooms", "bathrooms", "sqft_living", "yr_built"]], np.log10(housing["price"])
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y,
    test_size=0.2
)
housing_fit = ensemble.RandomForestRegressor(n_estimators=200).fit(X_train, y_train)
library(tidyverse)
library(tidymodels)
library(arrow)

path <- here::here("data", "housing.parquet")
housing <- read_parquet(path)

set.seed(123)
housing_split <- housing |>
  mutate(price = log10(price)) |>
  initial_split(prop = 0.8)
housing_train <- training(housing_split)
housing_test <- testing(housing_split)

housing_fit <-
  workflow(
    price ~ bedrooms + bathrooms + sqft_living + yr_built,
    rand_forest(trees = 200, mode = "regression")
  ) |>
  fit(data = housing_train)
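With a fitted model in hand (in either language), the next step is to version it. A minimal Python sketch; the board and pin name are assumptions:

from vetiver import VetiverModel, vetiver_pin_write
import pins

# bundle the trained model with an input data prototype
v = VetiverModel(housing_fit, model_name="user.name/seattle-housing", prototype_data=X_train)
b = pins.board_connect(allow_pickle_read=True)
vetiver_pin_write(b, v)  # each write creates a new version on the board

To see how this model performs, compute metrics on the test set: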
from sklearn import metrics

# note: metrics.root_mean_squared_error requires scikit-learn >= 1.4
metric_set = [metrics.root_mean_squared_error, metrics.r2_score, metrics.mean_absolute_error]
y_predictions = pd.Series(housing_fit.predict(X_test))

housing_metrics = pd.DataFrame()
for metric in metric_set:
    metric_name = str(metric.__name__)
    metric_output = metric(y_test, y_predictions)
    housing_metrics = pd.concat(
        (
            housing_metrics,
            pd.DataFrame({"name": [metric_name], "score": [metric_output]}),
        ),
        axis=0,
    )
housing_metrics.reset_index(inplace=True, drop=True)
housing_metrics
#> name score
#> 0 root_mean_squared_error 0.155040
#> 1 r2_score 0.534481
#> 2 mean_absolute_error 0.121908
Activity
Compute metrics for your model using the testing data.
Store these metrics as metadata in a vetiver model object.
Write this new vetiver model object as a new version of your pin.
07:00
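One way to approach this activity in Python, reusing the housing_metrics dataframe computed above (the pin name is again hypothetical):

from vetiver import VetiverModel, vetiver_pin_write

v = VetiverModel(
    housing_fit,
    model_name="user.name/seattle-housing",
    prototype_data=X_train,
    metadata=housing_metrics.to_dict(),
)
vetiver_pin_write(b, v)  # a new version of the pin, now carrying the metrics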
How do we extract our metrics to use them?
Activity
Obtain the metrics metadata for your versioned model.
Optional: Redeploy your model to your Connect server, then obtain the metrics metadata for your neighbor’s model by calling the /metadata endpoint for their API.
What else might you want to store as model metadata?
How or when might you use model metadata?
07:00
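A sketch of getting this metadata back out, first from the pin itself and then over HTTP (the URL below is a placeholder, not a real deployment):

from vetiver import VetiverModel
import requests

v = VetiverModel.from_pin(b, "user.name/seattle-housing")
v.metadata.user  # the user-supplied metadata, including the stored metrics

r = requests.get("https://connect.example.com/content/housing/metadata")
r.json()

To serve predictions, vetiver can also generate an API app file; for R, the generated Plumber file looks like this: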
# Generated by the vetiver package; edit with care
library(pins)
library(plumber)
library(rapidoc)
library(vetiver)
# Packages needed to generate model predictions
if (FALSE) {
library(parsnip)
library(ranger)
library(workflows)
}
b <- board_connect(auth = "envvar")
v <- vetiver_pin_read(b, "julia.silge/seattle-housing-rstats", version = "117005")
#* @plumber
function(pr) {
  pr %>% vetiver_api(v)
}
Activity
Create a Plumber or FastAPI app file to serve your model’s predictions.
Run this app locally and check out the visual documentation again.
05:00
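For a Python model, a minimal FastAPI app file follows the same pattern (vetiver.write_app() can generate one for you; the pin name is hypothetical):

from vetiver import VetiverModel
import vetiver
import pins

b = pins.board_connect(allow_pickle_read=True)
v = VetiverModel.from_pin(b, "user.name/seattle-housing")

vetiver_api = vetiver.VetiverAPI(v)
api = vetiver_api.app
# serve locally with: uvicorn app:api --port 8080

You are not limited to the endpoints vetiver generates. The next app file extends the generated Plumber app with a SHAP-based /explain endpoint: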
# Generated by the vetiver package; edit with care
library(pins)
library(plumber)
library(rapidoc)
library(vetiver)
library(DALEXtra)
library(dplyr)
# Packages needed to generate model predictions
if (FALSE) {
library(parsnip)
library(ranger)
library(workflows)
}
b <- board_connect(auth = "envvar")
v <- vetiver_pin_read(b, "julia.silge/seattle-housing-rstats", version = "117005")
explainer <- pin_read(b, "julia.silge/seattle-shap-rstats")
handler_explain <- function(req) {
  new_data <- req$body
  new_data <- vetiver_type_convert(new_data, v$prototype)
  shap <- predict_parts(explainer, new_data, type = "shap", B = 25)
  shap |> group_by(variable) |> summarise(contribution = mean(contribution))
}

#* @plumber
function(pr) {
  pr |>
    vetiver_api(v) |>
    pr_post(path = "/explain", handler = handler_explain)
}
from vetiver import VetiverModel
import vetiver
import pins
from dotenv import load_dotenv
load_dotenv()
b = pins.board_connect(allow_pickle_read=True)
v = VetiverModel.from_pin(b, 'isabel.zimmerman/seattle-housing-python')
connect_url = "https://pub.demo.posit.team/public"
pin_path = {"shap_python": "seattle-shap-python/"}
shap_board = pins.board_url(connect_url, pin_path, allow_pickle_read = True)
explainer = shap_board.pin_read("shap_python")
def shap_explainer(data):
    import pandas as pd
    values_as_json = pd.DataFrame(explainer.shap_values(data)).to_json(orient='records')
    return values_as_json
vetiver_api = vetiver.VetiverAPI(v)
vetiver_api.vetiver_post(shap_explainer, "shap")
api = vetiver_api.app
Activity
Add a new endpoint to the API app file you already made.
Run the app locally and check out your new endpoint.
How might you want to use an additional endpoint?
07:00
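Once the app is running locally, one way to try the custom endpoint from Python (the port, path, and payload shape are assumptions based on the app above):

import requests

new_home = [{"bedrooms": 3, "bathrooms": 2, "sqft_living": 1800, "yr_built": 1975}]
r = requests.post("http://127.0.0.1:8080/shap", json=new_home)
r.json()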