Initialize tracking emulating how SIENTIA™ website environment works¶
In [1]:
Copied!
import sientia_tracker.regression as regression
# Initialize tracking
tracking_uri = "file:./tmp/mlruns"
username = "example_user"
password = "example_password"
project_name = "example_project_regression"
tracker = regression.RegressionTracker(tracking_uri,username,password)
tracker.set_project(project_name)
import sientia_tracker.regression as regression
# Initialize tracking
tracking_uri = "file:./tmp/mlruns"
username = "example_user"
password = "example_password"
project_name = "example_project_regression"
tracker = regression.RegressionTracker(tracking_uri,username,password)
tracker.set_project(project_name)
Experiment example_project_regression already exists
Set parameters needed to save the model. They are: the experiment name, inputs, training size, a flag to indicate if the data was shuffled¶
In [2]:
Copied!
dataset_name= "California Housing"
inputs= "MedInc, HouseAge, AveRooms, AveOccup, Latitude, Longitude"
train_size = 0.8
shuffle = False
dataset_name= "California Housing"
inputs= "MedInc, HouseAge, AveRooms, AveOccup, Latitude, Longitude"
train_size = 0.8
shuffle = False
Load the dataset and create a model using default values of the run parameters¶
In [3]:
Copied!
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load dataset
data = fetch_california_housing()
X = data.data
y = data.target
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = train_size, random_state=42, shuffle=shuffle)
# Initialize and train model
model = DecisionTreeRegressor()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Calculate metrics
r2 = r2_score(y_test, y_pred)
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# Load dataset
data = fetch_california_housing()
X = data.data
y = data.target
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = train_size, random_state=42, shuffle=shuffle)
# Initialize and train model
model = DecisionTreeRegressor()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
# Calculate metrics
r2 = r2_score(y_test, y_pred)
Initialize run¶
In [4]:
Copied!
run = tracker.save_experiment(model, dataset_name=dataset_name, inputs=inputs, train_size=train_size, r2=r2,shuffle=shuffle)
run_id = run.info.run_id
run = tracker.save_experiment(model, dataset_name=dataset_name, inputs=inputs, train_size=train_size, r2=r2,shuffle=shuffle)
run_id = run.info.run_id
Saving experiment example_project_regression
Log models and metrics¶
In [5]:
Copied!
# Log parameters and metrics
tracker.log_params({"max_iter": 1000})
# Log model
artifact_path = "Regression_for_CaliforniaHousing"
tracker.log_model(model, artifact_path)
# Log parameters and metrics
tracker.log_params({"max_iter": 1000})
# Log model
artifact_path = "Regression_for_CaliforniaHousing"
tracker.log_model(model, artifact_path)
Retrieve information of run¶
In [6]:
Copied!
# Retrieve the run using the run ID
retrieved_run = tracker.client.get_run(run_id)
# Access and print metrics and params
metrics = retrieved_run.data.metrics
params = retrieved_run.data.params
print("Metrics:", metrics)
for key, value in params.items():
print( key,':' ,value)
# Retrieve the run using the run ID
retrieved_run = tracker.client.get_run(run_id)
# Access and print metrics and params
metrics = retrieved_run.data.metrics
params = retrieved_run.data.params
print("Metrics:", metrics)
for key, value in params.items():
print( key,':' ,value)
Metrics: {'r2': 0.41770236548696527} Dataset : California Housing Date Column : date Inputs : MedInc, HouseAge, AveRooms, AveOccup, Latitude, Longitude max_iter : 1000 Model : Linear Regression Shuffle : False Target : target Train Size : 0.8