# Copyright 2021 QuantumBlack Visual Analytics Limited
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND
# NONINFRINGEMENT. IN NO EVENT WILL THE LICENSOR OR OTHER CONTRIBUTORS
# BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# The QuantumBlack Visual Analytics Limited ("QuantumBlack") name and logo
# (either separately or in combination, "QuantumBlack Trademarks") are
# trademarks of QuantumBlack. The License does not grant you any right or
# license to the QuantumBlack Trademarks. You may not use the QuantumBlack
# Trademarks or any confusingly similar mark as a trademark for your product,
# or use the QuantumBlack Trademarks in any other manner that might cause
# confusion in the marketplace, including but not limited to in advertising,
# on websites, or on software.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example code for the nodes in the example pipeline. This code is meant
just for illustrating basic Kedro features.
Delete this when you start working on your own Kedro project.
# pylint: disable=invalid-name

import logging
from typing import Any, Dict

import matplotlib.pyplot as plt
import neptune.new as neptune
import numpy as np
import pandas as pd
from scikitplot.metrics import plot_confusion_matrix


def train_model(
    train_x: pd.DataFrame, train_y: pd.DataFrame, parameters: Dict[str, Any]
) -> np.ndarray:
    """Node for training a simple multi-class logistic regression model. The
    number of training iterations as well as the learning rate are taken from
    conf/project/parameters.yml. All of the data as well as the parameters
    will be provided to this function at the time of execution.
    """
    num_iter = parameters["example_num_train_iter"]
    lr = parameters["example_learning_rate"]
    X = train_x.to_numpy()
    Y = train_y.to_numpy()

    # Add bias to the features
    bias = np.ones((X.shape[0], 1))
    X = np.concatenate((bias, X), axis=1)

    weights = []
    # Train one model for each class in Y
    for k in range(Y.shape[1]):
        # Initialise the weights and pick out the one-hot column for class k
        theta = np.zeros(X.shape[1])
        y = Y[:, k]
        for _ in range(num_iter):
            h = _sigmoid(np.dot(X, theta))
            gradient = np.dot(X.T, (h - y)) / y.size
            theta -= lr * gradient
        # Save the weights for each model
        weights.append(theta)

    # Return a joint multi-class model with weights for all classes
    return np.vstack(weights).transpose()
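
# train_model reads "example_num_train_iter" and "example_learning_rate" from
# the parameters file mentioned in its docstring. An illustrative entry might
# look like the following (values are examples only, not necessarily this
# project's defaults):
#
#     example_num_train_iter: 10000
#     example_learning_rate: 0.01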


def predict(model: np.ndarray, test_x: pd.DataFrame) -> np.ndarray:
    """Node for making predictions given a pre-trained model and a test set."""
    X = test_x.to_numpy()

    # Add bias to the features
    bias = np.ones((X.shape[0], 1))
    X = np.concatenate((bias, X), axis=1)

    # Predict "probabilities" for each class
    result = _sigmoid(np.dot(X, model))

    # Return the index of the class with max probability for all samples
    return np.argmax(result, axis=1)


def report_accuracy(
    predictions: np.ndarray,
    test_y: pd.DataFrame,
    neptune_run: neptune.run.Handler,
) -> None:
    """Node for reporting the accuracy of the predictions performed by the
    previous node. Notice that this function has no outputs, except logging.
    """
    target = np.argmax(test_y.to_numpy(), axis=1)

    # Calculate accuracy of predictions
    accuracy = np.sum(predictions == target) / target.shape[0]

    # Log the accuracy of the model
    log = logging.getLogger(__name__)
    log.info("Model accuracy on test set: %0.2f%%", accuracy * 100)

    # Log accuracy to Neptune
    neptune_run["nodes/report/accuracy"] = accuracy * 100

    # Log confusion matrix to Neptune
    fig, ax = plt.subplots()
    plot_confusion_matrix(target, predictions, ax=ax)
    neptune_run["nodes/report/confusion_matrix"].upload(fig)
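
# Note: the `neptune_run` handler above is assumed to be supplied by the
# kedro-neptune plugin, which exposes the current Neptune run as a dataset
# that a node can list among its inputs in the pipeline definition. A sketch
# (dataset names here are illustrative):
#
#     node(
#         report_accuracy,
#         ["example_predictions", "example_test_y", "neptune_run"],
#         None,
#     )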
"""A helper sigmoid function used by the training and the scoring nodes."""
return 1 / (1 + np.exp(-z))
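

if __name__ == "__main__":
    # Minimal smoke test (illustrative only, not part of the original example):
    # build a tiny synthetic two-class dataset, then train, predict and print
    # the training-set accuracy without a Kedro session or a Neptune run.
    rng = np.random.default_rng(42)
    features = pd.DataFrame(rng.normal(size=(100, 4)), columns=list("abcd"))
    labels = (features["a"] + features["b"] > 0).astype(int)
    one_hot = pd.get_dummies(labels).astype(float)  # one column per class

    params = {"example_num_train_iter": 1000, "example_learning_rate": 0.01}
    model = train_model(features, one_hot, params)
    predictions = predict(model, features)
    target = np.argmax(one_hot.to_numpy(), axis=1)
    print("Training-set accuracy:", np.sum(predictions == target) / target.shape[0])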