Data file: yelp_labelled.txt

SentimentData.cs

// <SnippetAddUsings>
using Microsoft.ML.Data;
// </SnippetAddUsings>

namespace SentimentAnalysis
{
    // <SnippetDeclareTypes>
    /// <summary>
    /// Input row for the sentiment model: one review text plus its label.
    /// </summary>
    public class SentimentData
    {
        /// <summary>Review text, read from column 0 of the loaded file.</summary>
        [LoadColumn(0)]
        public string SentimentText;

        /// <summary>
        /// Sentiment label, read from column 1 of the loaded file and exposed
        /// to the pipeline under the column name "Label".
        /// </summary>
        [LoadColumn(1)]
        [ColumnName("Label")]
        public bool Sentiment;
    }

    /// <summary>
    /// Output row of the trained model. Inherits the input fields from
    /// <see cref="SentimentData"/> so the original text can be printed
    /// next to its prediction.
    /// </summary>
    public class SentimentPrediction : SentimentData
    {
        /// <summary>
        /// Predicted label, bound to the "PredictedLabel" output column.
        /// The sample prints <c>true</c> as "Positive" and <c>false</c> as "Negative".
        /// </summary>
        [ColumnName("PredictedLabel")]
        public bool Prediction { get; set; }

        /// <summary>Value of the model's "Probability" output column.</summary>
        public float Probability { get; set; }

        /// <summary>Value of the model's "Score" output column.</summary>
        public float Score { get; set; }
    }
    // </SnippetDeclareTypes>
}

Program.cs

// <SnippetAddUsings>
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms.Text;
// </SnippetAddUsings>

namespace SentimentAnalysis
{
    class Program
    {
        // <SnippetDeclareGlobalVariables>
        // Full path of the labelled data file: <current directory>/Data/yelp_labelled.txt.
        private static readonly string _dataPath = Path.Combine(Environment.CurrentDirectory, "Data", "yelp_labelled.txt");
        // </SnippetDeclareGlobalVariables>

        /// <summary>
        /// Runs the sample end to end: loads and splits the data, trains the model,
        /// evaluates it on the test split, then predicts a single sample and a batch.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Create ML.NET context/local environment - allows you to add steps in order to keep everything together
            // as you discover the ML.NET trainers and transforms
            // <SnippetCreateMLContext>
            MLContext mlContext = new MLContext();
            // </SnippetCreateMLContext>

            // <SnippetCallLoadData>
            TrainTestData splitDataView = LoadData(mlContext);
            // </SnippetCallLoadData>

            // <SnippetCallBuildAndTrainModel>
            ITransformer model = BuildAndTrainModel(mlContext, splitDataView.TrainSet);
            // </SnippetCallBuildAndTrainModel>

            // <SnippetCallEvaluate>
            Evaluate(mlContext, model, splitDataView.TestSet);
            // </SnippetCallEvaluate>

            // <SnippetCallUseModelWithSingleItem>
            UseModelWithSingleItem(mlContext, model);
            // </SnippetCallUseModelWithSingleItem>

            // <SnippetCallUseModelWithBatchItems>
            UseModelWithBatchItems(mlContext, model);
            // </SnippetCallUseModelWithBatchItems>

            Console.WriteLine();
            Console.WriteLine("=============== End of process ===============");
        }

        /// <summary>
        /// Loads the labelled text file at <c>_dataPath</c> and splits it into
        /// training and test sets.
        /// </summary>
        /// <param name="mlContext">ML.NET context used for loading and splitting.</param>
        /// <returns>The train/test split, with 20% of the rows requested for the test set.</returns>
        public static TrainTestData LoadData(MLContext mlContext)
        {
            // Note that this case, loading your training data from a file,
            // is the easiest way to get started, but ML.NET also allows you
            // to load data from databases or in-memory collections.
            // <SnippetLoadData>
            IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath, hasHeader: false);
            // </SnippetLoadData>

            // You need both a training dataset to train the model and a test dataset to evaluate the model.
            // Split the loaded dataset into train and test datasets.
            // Specify the test dataset percentage with the testFraction parameter.
            // <SnippetSplitData>
            TrainTestData splitDataView = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
            // </SnippetSplitData>

            // <SnippetReturnSplitData>
            return splitDataView;
            // </SnippetReturnSplitData>
        }

        /// <summary>
        /// Builds a pipeline that featurizes the review text and appends an SDCA
        /// logistic-regression binary classifier, then fits it to the training data.
        /// </summary>
        /// <param name="mlContext">ML.NET context that supplies the transforms and trainers.</param>
        /// <param name="splitTrainSet">Training portion of the data set.</param>
        /// <returns>The fitted model.</returns>
        public static ITransformer BuildAndTrainModel(MLContext mlContext, IDataView splitTrainSet)
        {
            // Create a flexible pipeline (composed by a chain of estimators) for creating/training the model.
            // This is used to format and clean the data.
            // Convert the text column to numeric vectors (Features column)
            // <SnippetFeaturizeText>
            var estimator = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: nameof(SentimentData.SentimentText))
            // </SnippetFeaturizeText>
            // append the machine learning task to the estimator
            // <SnippetAddTrainer>
                .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Label", featureColumnName: "Features"));
            // </SnippetAddTrainer>

            // Create and train the model based on the dataset that has been loaded, transformed.
            // <SnippetTrainModel>
            Console.WriteLine("=============== Create and Train the Model ===============");
            var model = estimator.Fit(splitTrainSet);
            Console.WriteLine("=============== End of training ===============");
            Console.WriteLine();
            // </SnippetTrainModel>

            // Returns the model we trained to use for evaluation.
            // <SnippetReturnModel>
            return model;
            // </SnippetReturnModel>
        }

        /// <summary>
        /// Scores the test set with the trained model and prints accuracy,
        /// area under the ROC curve, and F1 score to the console.
        /// </summary>
        /// <param name="mlContext">ML.NET context that supplies the evaluator.</param>
        /// <param name="model">Trained model to evaluate.</param>
        /// <param name="splitTestSet">Test portion of the data set.</param>
        public static void Evaluate(MLContext mlContext, ITransformer model, IDataView splitTestSet)
        {
            // Evaluate the model and show accuracy stats

            // Take the data in, make transformations, output the data.
            // <SnippetTransformData>
            Console.WriteLine("=============== Evaluating Model accuracy with Test data===============");
            IDataView predictions = model.Transform(splitTestSet);
            // </SnippetTransformData>

            // BinaryClassification.Evaluate returns a CalibratedBinaryClassificationMetrics
            // object that contains the computed overall metrics.
            // <SnippetEvaluate>
            CalibratedBinaryClassificationMetrics metrics = mlContext.BinaryClassification.Evaluate(predictions, "Label");
            // </SnippetEvaluate>

            // The Accuracy metric gets the accuracy of a model, which is the proportion
            // of correct predictions in the test set.

            // The AreaUnderRocCurve metric is equal to the probability that the algorithm ranks
            // a randomly chosen positive instance higher than a randomly chosen negative one
            // (assuming 'positive' ranks higher than 'negative').

            // The F1Score metric gets the model's F1 score.
            // The F1 score is the harmonic mean of precision and recall:
            //  2 * precision * recall / (precision + recall).

            // <SnippetDisplayMetrics>
            Console.WriteLine();
            Console.WriteLine("Model quality metrics evaluation");
            Console.WriteLine("--------------------------------");
            Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            Console.WriteLine($"Auc: {metrics.AreaUnderRocCurve:P2}");
            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
            Console.WriteLine("=============== End of model evaluation ===============");
            // </SnippetDisplayMetrics>
        }

        /// <summary>
        /// Creates a prediction engine from the trained model, predicts the sentiment
        /// of one hard-coded sample statement, and prints the result.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to create the prediction engine.</param>
        /// <param name="model">Trained model to predict with.</param>
        private static void UseModelWithSingleItem(MLContext mlContext, ITransformer model)
        {
            // <SnippetCreatePredictionEngine1>
            PredictionEngine<SentimentData, SentimentPrediction> predictionFunction = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model);
            // </SnippetCreatePredictionEngine1>

            // <SnippetCreateTestIssue1>
            SentimentData sampleStatement = new SentimentData
            {
                SentimentText = "This was a very bad steak"
            };
            // </SnippetCreateTestIssue1>

            // <SnippetPredict>
            var resultPrediction = predictionFunction.Predict(sampleStatement);
            // </SnippetPredict>
            // <SnippetOutputPrediction>
            Console.WriteLine();
            Console.WriteLine("=============== Prediction Test of model with a single sample and test dataset ===============");

            Console.WriteLine();
            // Prediction is already a bool, so no conversion is needed before the ternary.
            Console.WriteLine($"Sentiment: {resultPrediction.SentimentText} | Prediction: {(resultPrediction.Prediction ? "Positive" : "Negative")} | Probability: {resultPrediction.Probability} ");

            Console.WriteLine("=============== End of Predictions ===============");
            Console.WriteLine();
            // </SnippetOutputPrediction>
        }

        /// <summary>
        /// Runs the trained model over two hard-coded sample statements as a batch
        /// and prints the prediction for each one.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to load the samples and read back the results.</param>
        /// <param name="model">Trained model to predict with.</param>
        public static void UseModelWithBatchItems(MLContext mlContext, ITransformer model)
        {
            // Adds some comments to test the trained model's data points.
            // <SnippetCreateTestIssues>
            IEnumerable<SentimentData> sentiments = new[]
            {
                new SentimentData
                {
                    SentimentText = "This was a horrible meal"
                },
                new SentimentData
                {
                    SentimentText = "I love this spaghetti."
                }
            };
            // </SnippetCreateTestIssues>

            // Load batch comments just created
            // <SnippetPrediction>
            IDataView batchComments = mlContext.Data.LoadFromEnumerable(sentiments);

            IDataView predictions = model.Transform(batchComments);

            // Use model to predict whether comment data is Positive (1) or Negative (0).
            IEnumerable<SentimentPrediction> predictedResults = mlContext.Data.CreateEnumerable<SentimentPrediction>(predictions, reuseRowObject: false);
            // </SnippetPrediction>

            // <SnippetAddInfoMessage>
            Console.WriteLine();

            Console.WriteLine("=============== Prediction Test of loaded model with multiple samples ===============");
            // </SnippetAddInfoMessage>

            Console.WriteLine();

            // <SnippetDisplayResults>
            foreach (SentimentPrediction prediction in predictedResults)
            {
                // Prediction is already a bool, so no conversion is needed before the ternary.
                Console.WriteLine($"Sentiment: {prediction.SentimentText} | Prediction: {(prediction.Prediction ? "Positive" : "Negative")} | Probability: {prediction.Probability} ");
            }
            Console.WriteLine("=============== End of predictions ===============");
            // </SnippetDisplayResults>
        }
    }
}

Output:

Model quality metrics evaluation
--------------------------------
Accuracy: 83.96%
Auc: 89.88%
F1Score: 84.38%
=============== End of model evaluation ===============

=============== Prediction Test of model with a single sample and test dataset ===============

Sentiment: This was a very bad steak | Prediction: Negative | Probability: 0.026840538
=============== End of Predictions ===============


=============== Prediction Test of loaded model with multiple samples ===============

Sentiment: This was a horrible meal | Prediction: Negative | Probability: 0.041398816
Sentiment: I love this spaghetti. | Prediction: Positive | Probability: 0.99719304

https://github.com/gantovnik/wordpress_examples/tree/main/ex306

Discover more from Tips and Hints for Aerospace Engineers

Subscribe now to keep reading and get access to the full archive.

Continue reading