Training a neural network with deeplearning4j for price prediction

Hi,

I want to document my first implementation of a learning algorithm that uses a neural network to train a price-prediction model.

package org.deeplearning4j.examples.recurrent;

import org.deeplearning4j.datasets.iterator.impl.ListDataSetIterator;
import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.BackpropType;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.GravesLSTM;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.deeplearning4j.util.ModelSerializer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.util.*;

/**
 * Created by Administrator on 11/23/2016.
 */


/**
 * Trains a simple feed-forward network (Dense + MSE output layer) to predict the
 * next forex candle (open, close, high, low) from a window of previous candles
 * read from a semicolon-separated CSV file.
 *
 * Input encoding per CSV row (52 values): open, close, high, low, ma89, ma200,
 * followed by month, week-of-month, day-of-week and hour in binary (one-hot-like)
 * format. Week-of-month uses 6 slots because some months span 6 week fragments.
 */
public class ForexForecaster {

    // Random number generator seed, for reproducibility
    public static final int seed = 12345;
    public static final Random rng = new Random(seed);
    // Batch size: i.e., each epoch has nSamples/batchSize parameter updates
    public static final int batchSize = 100;

    // Index of the next training sample (sliding-window start line in the CSV)
    public static int currentSampleIndex = 0;
    // Number of values per CSV row (see class javadoc for the layout)
    public static int sizeOfOneRow = 52;
    // Number of candles in one input window. (Name kept as-is — "Canles" is a
    // historical typo but the field is package-visible.)
    static int numberOfCanlesForInput = 40;
    static int sizeOfOneRowOut = 4; // open, close, high, low
    static int lengthOfOneSample = numberOfCanlesForInput * sizeOfOneRow;
    static int numberOfCandlesForOutput = 1; // number of candles to predict
    static int lengthOfOneOut = sizeOfOneRowOut * numberOfCandlesForOutput;
    // Number of sliding-window samples to build from the CSV
    static int learningSize = 227000;

    /**
     * Reads the training data, builds and trains the network, then serializes it.
     *
     * @param args unused
     * @throws Exception on I/O or training failure
     */
    public static void main( String[] args ) throws Exception {
        DataSetIterator iterator = getTrainingData("d:\\pricesFormatted.csv", numberOfCanlesForInput, numberOfCandlesForOutput);

        System.out.println("All training data was read");

        int iterations = 1;
        double learningRate = 0.001; // primitive — no need for boxed Double
        int numInput = lengthOfOneSample;
        int numOutputs = lengthOfOneOut;
        int nHidden = numInput;

        MultiLayerNetwork net = new MultiLayerNetwork(new NeuralNetConfiguration.Builder()
            .seed(seed)
            .iterations(iterations)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .learningRate(learningRate)
            .weightInit(WeightInit.XAVIER)
            .updater(Updater.NESTEROVS).momentum(0.9)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(numInput).nOut(nHidden)
                .activation("sigmoid")
                .build())
            .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                .activation("sigmoid")
                .nIn(nHidden).nOut(numOutputs).build())
            // pretrain(false): DenseLayer/OutputLayer are not pretrainable layers,
            // so unsupervised pretraining is meaningless here; train with backprop only.
            .pretrain(false).backprop(true).build()
        );

        net.init();
        net.setListeners(new ScoreIterationListener(1));

        // Print the number of parameters in the network (and for each layer)
        Layer[] layers = net.getLayers();
        int totalNumParams = 0;
        for( int i=0; i<layers.length; i++ ){
            int nParams = layers[i].numParams();
            System.out.println("Number of parameters in layer " + i + ": " + nParams);
            totalNumParams += nParams;
        }
        System.out.println("Total number of network parameters: " + totalNumParams);

        int numEpochs = 1800;
        for( int epoch = 0; epoch < numEpochs; epoch++ ){
            // BUG FIX: the iterator must be reset before each epoch. Without reset(),
            // hasNext() is false after the first pass, so the original code silently
            // trained for exactly one epoch and skipped the remaining 1799.
            iterator.reset();
            while(iterator.hasNext())
            {
                DataSet ds = iterator.next();
                net.fit(ds);
            }
        }
        System.out.println("\n\nExample complete");

        // Where to save the network. Note: the file is in .zip format - can be opened externally
        File locationToSave = new File("d:\\MyMultiLayerNetwork.zip");
        // Save the updater state (Momentum, RMSProp, Adagrad, ...) so training can resume later
        boolean saveUpdater = true;
        ModelSerializer.writeModel(net, locationToSave, saveUpdater);
    }

    /**
     * Builds a shuffled {@link DataSetIterator} of (input window, next-candle) pairs
     * from a semicolon-separated CSV file. The first line (header) is skipped.
     *
     * @param filename               path to the CSV file
     * @param numberOfCandlesForInput  candles per input window
     * @param numberOfCandlesForOutput candles to predict per sample
     * @return iterator over {@code learningSize} normalized samples, batched by {@code batchSize}
     * @throws IOException if the file cannot be read
     */
    private static DataSetIterator getTrainingData(String filename, Integer numberOfCandlesForInput, Integer numberOfCandlesForOutput) throws InterruptedException, IOException
    {
        List<String> lines = Files.readAllLines(new File(filename).toPath(),Charset.forName("UTF-8"));
        lines = lines.subList(1, lines.size()); // drop the CSV header

        int lengthInputs = learningSize * lengthOfOneSample;
        int lengthOutputs = learningSize * lengthOfOneOut;
        double []inpsArr = new double[lengthInputs];
        double [] outpsArr = new double[lengthOutputs];

        // Flat column vectors; reshaped to [samples, features] after filling
        INDArray allInputs = Nd4j.create(inpsArr, new int[] {lengthInputs, 1});
        INDArray allOutputs = Nd4j.create(outpsArr, new int[] { lengthOutputs, 1});

        int indexForAllInputs = 0;
        int indexForAllOutputs = 0;

        int currentLine = 0; // progress counter

        for (String s : lines)
        {
            if (currentLine == learningSize)
            {
                break;
            }
            double [] arr =  new double[lengthOfOneSample];
            INDArray inputsCurrent = Nd4j.create(arr, new int[] {lengthOfOneSample, 1});
            double [] arr2 = new double [lengthOfOneOut];

            INDArray outputsCurrent = Nd4j.create(arr2, new int[] {lengthOfOneOut, 1});
            if( (currentLine % 10000) == 0)
            {
                System.out.println("current line= " + currentLine);
            }
            currentLine++;

            GetInputsOutputs(lines, currentSampleIndex, numberOfCandlesForInput, numberOfCandlesForOutput, inputsCurrent, outputsCurrent);
            currentSampleIndex++;

            int endOfInputs = indexForAllInputs + inputsCurrent.size(0);

            try
            {
                for(int i = indexForAllInputs; i < endOfInputs; i++)
                {
                    double inputVal = inputsCurrent.getDouble(i - indexForAllInputs);
                    allInputs.putScalar(i, inputVal);
                }
            }
            catch(Exception ex)
            {
                // Best-effort: a malformed row leaves this sample partially zero-filled
                // rather than aborting the whole load. Kept from the original design,
                // but log the full stack trace for diagnosis.
                System.out.println("Failed to copy inputs at currentSampleIndex " + currentSampleIndex);
                ex.printStackTrace();
            }

            indexForAllInputs += lengthOfOneSample;

            int endOfOutputs = indexForAllOutputs + outputsCurrent.size(0);
            for(int i = indexForAllOutputs; i < endOfOutputs; i++)
            {
                double outputVal = outputsCurrent.getDouble(i - indexForAllOutputs);
                allOutputs.putScalar(i, outputVal);
            }
            indexForAllOutputs += lengthOfOneOut;
        }

        // [learningSize, features] and [learningSize, targets]
        allInputs = allInputs.reshape(learningSize, lengthOfOneSample);
        allOutputs = allOutputs.reshape(learningSize, lengthOfOneOut);

        DataSet dataSet = new DataSet(allInputs, allOutputs);
        List<DataSet> listDs = dataSet.asList();
        Collections.shuffle(listDs, rng); // seeded rng keeps the shuffle reproducible

        return new ListDataSetIterator(listDs, batchSize);
    }

    /**
     * Sums the values of a list. Currently unused; kept for callers elsewhere.
     *
     * @param array values to sum (must not be null)
     * @return the sum, 0.0 for an empty list
     */
    private static double CalculateSumm(List<Double> array)
    {
        double result = 0.0;
        for (Double d : array)
        {
            result +=d;
        }
        return result;
    }

    /**
     * Fills {@code inputs} with the normalized values of {@code numberOfSamples} CSV
     * rows starting at {@code startFrom}, and {@code outputs} with the first four
     * (open, close, high, low) normalized values of the following
     * {@code predictionNumber} rows.
     *
     * Normalization divides each value by the max-min spread of the whole window;
     * literal "1" inputs (binary flags) are mapped to 0.5, and binary "0"/"1" values
     * are excluded from the outputs.
     * NOTE(review): values are divided by the spread but not shifted by min, so they
     * are not guaranteed to lie in [0,1] — confirm this matches the intended scaling.
     *
     * @param lines            all CSV data rows
     * @param startFrom        index of the first row of the window
     * @param numberOfSamples  rows used as input
     * @param predictionNumber rows used as prediction target
     * @param inputs           pre-sized column vector receiving normalized inputs
     * @param outputs          pre-sized column vector receiving normalized targets
     */
    private static void GetInputsOutputs(List<String> lines, int startFrom, int numberOfSamples, int predictionNumber, INDArray inputs, INDArray outputs)
    {
        List<String> inputsAsString = lines.subList(startFrom, numberOfSamples + predictionNumber + startFrom);
        List<Double> allValues= new ArrayList<Double>();

        for( String s : inputsAsString )
        {
            String[] numbers = s.split(";");
            for(String sNumber: numbers)
            {
                double parsedValue = Double.parseDouble(sNumber);
                allValues.add(parsedValue);
            }
        }

        double max = Collections.max(allValues);
        double min = Collections.min(allValues);
        double distance = Math.abs(max - min);
        if (distance == 0.0)
        {
            // BUG FIX: a completely flat window (max == min) would divide by zero and
            // fill the sample with NaN/Infinity; use 1.0 so values pass through unscaled.
            distance = 1.0;
        }

        List<Double> trainingData = new ArrayList<Double>();
        List<String> trainingStrings = lines.subList(startFrom, numberOfSamples + startFrom);
        for(String s : trainingStrings)
        {
            String[] numbers = s.split(";");
            for(String sNumber: numbers)
            {
                if (!sNumber.equals("1"))
                {
                    double parsedValue = Double.parseDouble(sNumber);
                    double valueForAddition = parsedValue/distance;
                    trainingData.add(valueForAddition);
                }
                else
                {
                    // Binary flag "1" is mapped to the mid-range value 0.5
                    trainingData.add(0.5);
                }
            }
        }

        List<Double> forecastData = new ArrayList<Double>();
        List<String> forecastStrings = lines.subList(startFrom + numberOfSamples, startFrom + numberOfSamples + predictionNumber);

        for(String s : forecastStrings)
        {
            String[] numbers = s.split(";");
            int i = 0;
            for(String sNumber: numbers)
            {
                // Skip binary flags; only real-valued price fields become targets
                if (! (sNumber.equals("1") || sNumber.equals("0")))
                {
                    double parsedValue = Double.parseDouble(sNumber);
                    double valueForAddition = parsedValue/distance;
                    forecastData.add(valueForAddition);
                }
                i++;
                if(i == 4) // only open, close, high, low are predicted
                {
                    break;
                }
            }
        }

        int i = 0;
        // (Removed a try/catch that merely rethrew the same exception unchanged.)
        for(Double t: trainingData)
        {
            inputs.putScalar(i, t);
            i++;
        }

        i = 0;
        for(Double t : forecastData)
        {
            outputs.putScalar(i, t);
            i++;
        }
    }
}

I hope it can help somebody.

No Comments

Add a Comment