fire.py
# Based on Daniel Johnson's code in https://github.com/hexahedria/biaxial-rnn-music-composition
import numpy as np
import pandas as pd
import msgpack
import glob
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
from tqdm import tqdm

import midi_manipulation


def get_songs(path):
    files = glob.glob('{}/*.mid*'.format(path))
    songs = []
    for f in tqdm(files):
        try:
            song = np.array(midi_manipulation.midiToNoteStateMatrix(f))
            if song.shape[0] > 50:
                songs.append(song)
        except Exception:
            raise
    return songs
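
# Each song is a binary note-state matrix of shape (timesteps x 2*note_range): per timestep, one bit per
# pitch for "note on" plus one for "articulation" (Daniel Johnson's representation, which this code borrows).
# The shape check above simply discards clips too short to yield a useful number of training windows.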
songs = get_songs('reggaeton_samples')
print("{} songs processed".format(len(songs)))

### Hyperparameters
# First, let's take a look at the hyperparameters of our model:

lowest_note = midi_manipulation.lowerBound   # the index of the lowest note on the piano roll
highest_note = midi_manipulation.upperBound  # the index of the highest note on the piano roll
note_range = highest_note - lowest_note      # the note range

num_timesteps = 15                           # the number of timesteps that we will create at a time
n_visible = 2 * note_range * num_timesteps   # the size of the visible layer
n_hidden = 50                                # the size of the hidden layer

num_epochs = 200  # the number of training epochs; for each epoch we go through the entire data set
batch_size = 100  # the number of training examples that we send through the RBM at a time
lr = tf.constant(0.005, tf.float32)  # the learning rate of our model

### Variables
# Next, let's look at the variables we're going to use:

x = tf.placeholder(tf.float32, [None, n_visible], name="x")  # the placeholder that holds our data
W = tf.Variable(tf.random_normal([n_visible, n_hidden], stddev=0.01), name="W")  # the weight matrix that stores the edge weights
bh = tf.Variable(tf.zeros([1, n_hidden], tf.float32), name="bh")  # the bias vector for the hidden layer
bv = tf.Variable(tf.zeros([1, n_visible], tf.float32), name="bv")  # the bias vector for the visible layer

#### Helper functions

# This function lets us easily sample from a vector of probabilities
def sample(probs):
    # Takes in a vector of probabilities, and returns a random vector of 0s and 1s sampled from the input vector
    return tf.floor(probs + tf.random_uniform(tf.shape(probs), 0, 1))
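
# Why floor(p + u) works: with u ~ Uniform(0, 1), floor(p + u) equals 1 exactly when u > 1 - p, which
# happens with probability p, so each element is an independent Bernoulli(p) draw. A quick NumPy sanity
# check of the same trick (illustrative only, not executed here):
#   p = np.full(100000, 0.3)
#   np.floor(p + np.random.uniform(size=p.shape)).mean()  # ~0.3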

# This function runs the gibbs chain. We will call this function in two places:
#   - When we define the training update step
#   - When we sample our music segments from the trained RBM
def gibbs_sample(k):
    # Runs a k-step gibbs chain to sample from the probability distribution of the RBM defined by W, bh, bv
    def gibbs_step(count, k, xk):
        # Runs a single gibbs step. The visible values are initialized to xk
        hk = sample(tf.sigmoid(tf.matmul(xk, W) + bh))  # propagate the visible values to sample the hidden values
        xk = sample(tf.sigmoid(tf.matmul(hk, tf.transpose(W)) + bv))  # propagate the hidden values to sample the visible values
        return count + 1, k, xk

    # Run gibbs steps for k iterations
    ct = tf.constant(0)  # counter
    [_, _, x_sample] = control_flow_ops.while_loop(lambda count, num_iter, *args: count < num_iter,
                                                   gibbs_step, [ct, tf.constant(k), x])
    # This is not strictly necessary in this implementation, but if you want to adapt this code to use one
    # of TensorFlow's optimizers, you need this in order to stop TensorFlow from propagating gradients back
    # through the gibbs step
    x_sample = tf.stop_gradient(x_sample)
    return x_sample
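
# The larger k is, the longer the chain runs and the closer x_sample gets to a draw from the RBM's
# equilibrium distribution, at a proportional cost in compute. This script uses k = 1 everywhere:
# CD-1 for the training update below, and a single step for generation at the end.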

### Training Update Code
# Now we implement the contrastive divergence algorithm. First, we get the samples of x and h from the
# probability distribution:
x_sample = gibbs_sample(1)  # the sample of x
h = sample(tf.sigmoid(tf.matmul(x, W) + bh))  # the sample of the hidden nodes, starting from the visible state of x
h_sample = sample(tf.sigmoid(tf.matmul(x_sample, W) + bh))  # the sample of the hidden nodes, starting from the visible state of x_sample

# Next, we update the values of W, bh, and bv, based on the difference between the samples that we drew
# and the original values
size_bt = tf.cast(tf.shape(x)[0], tf.float32)
W_adder = tf.multiply(lr / size_bt, tf.subtract(tf.matmul(tf.transpose(x), h), tf.matmul(tf.transpose(x_sample), h_sample)))
bv_adder = tf.multiply(lr / size_bt, tf.reduce_sum(tf.subtract(x, x_sample), 0, True))
bh_adder = tf.multiply(lr / size_bt, tf.reduce_sum(tf.subtract(h, h_sample), 0, True))

# When we do sess.run(updt), TensorFlow will run all 3 update steps
updt = [W.assign_add(W_adder), bv.assign_add(bv_adder), bh.assign_add(bh_adder)]
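
# Written out, the CD-1 updates computed above are (with x~ = gibbs_sample(1), h~ = h_sample, and N the
# batch size):
#   W  += (lr / N) * (x^T h  -  x~^T h~)
#   bv += (lr / N) * sum over the batch of (x - x~)
#   bh += (lr / N) * sum over the batch of (h - h~)
# i.e. each parameter moves toward the statistics of the data and away from those of the model's samples.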

### Run the graph!
# Now it's time to start a session and run the graph!
with tf.Session() as sess:
    # First, we train the model.
    # Initialize the variables of the model
    init = tf.global_variables_initializer()
    sess.run(init)
    # Run through all of the training data num_epochs times
    for epoch in tqdm(range(num_epochs)):
        for song in songs:
            # The songs are stored in a time x notes format; the size of each song is timesteps_in_song x 2*note_range.
            # Here we reshape the songs so that each training example is a vector with num_timesteps x 2*note_range elements.
            song = np.array(song)
            song = song[:int(np.floor(song.shape[0] / num_timesteps) * num_timesteps)]
            song = np.reshape(song, [song.shape[0] // num_timesteps, song.shape[1] * num_timesteps])
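            # A song with T timesteps is truncated to a multiple of num_timesteps and then becomes
            # floor(T / num_timesteps) training vectors, each concatenating num_timesteps consecutive
            # timesteps into one row of length num_timesteps * 2 * note_range (= n_visible).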
            # Train the RBM on batch_size examples at a time
            for i in range(0, len(song), batch_size):
                tr_x = song[i:i + batch_size]
                sess.run(updt, feed_dict={x: tr_x})

    # Now the model is fully trained, so let's make some music!
    # Run a gibbs chain where the visible nodes are initialized to 0
    generated = gibbs_sample(1).eval(session=sess, feed_dict={x: np.zeros((10, n_visible))})
    for i in range(generated.shape[0]):
        if not generated[i, :].any():
            continue
        # Here we reshape the vector to be time x notes, and then save the vector as a midi file
        S = np.reshape(generated[i, :], (num_timesteps, 2 * note_range))
        midi_manipulation.noteStateMatrixToMidi(S, "generated_chord_{}".format(i))
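
# Assuming the midi_manipulation helper from the referenced repo, noteStateMatrixToMidi appends the ".mid"
# extension itself, so this writes generated_chord_0.mid, generated_chord_1.mid, ... to the working
# directory; any all-zero (silent) samples are skipped above.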