[신경망] LSTM을 이용한 "나비야" 작곡하기 코드

2020. 5. 8. 10:43노트/Python : 프로그래밍

 

# (text) 텍스트를 읽어주는 라이브러리 

# gTTS: text-to-speech library (install with: pip install gTTS)
from gtts import gTTS
import numpy as np

# English sample
texte = "Hi, everybody, Playing with TF is fun"
tts = gTTS(text=texte, lang="en")  # BUG FIX: was text=text (NameError) — use texte
tts.save("hi.mp3")

# Korean sample
textk = "안녕하세요. 여러분 텐서플로우 재미있어요"
tts = gTTS(text=textk, lang="ko")  # BUG FIX: was text=text (NameError) — use textk
tts.save("hiko.mp3")

ttsEn = gTTS(text=texte, lang="en")
ttsKr = gTTS(text=textk, lang="ko")

# Concatenate English (twice) + Korean speech into a single mp3.
# Context manager guarantees the file is closed even on error.
with open("enkr.mp3", "wb") as f:
    ttsEn.write_to_fp(f)
    ttsEn.write_to_fp(f)
    ttsKr.write_to_fp(f)

 

 

음계 코드

# Note-name <-> integer index tables.
# c(do) d e f g a b(ti); suffix 4 = quarter note, 8 = eighth note.
code2idx = {'c4': 0, 'd4': 1, 'e4': 2, 'f4': 3, 'g4': 4, 'a4': 5, 'b4': 6,
            'c8': 7, 'd8': 8, 'e8': 9, 'f8': 10, 'g8': 11, 'a8': 12, 'b8': 13}

# Inverse table, derived from code2idx instead of written out by hand.
idx2code = {index: code for code, index in code2idx.items()}

# The melody "Butterfly" (나비야) as a flat sequence of note codes.
seq = ['g8', 'e8', 'e4', 'f8', 'd8', 'd4', 'c8', 'd8', 'e8', 'f8', 'g8', 'g8', 'g4',
       'g8', 'e8', 'e8', 'e8', 'f8', 'd8', 'd4', 'c8', 'e8', 'g8', 'g8', 'e8', 'e8', 'e4',
       'd8', 'd8', 'd8', 'd8', 'd8', 'e8', 'f4', 'e8', 'e8', 'e8', 'e8', 'e8', 'f8', 'g4',
       'g8', 'e8', 'e4', 'f8', 'd8', 'd4', 'c8', 'e8', 'g8', 'g8', 'e8', 'e8', 'e4']
# pip install gTTS 설치

소리 데이터

소리.zip
0.02MB

 

데이터 전처리

def seq2dataset(seq, windowSize, mapping=None):
    """Slide a window of size windowSize+1 over seq and integer-encode it.

    Parameters
    ----------
    seq : list[str]
        Sequence of note-code strings (e.g. 'g8').
    windowSize : int
        Number of input steps per sample; each row also carries one target.
    mapping : dict[str, int] | None
        code -> index table; defaults to the module-level code2idx
        (parameterized so the function is reusable with other vocabularies).

    Returns
    -------
    np.ndarray of shape (len(seq) - windowSize, windowSize + 1):
        first windowSize columns are inputs, last column is the target note.
    """
    if mapping is None:
        mapping = code2idx  # module-level table (original behavior)
    return np.array([[mapping[item] for item in seq[i:i + windowSize + 1]]
                     for i in range(len(seq) - windowSize)])
# Build the supervised dataset: 4 input notes + 1 target note per row.
dataset=seq2dataset(seq,windowSize=4)
dataset.shape  # 54 notes - window 4 = 50 samples of width 5

>>> (50, 5)
xTrain = dataset[:, 0:4]  # 4 input notes per sample (e.g. na-bi-ya-na ...)
yTrain = dataset[:, 4]    # 5th note is the prediction target
maxIdxValue = 13          # largest code index; scales inputs into [0, 1]
# Normalize the input indices.
xTrain = xTrain / maxIdxValue
xTrain

# FIX: `from keras.utils import np_utils` no longer works in modern Keras;
# to_categorical is importable directly from keras.utils in old and new
# versions alike, so this is backward compatible.
from keras.utils import to_categorical
yTrain = to_categorical(yTrain)
yTrain.shape  # one-hot; only 12 of the 14 table pitches occur in seq -> (50, 12)
>>> (50, 12)

 

모델생성

# Number of one-hot classes (12 pitch classes appear in the data).
oneHotVecSize = yTrain.shape[1]

from keras.models import Sequential
from keras.layers import Dense
import keras

# Simple MLP: 4 normalized note indices in -> softmax over the pitches.
model = Sequential([
    Dense(128, input_dim=4, activation="relu"),
    Dense(128, activation="relu"),
    Dense(oneHotVecSize, activation="softmax"),
])

model.summary()
>>> 
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 128)               640       
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_3 (Dense)              (None, 12)                1548      
=================================================================
Total params: 18,700
Trainable params: 18,700
Non-trainable params: 0
_________________________________________________________________

# Training configuration: cross-entropy matches the one-hot targets.
model.compile(loss="categorical_crossentropy", 
              optimizer="adam", 
              metrics=["accuracy"])

 

모델학습

class LossHistory(keras.callbacks.Callback):
    """Record the training loss at the end of every epoch.

    BUG FIX: the original defined `init()` and `onEpochEnd()`, which Keras
    never calls — the hook names must be `on_train_begin` / `on_epoch_end`.
    As written, `self.losses` was never even created, so the callback
    silently collected nothing.
    """

    def on_train_begin(self, logs=None):
        # Reset the log at the start of each training run.
        self.losses = []

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.losses.append(logs.get("loss"))


history = LossHistory()

model.fit(xTrain, yTrain, epochs=2000, batch_size=10, callbacks=[history])

 

모델평가 

# Evaluate on the training data (this toy example has no held-out set).
scores = model.evaluate(xTrain, yTrain)
print(f"{model.metrics_names[1]} : {scores[1] * 100:.2f}%")
print(f"{model.metrics_names[0]} : {scores[0] * 100:.2f}%")

>>> 
50/50 [==============================] - 0s 59us/step
accuracy : 92.00%
loss : 15.08%

 

모델예측 

# 예측
seqOut=["g8","e8","e4","f8"]
predOut=model.predict(xTrain)

predCount = 50
for i in range(predCount):
    idx = np.argmax(predOut[i])
    seqOut.append( idx2code[idx])
print(seqOut)

>>> 
['g8', 'e8', 'e4', 'f8', 'd8', 'd4', 'c8', 'e8', 'e8', 'f8', 'g8', 'g8', 'g4', 'g8', 'e8', 'e8', 'e8', 'f8', 'g4', 'd4', 'c8', 'e8', 'g8', 'g8', 'e8', 'e8', 'e4', 'd8', 'd8', 'd8', 'd8', 'e8', 'e8', 'f4', 'e8', 'e8', 'e8', 'e8', 'e8', 'e8', 'g4', 'g8', 'e8', 'e4', 'f8', 'd8', 'd4', 'c8', 'e8', 'g8', 'g8', 'e8', 'e8', 'e4']

 

# LSTM으로 정확도 높여 전체 곡 작곡하기 

 

from keras.layers import LSTM

# LSTM layer: 128 memory cells; input_shape=(timesteps, features)=(4, 1).
# stateful defaults to False (state reset after each batch).
model = Sequential()
model.add(LSTM(128, input_shape=(4, 1)))
model.add(Dense(oneHotVecSize, activation="softmax"))

# LSTM input must be 3-D: (samples, timesteps, features).
# IMPROVEMENT: -1 lets NumPy infer the sample count instead of
# hard-coding 50, so the reshape works for any dataset length.
xTrain = np.reshape(xTrain, (-1, 4, 1))

# Training configuration (same as the MLP).
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])


타임스텝 : 샘플 1개에 포함된 시퀀스 갯수를 의미함. 
속성: 입력되는 음표 1개당 index 1개 입력 

 

model.fit(xTrain, yTrain, epochs= 2000, batch_size=14,callbacks=[history])

 

# Evaluate the stateless LSTM on the training data.
scores = model.evaluate(xTrain, yTrain)
print(f"{model.metrics_names[1]} : {scores[1] * 100:.2f}%")
print(f"{model.metrics_names[0]} : {scores[0] * 100:.2f}%")

>>> 
50/50 [==============================] - 0s 1ms/step
accuracy : 88.00%
loss : 29.21%

LSTM(메모리셀 개수, input_dim=입력 속성 수, input_length=타임스텝 수, return_sequences=시퀀스 출력 여부)
- return_sequences=False : 마지막 시퀀스에서 한 번만 출력 (many to one)
- return_sequences=True : 각 시퀀스마다 출력 (many to many)

# stateful: 상태유지모드 
학습샘플에 가장 마지막 상태가 다음 샘플 학습할 때, 입력으로 전달할 것인지에 대한 여부
(False = 전달안함 )
상태유지모드 (stateful)에서 현재 샘플의 학습 상태가 다음 샘플의 초기 상태로 전달 

LSTM 셀 내부적으로 기억할 것은 기억하고, 버릴 것은 버리도록 하기 위한 옵션 : stateful = True 

상태유지모드에서는 입력형태를 (배치사이즈, 타임스텝, 속성)으로 설정 

 

# Stateful LSTM: batch_input_shape=(batch, timesteps, features) is required
# so the state tensors get a fixed batch size (1 here). With stateful=True
# the final state of one batch seeds the next (until reset_states()).
model = Sequential([
    LSTM(128, batch_input_shape=(1, 4, 1), stateful=True),
    Dense(oneHotVecSize, activation="softmax"),
])

# Training configuration.
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])

- 한 에폭안에서 여러개의 시퀀스 데이터가 있을 때, 
새로운 시퀀스 데이터를 학습하기 전에 상태 초기화 필요

- 마지막 샘플학습을 마치고 나서, 새로운 에폭이 시작될 때, 
새로운 샘플 학습을 해야하므로 상태 초기화 필요 

 

모델학습

numEpochs = 2000

model.reset_states()

# Train one epoch at a time so the LSTM state can be wiped between epochs
# (required in stateful mode); shuffle=False preserves the sequence order.
for epoch in range(numEpochs):
    print("에폭:" + str(epoch))
    model.fit(xTrain, yTrain, epochs=1, batch_size=1,
              callbacks=[history], shuffle=False)
    model.reset_states()  # clear hidden/cell state before the next epoch

모델 평가

# Stateful model must be evaluated with the same batch size it was built with.
scores = model.evaluate(xTrain, yTrain, batch_size=1)
print(f"{model.metrics_names[1]}: {scores[1] * 100:.2f}%")
print(f"{model.metrics_names[0]}: {scores[0] * 100:.2f}%")

>>>
50/50 [==============================] - 0s 860us/step
accuracy: 94.00%
loss: 45.64%

 

모델 예측

# 예측
# One-step prediction per training window; batch_size=1 to match the
# stateful model's fixed batch size.
seqOut = ['g8', 'e8', 'e4', 'f8']  # seed: the first four notes
predOut = model.predict(xTrain, batch_size=1)
predCount = 50
for step in range(predCount):
    seqOut.append(idx2code[np.argmax(predOut[step])])
print(seqOut)

>>>
['g8', 'e8', 'e4', 'f8', 'd8', 'd4', 'c8', 'e8', 'g8', 'g8', 'g8', 'g8', 'g4', 'g8', 'e8', 'e8', 'e8', 'f8', 'd8', 'd4', 'c8', 'e8', 'g8', 'g8', 'e8', 'e8', 'e4', 'd8', 'd8', 'd8', 'd8', 'd8', 'e8', 'f4', 'e8', 'e8', 'e8', 'e8', 'e8', 'f8', 'g4', 'g8', 'e8', 'e4', 'f8', 'd8', 'd4', 'c8', 'e8', 'g8', 'g8', 'e8', 'e8', 'e4']
model.reset_states()

# Compose the whole tune: seed with four notes, then repeatedly feed the
# model its own output through a sliding window of normalized indices.
seed = ['g8', 'e8', 'e4', 'f8']
seqOut = seed  # collected note names (alias of the seed list, as before)
seqIn = [code2idx[c] / float(maxIdxValue) for c in seed]  # normalized window

for _ in range(predCount):
    # (samples, timesteps, features) = (1, 4, 1)
    window = np.reshape(np.array(seqIn), (1, 4, 1))
    probs = model.predict(window)
    idx = np.argmax(probs)
    seqOut.append(idx2code[idx])
    # Slide the window: append the new normalized index, drop the oldest.
    seqIn.append(idx / float(maxIdxValue))
    seqIn.pop(0)
print(seqOut)
    
>>> 
['g8', 'e8', 'e4', 'f8', 'd8', 'd4', 'c8', 'd8', 'e8', 'f8', 'g8', 'g8', 'g4', 'g8', 'e8', 'e8', 'e8', 'f8', 'd8', 'd4', 'c8', 'e8', 'g8', 'g8', 'e8', 'e8', 'e4', 'd8', 'd8', 'd8', 'd8', 'd8', 'e8', 'f4', 'e8', 'e8', 'e8', 'e8', 'e8', 'f8', 'g4', 'g8', 'e8', 'e4', 'f8', 'd8', 'd4', 'c8', 'e8', 'g8', 'g8', 'e8', 'e8', 'e4']