'분류 전체보기'에 해당되는 글 11건
- 2022.09.19
- 2021.05.18
-
2019.07.10
8 강화학습 1
- 2019.05.27
- 2019.05.27
- 2019.05.24
- 2019.05.24
- 2019.05.24
- 2019.05.24
- 2017.02.26
https://www.samsungcard.com/LkpmOutLinkfdkDta.do?linkInf=u8Rkb7XAanPY3RumBfTb6uDIwZlaAg%252Bg%252FLv7fRIwatjl1kqA3DVA3SFjt%252FZgOguP
https://www.samsungcard.com/LkpmOutLinkfdkDta.do?linkInf=u8Rkb7XAanPY3RumBfTb6uDIwZlaAg%2Bg%2FLv7fRIwatjl1kqA3DVA3SFjt%2FZgOguP
https://www.samsungcard.com/LkpmOutLinkfdkDta.do?linkInf=u8Rkb7XAanPY3RumBfTb6uDIwZlaAg%2Bg%2FLv7fRIwatjl1kqA3DVA3SFjt%2FZgOguP
https://www.samsungcard.com/personal/services/link/UHPPBE0408M0.jsp?linkInf=u8Rkb7XAanPY3RumBfTb6uDIwZlaAg%2Bg%2FLv7fRIwatjl1kqA3DVA3SFjt%2FZgOguP
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 강화 학습\n",
"\n",
"상태(state)는 주식값에 대한 이전값, 현재 예산, 그리고 주식의 수를 나타내는 것이다.\n",
"\n",
"행동(action)은 사거나, 팔거나, 아니면 그냥 가지고 있는 것을 말한다.\n",
"\n",
"주식 시장 데이터는 Yahoo Finance library에서 가져오며, pip install yahoofinancials 로 설치할 수 있다.\n",
"(참조 : https://github.com/JECSand/yahoofinancials )"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# 주식 library\n",
"from yahoofinancials import YahooFinancials\n",
"\n",
"# 필요 Library\n",
"%matplotlib inline\n",
"from matplotlib import pyplot as plt \n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#라이브러리에서 주가 호출\n",
"def get_prices(share_symbol, start_date, end_date, interval='daily'): \n",
" share = YahooFinancials(share_symbol)\n",
" stock_hist = share.get_historical_price_data(start_date, end_date, interval)\n",
" stock_prices = list()\n",
" price_tab = stock_hist[share_symbol]['prices']\n",
" \n",
" for i in range(len(price_tab)):\n",
" stock_prices.append(price_tab[i]['open'])\n",
" stock_prices = np.array(stock_prices)\n",
" return stock_prices.astype(float)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# 주식값 그래프 출력\n",
"def plot_prices(prices):\n",
" plt.title('Opening stock prices')\n",
" plt.xlabel('day')\n",
" plt.ylabel('price ($)')\n",
" plt.plot(prices)\n",
" plt.savefig('prices.png')\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"class DecisionPolicy:\n",
" def select_action(self, current_state):\n",
" pass\n",
" \n",
" def update_q(self, state, action, reward, next_state):\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# 무작위 행동 선정\n",
"class RandomDecisionPolicy(DecisionPolicy):\n",
" def __init__(self, actions):\n",
" self.actions = actions\n",
" \n",
" def select_action(self, current_state, step):\n",
" action = self.actions[random.randint(0, len(self.actions) - 1)]\n",
" return action"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"class QLearningDecisionPolicy(DecisionPolicy):\n",
" def __init__(self, actions, input_dim):\n",
" # Neural Network 설계시 필요한 Hyperparameter\n",
" self.epsilon = 0.9\n",
" self.gamma = 0.001\n",
" self.actions = actions\n",
" output_dim = len(actions)\n",
" h1_dim = 200\n",
" \n",
" # Neural Network 설계\n",
" self.x = tf.placeholder(tf.float32, [None, input_dim])\n",
" self.y = tf.placeholder(tf.float32, [output_dim])\n",
" W1 = tf.Variable(tf.random_normal([input_dim, h1_dim]))\n",
" b1 = tf.Variable(tf.constant(0.1, shape=[h1_dim]))\n",
" h1 = tf.nn.relu(tf.matmul(self.x, W1) + b1)\n",
" W2 = tf.Variable(tf.random_normal([h1_dim, output_dim]))\n",
" b2 = tf.Variable(tf.constant(0.1, shape=[output_dim]))\n",
" self.q = tf.nn.relu(tf.matmul(h1, W2) + b2)\n",
" loss = tf.square(self.y - self.q)\n",
" self.train_op = tf.train.AdagradOptimizer(0.01).minimize(loss)\n",
" \n",
" self.sess = tf.Session()\n",
" self.sess.run(tf.global_variables_initializer())\n",
" \n",
" def select_action(self, current_state, step):\n",
" threshold = min(self.epsilon, step / 1000.)\n",
" if random.random() < threshold:\n",
" # epsilon 확률 내에서 최고의 행동을 수행함(Exploit)\n",
" action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})\n",
" action_idx = np.argmax(action_q_vals) \n",
" action = self.actions[action_idx]\n",
" else:\n",
" # 1 – epsilon 확률 내에서 새로운 행동을 무작위로 탐색함(Explore)\n",
" action = self.actions[random.randint(0, len(self.actions) - 1)]\n",
" return action\n",
" \n",
" def update_q(self, state, action, reward, next_state):\n",
" action_q_vals = self.sess.run(self.q, feed_dict={self.x: state})\n",
" next_action_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})\n",
" next_action_idx = np.argmax(next_action_q_vals)\n",
" action_q_vals[0, next_action_idx] = reward + self.gamma * next_action_q_vals[0, next_action_idx]\n",
" action_q_vals = np.squeeze(np.asarray(action_q_vals))\n",
" self.sess.run(self.train_op, feed_dict={self.x: state, self.y: action_q_vals})"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def run_simulation(policy, initial_budget, initial_num_stocks, prices, hist):\n",
" budget = initial_budget\n",
" num_stocks = initial_num_stocks\n",
" share_value = 0\n",
" transitions = list()\n",
" for i in range(len(prices) - hist - 1):\n",
" if i % 1000 == 0:\n",
" print('progress {:.2f}%'.format(float(100*i) / (len(prices) - hist - 1)))\n",
" current_state = np.asmatrix(np.hstack((prices[i:i+hist], budget, num_stocks)))\n",
" current_portfolio = budget + num_stocks * share_value\n",
" action = policy.select_action(current_state, i)\n",
" share_value = float(prices[i + hist])\n",
" if action == 'Buy' and budget >= share_value:\n",
" budget -= share_value\n",
" num_stocks += 1\n",
" elif action == 'Sell' and num_stocks > 0:\n",
" budget += share_value\n",
" num_stocks -= 1\n",
" else:\n",
" action = 'Hold'\n",
" new_portfolio = budget + num_stocks * share_value\n",
" reward = new_portfolio - current_portfolio\n",
" next_state = np.asmatrix(np.hstack((prices[i+1:i+hist+1], budget,num_stocks)))\n",
" transitions.append((current_state, action, reward, next_state))\n",
" policy.update_q(current_state, action, reward, next_state)\n",
" portfolio = budget + num_stocks * share_value\n",
" return portfolio"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def run_simulations(policy, budget, num_stocks, prices, hist):\n",
" num_tries = 10\n",
" final_portfolios = list()\n",
" for _ in range(num_tries):\n",
" final_portfolio = run_simulation(policy, budget, num_stocks, prices,hist)\n",
" final_portfolios.append(final_portfolio)\n",
" print('Final portfolio: ${}'.format(final_portfolio))\n",
" plt.title('Final Portfolio Value')\n",
" plt.xlabel('Simulation #')\n",
" plt.ylabel('Net worth')\n",
" plt.hlines(budget, 0, num_tries, linestyles='dashed')\n",
" plt.plot(final_portfolios)\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $227077.02664661407\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $227642.20277404785\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $227440.80604076385\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $228530.6992225647\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $227601.0991754532\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $228938.4273405075\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $228326.875831604\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $229112.49336242676\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $229102.22488307953\n",
"progress 0.00%\n",
"progress 16.55%\n",
"progress 33.10%\n",
"progress 49.65%\n",
"progress 66.20%\n",
"progress 82.75%\n",
"progress 99.30%\n",
"Final portfolio: $229053.33343029022\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"if __name__ == '__main__':\n",
" prices = get_prices('MSFT', '1992-07-22', '2016-07-22')\n",
" plot_prices(prices)\n",
" actions = ['Buy', 'Sell', 'Hold']\n",
" hist = 3\n",
" #policy = RandomDecisionPolicy(actions)\n",
" policy = QLearningDecisionPolicy(actions, hist + 2)\n",
" budget = 100000.0\n",
" num_stocks = 0\n",
" run_simulations(policy, budget, num_stocks, prices, hist) "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
import tensorflow as tf

# Hyperparameters for the multiclass (softmax) regression below.
learning_rate = 0.01
training_epochs = 1000
num_labels = 3
batch_size = 100

# NOTE(review): num_features, train_size, xs, labels, test_xs and test_labels
# are not defined in this snippet; they are presumably produced by a separate
# data-preparation step that must run first -- confirm before executing.

# Inputs: one row per example; Y is compared via argmax over axis 1 below.
X = tf.placeholder(tf.float32, shape=[None, num_features])
Y = tf.placeholder(tf.float32, shape=[None, num_labels])

# Model parameters, initialised to zero. tf.zeros is a function and must be
# called; the weight matrix is named W because every later use refers to W.
W = tf.Variable(tf.zeros([num_features, num_labels]))
b = tf.Variable(tf.zeros([num_labels]))
y_model = tf.nn.softmax(tf.matmul(X, W) + b)

# Cross-entropy between the targets and the softmax output, summed over the
# batch.
cost = -tf.reduce_sum(Y * tf.log(y_model))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Fraction of examples whose highest-scoring class matches the target.
correct_prediction = tf.equal(tf.argmax(y_model, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

with tf.Session() as sess:
    tf.global_variables_initializer().run()

    # Mini-batch gradient descent; the offset wraps around the training set.
    for step in range(training_epochs * train_size // batch_size):
        offset = (step * batch_size) % train_size
        batch_xs = xs[offset:(offset + batch_size), :]
        batch_labels = labels[offset:(offset + batch_size)]
        err, _ = sess.run([cost, train_op],
                          feed_dict={X: batch_xs, Y: batch_labels})
        if step % 100 == 0:
            print(step, err)

    W_val = sess.run(W)
    print('w', W_val)
    b_val = sess.run(b)
    print('b', b_val)
    # accuracy.eval() relies on the default session, so it stays inside the
    # `with` block.
    print("accuracy", accuracy.eval(feed_dict={X: test_xs, Y: test_labels}))
import numpy as np
import matplotlib.pyplot as plt

# Three 2-D clusters of 100 points each, drawn from unit-variance normal
# distributions centred at (1, 1), (5, 4) and (8, 0).
x1_label0 = np.random.normal(1, 1, (100, 1))
x2_label0 = np.random.normal(1, 1, (100, 1))
x1_label1 = np.random.normal(5, 1, (100, 1))
x2_label1 = np.random.normal(4, 1, (100, 1))
x1_label2 = np.random.normal(8, 1, (100, 1))
x2_label2 = np.random.normal(0, 1, (100, 1))

# Each cluster is drawn with its own x1/x2 pair. The first call previously
# paired cluster 1's x1 values with cluster 0's x2 values, so cluster 0 was
# never plotted.
plt.scatter(x1_label0, x2_label0, c='r', marker='o', s=60)
plt.scatter(x1_label1, x2_label1, c='g', marker='x', s=60)
plt.scatter(x1_label2, x2_label2, c='b', marker='_', s=60)
plt.show()
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt  # module name was misspelled "matplotib"

# Gradient-descent step size and the upper bound on training epochs used by
# the two-feature logistic regression that follows.
learning_rate = 0.001
training_epochs = 1000
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of *x*.

    Accepts a scalar or a NumPy array and works element-wise.

    The value is computed as exp(-log(1 + exp(-x))) through
    ``np.logaddexp`` so that large negative inputs do not overflow inside
    ``np.exp``; the result matches the direct formula to floating-point
    precision.
    """
    return np.exp(-np.logaddexp(0.0, -x))
# Two classes of 1000 points each: class 0 centred at (3, 2) and class 1
# centred at (7, 6), both with unit standard deviation.
x1_label1 = np.random.normal(3, 1, 1000)
x2_label1 = np.random.normal(2, 1, 1000)
x1_label2 = np.random.normal(7, 1, 1000)
x2_label2 = np.random.normal(6, 1, 1000)
x1s = np.append(x1_label1, x1_label2)
x2s = np.append(x2_label1, x2_label2)
ys = np.asarray([0.] * len(x1_label1) + [1.] * len(x1_label2))

X1 = tf.placeholder(tf.float32, shape=(None,), name='x1')
X2 = tf.placeholder(tf.float32, shape=(None,), name='x2')
Y = tf.placeholder(tf.float32, shape=(None,), name='y')

# w[0] is the bias, w[1] and w[2] weight x1 and x2. The linear term is
# negated before the sigmoid; the boundary scan below uses the same sign.
w = tf.Variable([0., 0., 0.], name="w", trainable=True)
y_model = tf.sigmoid(-(w[2] * X2 + w[1] * X1 + w[0]))

# With Y in {0, 1} the argument of log is y_model for Y == 1 and
# (1 - y_model) for Y == 0, so this is the mean negative log-likelihood.
cost = tf.reduce_mean(-tf.log(y_model * Y + (1 - y_model) * (1 - Y)))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    prev_err = 0
    for epoch in range(training_epochs):
        err, _ = sess.run([cost, train_op], {X1: x1s, X2: x2s, Y: ys})
        if epoch % 100 == 0:
            print(epoch, err)
        # Stop early once the cost changes by less than 1e-4 between epochs.
        if abs(prev_err - err) < 0.0001:
            break
        prev_err = err
    w_val = sess.run(w, {X1: x1s, X2: x2s, Y: ys})

# Scan a 100 x 100 grid over [0, 10]^2 and keep the points where the model
# output is within 0.01 of 0.5; these trace the decision boundary.
grid = np.linspace(0, 10, 100)
x1_boundary, x2_boundary = [], []
for x1_test in grid:
    for x2_test in grid:
        z = sigmoid(-x2_test * w_val[2] - x1_test * w_val[1] - w_val[0])
        if abs(z - 0.5) < 0.01:
            x1_boundary.append(x1_test)
            x2_boundary.append(x2_test)

plt.scatter(x1_boundary, x2_boundary, c='b', marker='o', s=20)
plt.scatter(x1_label1, x2_label1, c='r', marker='x', s=10)
plt.scatter(x1_label2, x2_label2, c='g', marker='1', s=10)
plt.show()
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt  # module name was misspelled "matplotib"

# Gradient-descent step size and the upper bound on training epochs used by
# the one-feature logistic regression that follows.
learning_rate = 0.001
training_epochs = 1000
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) of *x*.

    Accepts a scalar or a NumPy array and works element-wise.

    The value is computed as exp(-log(1 + exp(-x))) through
    ``np.logaddexp`` so that large negative inputs do not overflow inside
    ``np.exp``; the result matches the direct formula to floating-point
    precision.
    """
    return np.exp(-np.logaddexp(0.0, -x))
# Two 1-D classes of 1000 samples each: label 0 centred at -4 and label 1
# centred at +4, both with standard deviation 2.
x1 = np.random.normal(-4, 2, 1000)
x2 = np.random.normal(4, 2, 1000)
xs = np.append(x1, x2)
ys = np.asarray([0.] * len(x1) + [1.] * len(x2))
plt.scatter(xs, ys)

X = tf.placeholder(tf.float32, shape=(None,), name='x')
Y = tf.placeholder(tf.float32, shape=(None,), name='y')

# w[0] is the bias and w[1] the slope of the logistic model.
w = tf.Variable([0., 0.], name="parameter", trainable=True)
y_model = tf.sigmoid(w[1] * X + w[0])

# Mean binary cross-entropy between targets and model output.
cost = tf.reduce_mean(-Y * tf.log(y_model) - (1 - Y) * tf.log(1 - y_model))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    prev_err = 0
    for epoch in range(training_epochs):
        err, _ = sess.run([cost, train_op], {X: xs, Y: ys})
        print(epoch, err)
        # Stop early once the cost changes by less than 1e-4 between epochs.
        if abs(prev_err - err) < 0.0001:
            break
        prev_err = err
    w_val = sess.run(w, {X: xs, Y: ys})

# Overlay the fitted sigmoid curve on the scatter of the training data.
all_xs = np.linspace(-10, 10, 100)
plt.plot(all_xs, sigmoid((all_xs * w_val[1] + w_val[0])))
plt.show()
{
"cmd": ["C:/ProgramData/Anaconda3/python.exe", "-u", "$file"],
"file_regex": "^[ ]*File \"(...*?)\", line ([0-9]*)",
"selector": "source.python"
}
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt  # module name was misspelled "matplotib"

# Ten samples around 5 carry label 0 and ten samples around 2 carry label 1
# (unit standard deviation for both groups).
x_label0 = np.random.normal(5, 1, 10)
x_label1 = np.random.normal(2, 1, 10)
xs = np.append(x_label0, x_label1)
labels = [0.] * len(x_label0) + [1.] * len(x_label1)
plt.scatter(xs, labels)

# Gradient-descent step size and number of training epochs.
learning_rate = 0.001
training_epochs = 1000

# Float placeholders with no fixed shape for the inputs and the targets.
X = tf.placeholder("float")
Y = tf.placeholder("float")
def model(X, w):
    """Build the first-degree polynomial ``w[1] * X**1 + w[0] * X**0``.

    X is the input tensor and w is indexed at positions 0 (constant term)
    and 1 (linear term). Returns the TensorFlow op for the sum.
    """
    linear_term = tf.multiply(w[1], tf.pow(X, 1))
    constant_term = tf.multiply(w[0], tf.pow(X, 0))
    return tf.add(linear_term, constant_term)
# Fit the linear model to the 0/1 labels by least squares.
w = tf.Variable([0., 0.], name="parameters")
y_model = model(X, w)
cost = tf.reduce_sum(tf.square(Y - y_model))
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# A prediction counts as correct when thresholding the model output at 0.5
# reproduces the label.
correct_prediction = tf.equal(Y, tf.to_float(tf.greater(y_model, 0.5)))
accuracy = tf.reduce_mean(tf.to_float(correct_prediction))

init = tf.global_variables_initializer()

# The context manager closes the session even if training raises.
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        sess.run(train_op, feed_dict={X: xs, Y: labels})
        # The cost is only reported every 100 epochs, so only evaluate it
        # then (after the update, as before).
        if epoch % 100 == 0:
            current_cost = sess.run(cost, feed_dict={X: xs, Y: labels})
            print(epoch, current_cost)

    w_val = sess.run(w)
    print('learned parameters', w_val)
    print('accuracy', sess.run(accuracy, feed_dict={X: xs, Y: labels}))

# Draw the fitted line over the scatter of the training data.
all_xs = np.linspace(0, 10, 100)
plt.plot(all_xs, all_xs * w_val[1] + w_val[0])
plt.show()