import tensorflow as tf


@tf.RegisterGradient("CustomClipGrad")
def _clip_grad(unused_op, grad):
    """Custom gradient registered under the name "CustomClipGrad".

    Instead of the op's real gradient, the incoming gradient is simply
    scaled by a factor of 10. The op's inputs are also printed (and
    evaluated in a throw-away session) purely for inspection.

    Args:
        unused_op: The forward op whose gradient is being overridden. Only
            its ``inputs`` are read, for the debug output below.
        grad: Gradient arriving from the downstream op, i.e. the gradient
            with respect to this op's output. By the chain rule it is
            multiplied on its way backwards.

    Returns:
        A 2-tuple ``(grad * 10, None)``: one entry per op input.
    """
    # The overridden op has two inputs; show the symbolic tensors first.
    print(unused_op.inputs)
    print(unused_op.inputs[0])
    print(unused_op.inputs[1])

    # Evaluate both inputs for inspection only. This runs while the gradient
    # graph is being built, not during the later training/evaluation session.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # First input: the variable.
        print(sess.run(unused_op.inputs[0]))
        # Second input: the constant multiplication factor (13 in this
        # script's tf.multiply call).
        print(sess.run(unused_op.inputs[1]))

    # Because the op has two inputs, two gradients must be returned (a
    # derivative is taken with respect to every input). The second input is
    # a constant factor that needs no gradient, hence None.
    # Alternatives that were tried here: `grad, None` (pass-through),
    # `tf.clip_by_value(grad, -0.1, 0.1)` (single-input clipping) and
    # `grad[0] * 10, grad[1] * 10`.
    return grad * 10, None


# NOTE: `input` shadows the Python builtin; the name is kept because the
# statements that follow in this script refer to it.
input = tf.Variable([3.0], dtype=tf.float32, name='w1')
g = tf.get_default_graph()
# Any "Mul" op created inside this context uses the gradient function
# registered as "CustomClipGrad" instead of the built-in Mul gradient.
with g.gradient_override_map({"Mul": "CustomClipGrad"}):
    output_clip = tf.multiply(input, 13)
    # Alternative that overrides an Identity op instead:
    # output_clip = tf.identity(input, name='Identity')

# Created outside the override map, so this multiplication keeps its
# regular gradient.
output_2 = output_clip * 2.0

# Back-propagation starts at the head (output_2) with an initial gradient
# that acts like identity (1), then flows backwards op by op, being
# multiplied along the way according to the chain rule.
grad_clip = tf.gradients(output_2, input)

# Output without the custom gradient in the backwards pass, for comparison:
output = tf.identity(input)
grad = tf.gradients(output, input)

print(output_clip)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("with clipping:", sess.run(grad_clip)[0])
    print("without clipping:", sess.run(grad)[0])