"""Module for constructing RNN Cells."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import math

import tensorflow as tf
from tensorflow.contrib.compiler import jit
from tensorflow.contrib.layers.python.layers import layers
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import op_def_registry
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import rnn_cell_impl
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest


def _get_concat_variable(name, shape, dtype, num_shards):
  """Get a sharded variable concatenated into one tensor."""
  sharded_variable = _get_sharded_variable(name, shape, dtype, num_shards)
  if len(sharded_variable) == 1:
    return sharded_variable[0]

  concat_name = name + "/concat"
  concat_full_name = vs.get_variable_scope().name + "/" + concat_name + ":0"
  for value in ops.get_collection(ops.GraphKeys.CONCATENATED_VARIABLES):
    if value.name == concat_full_name:
      return value

  concat_variable = array_ops.concat(sharded_variable, 0, name=concat_name)
  ops.add_to_collection(ops.GraphKeys.CONCATENATED_VARIABLES, concat_variable)
  return concat_variable


def _get_sharded_variable(name, shape, dtype, num_shards):
  """Get a list of sharded variables with the given dtype."""
  if num_shards > shape[0]:
    raise ValueError("Too many shards: shape=%s, num_shards=%d" %
                     (shape, num_shards))
  unit_shard_size = int(math.floor(shape[0] / num_shards))
  remaining_rows = shape[0] - unit_shard_size * num_shards

  shards = []
  for i in range(num_shards):
    current_size = unit_shard_size
    if i < remaining_rows:
      current_size += 1
    shards.append(vs.get_variable(name + "_%d" % i, [current_size] + shape[1:],
                                  dtype=dtype))
  return shards


class CoupledInputForgetGateLSTMCell(rnn_cell_impl.RNNCell):
  """Long short-term memory unit (LSTM) recurrent network cell.

  The default non-peephole implementation is based on:

    http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf

  S. Hochreiter and J. Schmidhuber.
  "Long Short-Term Memory". Neural Computation, 9(8):1735-1780, 1997.

  The peephole implementation is based on:

    https://research.google.com/pubs/archive/43905.pdf

  Hasim Sak, Andrew Senior, and Francoise Beaufays.
  "Long short-term memory recurrent neural network architectures for
   large scale acoustic modeling." INTERSPEECH, 2014.

  The coupling of input and forget gate is based on:

    http://arxiv.org/pdf/1503.04069.pdf

  Greff et al. "LSTM: A Search Space Odyssey"

  The class uses optional peep-hole connections, and an optional projection
  layer.
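
  For reference, the per-step update implemented in `call` below, with the
  input and forget gates coupled as f_t = 1 - i_t, is:

    i_t = sigmoid(x_t W_xi + h_{t-1} W_hi + c_{t-1} W_ci + b_i)
    c_t = (1 - i_t) * c_{t-1} + i_t * activation(x_t W_xc + h_{t-1} W_hc + b_c)
    o_t = sigmoid(x_t W_xo + h_{t-1} W_ho + c_t W_co + b_o)
    h_t = o_t * activation(c_t)

  Note that in this implementation the peephole-style terms use full weight
  matrices (`W_ci`, `W_co`) rather than the diagonal vectors of the usual
  peephole formulation.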
  """

  def __init__(self, num_units, use_peepholes=False,
               initializer=None, num_proj=None, proj_clip=None,
               num_unit_shards=1, num_proj_shards=1,
               forget_bias=1.0, state_is_tuple=False,
               activation=math_ops.tanh, reuse=None):
    """Initialize the parameters for an LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: How to split the weight matrix.  If >1, the weight
        matrix is stored across num_unit_shards.
      num_proj_shards: How to split the projection matrix.  If >1, the
        projection matrix is stored across num_proj_shards.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of
        the training.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  By default (False), they are concatenated
        along the column axis.  This default behavior will soon be deprecated.
      activation: Activation function of the inner states.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
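
    Example (illustrative values only, not taken from the original project):

      cell = CoupledInputForgetGateLSTMCell(
          num_units=100, use_peepholes=True, state_is_tuple=True)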
    """
    super(CoupledInputForgetGateLSTMCell, self).__init__(_reuse=reuse)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._initializer = initializer
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation

    if num_proj:
      self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_proj)
                          if state_is_tuple else num_units + num_proj)
      self._output_size = num_proj
    else:
      self._state_size = (rnn_cell_impl.LSTMStateTuple(num_units, num_units)
                          if state_is_tuple else 2 * num_units)
      self._output_size = num_units

  @property
  def state_size(self):
    return self._state_size

  @property
  def output_size(self):
    return self._output_size

  def call(self, inputs, state):
    """Run one step of LSTM.

    Args:
      inputs: input Tensor, 2D, batch x num_units.
      state: if `state_is_tuple` is False, this must be a state Tensor,
        `2-D, batch x state_size`.  If `state_is_tuple` is True, this must be a
        tuple of state Tensors, both `2-D`, with column sizes `c_state` and
        `m_state`.
      scope: VariableScope for the created subgraph; defaults to "LSTMCell".

    Returns:
      A tuple containing:
      - A `2-D, [batch x output_dim]`, Tensor representing the output of the
        LSTM after reading `inputs` when previous state was `state`.
        Here output_dim is:
           num_proj if num_proj was set,
           num_units otherwise.
      - Tensor(s) representing the new state of LSTM after reading `inputs` when
        the previous state was `state`.  Same type and shape(s) as `state`.

    Raises:
      ValueError: If input size cannot be inferred from inputs via
        static shape inference.
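
    For illustration (sizes assumed, not from the original project): with
    `num_units=128` and no projection, a batch of 32 gives `inputs` of shape
    `[32, input_size]` and an output of shape `[32, 128]`.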
    """
    sigmoid = math_ops.sigmoid

    num_proj = self._num_units if self._num_proj is None else self._num_proj

    if self._state_is_tuple:
      (c_prev, m_prev) = state
    else:
      c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units])
      m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj])

    dtype = inputs.dtype
    input_size = inputs.get_shape().with_rank(2)[1]
    if input_size.value is None:
      raise ValueError("Could not infer input size from inputs.get_shape()[-1]")

    # Gate and candidate weights.
    self.w_xi = tf.get_variable("_w_xi", [input_size.value, self._num_units])
    self.w_hi = tf.get_variable("_w_hi", [self._num_units, self._num_units])
    self.w_ci = tf.get_variable("_w_ci", [self._num_units, self._num_units])
    self.w_xo = tf.get_variable("_w_xo", [input_size.value, self._num_units])
    self.w_ho = tf.get_variable("_w_ho", [self._num_units, self._num_units])
    self.w_co = tf.get_variable("_w_co", [self._num_units, self._num_units])
    self.w_xc = tf.get_variable("_w_xc", [input_size.value, self._num_units])
    self.w_hc = tf.get_variable("_w_hc", [self._num_units, self._num_units])

    # Bias terms, initialized to zero.
    self.b_i = tf.get_variable("_b_i", [self._num_units],
                               initializer=init_ops.zeros_initializer())
    self.b_c = tf.get_variable("_b_c", [self._num_units],
                               initializer=init_ops.zeros_initializer())
    self.b_o = tf.get_variable("_b_o", [self._num_units],
                               initializer=init_ops.zeros_initializer())

    # Input gate; the forget gate is coupled to it as (1 - i_t).
    i_t = sigmoid(math_ops.matmul(inputs, self.w_xi) +
                  math_ops.matmul(m_prev, self.w_hi) +
                  math_ops.matmul(c_prev, self.w_ci) +
                  self.b_i)
    # New cell state with the coupled input/forget gates.
    c_t = ((1 - i_t) * c_prev +
           i_t * self._activation(math_ops.matmul(inputs, self.w_xc) +
                                  math_ops.matmul(m_prev, self.w_hc) +
                                  self.b_c))
    # Output gate, peeking at the new cell state.
    o_t = sigmoid(math_ops.matmul(inputs, self.w_xo) +
                  math_ops.matmul(m_prev, self.w_ho) +
                  math_ops.matmul(c_t, self.w_co) +
                  self.b_o)
    h_t = o_t * self._activation(c_t)

    new_state = (rnn_cell_impl.LSTMStateTuple(c_t, h_t) if self._state_is_tuple
                 else array_ops.concat([c_t, h_t], 1))
    return h_t, new_state
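

if __name__ == "__main__":
  # Minimal usage sketch (illustrative only; the sizes and shapes below are
  # assumptions, not taken from the original project).  It builds a graph
  # wiring the cell into tf.nn.dynamic_rnn; nothing is run in a session.
  demo_cell = CoupledInputForgetGateLSTMCell(num_units=128, state_is_tuple=True)
  demo_inputs = tf.placeholder(tf.float32, [None, 50, 300])  # batch x time x depth
  demo_outputs, demo_final_state = tf.nn.dynamic_rnn(
      demo_cell, demo_inputs, dtype=tf.float32)
  print(demo_outputs.get_shape())  # expected: (?, 50, 128)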