@@ -1811,55 +1811,6 @@ public <T extends TNumber> Selu<T> selu(Operand<T> features) {
     return Selu.create(scope, features);
   }
 
-  /**
-   * Computes sigmoid cross entropy given <code>logits</code>.
-   *
-   * <p>Measures the probability error in discrete classification tasks in which each class is
-   * independent and not mutually exclusive. For instance, one could perform multilabel
-   * classification where a picture can contain both an elephant and a dog at the same time.
-   *
-   * <p>For brevity, let <code>x = logits</code>, <code>z = labels</code>. The logistic loss in
-   * pseudo-code is
-   *
-   * <pre>
-   * z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
-   *  = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
-   *  = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
-   *  = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))
-   *  = (1 - z) * x + log(1 + exp(-x))
-   *  = x - x * z + log(1 + exp(-x))
-   * </pre>
-   *
-   * <p>For <code>x < 0</code>, to avoid overflow in <code>exp(-x)</code>, we reformulate the above
-   *
-   * <pre>
-   * x - x * z + log(1 + exp(-x))
-   *  = log(exp(x)) - x * z + log(1 + exp(-x))
-   *  = - x * z + log(1 + exp(x))
-   * </pre>
-   *
-   * <p>Hence, to ensure stability and avoid overflow, the implementation uses this equivalent
-   * formulation
-   *
-   * <pre>
-   * max(x, 0) - x * z + log(1 + exp(-abs(x)))
-   * </pre>
-   *
-   * <p><code>logits</code> and <code>labels</code> must have the same type and shape.
-   *
-   * @param labels the labels
-   * @param logits the logits of type float32 or float64
-   * @param <T> the type of labels and logits
-   * @return the component-wise logistic losses.
-   * @throws IllegalArgumentException if logits and labels do not have the same shape
-   */
-  public <T extends TNumber> Operand<T> sigmoidCrossEntropyWithLogits(Operand<T> labels,
-      Operand<T> logits) {
-    return SigmoidCrossEntropyWithLogits.sigmoidCrossEntropyWithLogits(scope, labels, logits);
-  }
-
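
The stable formulation in the removed Javadoc, max(x, 0) - x * z + log(1 + exp(-abs(x))), is easy to check against the naive form it replaces. A minimal plain-Java sketch, independent of the framework API (the class and helper names below are hypothetical, for illustration only):

    // Sketch: sigmoid cross entropy for one logit x and label z, following the
    // removed Javadoc's stable formulation max(x, 0) - x * z + log(1 + exp(-abs(x))).
    public final class SigmoidXentSketch {

      // Stable form: exp() only ever sees a non-positive argument, so it cannot overflow.
      static double stableSigmoidCrossEntropy(double x, double z) {
        return Math.max(x, 0) - x * z + Math.log1p(Math.exp(-Math.abs(x)));
      }

      // Naive form from the derivation above; exp(-x) overflows for large negative x.
      static double naiveSigmoidCrossEntropy(double x, double z) {
        return x - x * z + Math.log1p(Math.exp(-x));
      }

      public static void main(String[] args) {
        System.out.println(stableSigmoidCrossEntropy(2.0, 1.0));     // ~0.1269
        System.out.println(naiveSigmoidCrossEntropy(2.0, 1.0));      // ~0.1269, still agrees
        System.out.println(stableSigmoidCrossEntropy(-1000.0, 1.0)); // 1000.0
        System.out.println(naiveSigmoidCrossEntropy(-1000.0, 1.0));  // Infinity: exp(1000) overflows
      }
    }
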
   /**
    * Computes softmax activations.
    * For each batch {@code i} and class {@code j} we have
@@ -2084,30 +2035,12 @@ public <T extends TType> SpaceToDepth<T> spaceToDepth(Operand<T> input, Long blo
    * given row.
    * <p>Inputs are the logits, not probabilities.
    *
-   * <p>This op expects unscaled logits, since it performs a <code>softmax</code> on
-   * <code>logits</code> internally for efficiency. Do not call this op with the output of
-   * <code>softmax</code>, as it will produce incorrect results.
-   *
-   * <p>A common use case is to have logits of shape <code>[batchSize, numClasses]</code> and have
-   * labels of shape <code>[batchSize]</code>, but higher dimensions are supported, in which case
-   * the <code>dim</code>-th dimension is assumed to be of size <code>numClasses</code>.
-   * <code>logits</code> must have the <code>dataType</code> of <code>TFloat16</code>,
-   * <code>TFloat32</code>, or <code>TFloat64</code>, and <code>labels</code> must have the
-   * dataType of <code>TInt32</code> or <code>TInt64</code>.
-   *
-   * @param labels <code>Tensor</code> of shape <code>[d_0, d_1, ..., d_{r-1}]</code> (where
-   *     <code>r</code> is the rank of <code>labels</code> and result) and dataType of
-   *     <code>TInt32</code> or <code>TInt64</code>. Each entry in <code>labels</code> must be an
-   *     index in <code>[0, numClasses)</code>. Other values will raise an exception when this op
-   *     is run on CPU, and return <code>NaN</code> for the corresponding loss and gradient rows
-   *     on GPU.
-   * @param logits Per-label activations (typically a linear output) of shape <code>[d_0, d_1, ...,
-   *     d_{r-1}, numClasses]</code> and dataType of <code>TFloat16</code>, <code>TFloat32</code>,
-   *     or <code>TFloat64</code>. These activation energies are interpreted as unnormalized log
-   *     probabilities.
-   * @return A <code>Tensor</code> of the same shape as <code>labels</code> and of the same type as
-   *     <code>logits</code>, with the softmax cross entropy loss.
-   * @throws IllegalArgumentException If logits are scalars (need to have rank >= 1) or if the rank
-   *     of the labels is not equal to the rank of the logits minus one.
+   * @param <T> data type for the {@code loss} output and operands
+   * @param features batch_size x num_classes matrix
+   * @param labels batch_size vector with values in [0, num_classes).
+   *     This is the label for the given minibatch entry.
+   * @return a new instance of SparseSoftmaxCrossEntropyWithLogits
    */
   public <T extends TNumber> SparseSoftmaxCrossEntropyWithLogits<T> sparseSoftmaxCrossEntropyWithLogits(
       Operand<T> features, Operand<? extends TNumber> labels) {
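
For reference, the per-row loss this op computes reduces to logSumExp(logits_i) - logits_i[label_i], i.e. -log(softmax(logits_i)[label_i]). A minimal plain-Java sketch of that computation, using max-subtraction for numerical stability (the class and helper names are hypothetical, for illustration only; the real op additionally produces gradients to backpropagate):

    // Sketch: per-example sparse softmax cross entropy from unscaled logits.
    public final class SparseSoftmaxXentSketch {

      static double[] sparseSoftmaxXent(double[][] logits, int[] labels) {
        double[] loss = new double[logits.length];
        for (int i = 0; i < logits.length; i++) {
          double max = Double.NEGATIVE_INFINITY;
          for (double v : logits[i]) {
            max = Math.max(max, v);
          }
          double sumExp = 0.0;
          for (double v : logits[i]) {
            sumExp += Math.exp(v - max); // v - max <= 0, so exp() cannot overflow
          }
          double logSumExp = max + Math.log(sumExp);
          loss[i] = logSumExp - logits[i][labels[i]]; // -log(softmax(logits_i)[label_i])
        }
        return loss;
      }

      public static void main(String[] args) {
        double[][] logits = {{2.0, 1.0, 0.1}, {0.0, 0.0, 0.0}};
        int[] labels = {0, 2};
        for (double l : sparseSoftmaxXent(logits, labels)) {
          System.out.println(l); // ~0.4170 for row 0; log(3) ~ 1.0986 for the uniform row
        }
      }
    }
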