List of examples

Basic workflow.
import pandas as pd
from numba import njit

# Dataset for analysis
FNAME = "employees.csv"


# This function gets compiled by Numba*
@njit
def get_analyzed_data():
    """Read the employees CSV and compute the mean bonus and sorted first names.

    Returns:
        tuple: ``(m, names)`` where ``m`` is the mean of the 'Bonus %' column
        and ``names`` is the 'First Name' column sorted by value.
    """
    # FNAME is the module-level constant naming the input CSV file
    df = pd.read_csv(FNAME)
    s_bonus = pd.Series(df['Bonus %'])
    s_first_name = pd.Series(df['First Name'])
    m = s_bonus.mean()
    names = s_first_name.sort_values()
    return m, names


# Printing names and their average bonus percent
mean_bonus, sorted_first_names = get_analyzed_data()
print(sorted_first_names)
print('Average Bonus %:', mean_bonus)
$ python ./basic_workflow.py
7      ALEXANDER
4    CHRISTOPHER
0          EMILY
2          ISAAC
8         JOSEPH
9         JOSEPH
5            MIA
1           NOAH
6         OLIVIA
3            NaN
dtype: object
Average Bonus %: 11.204399999999998
Expanded basic workflow.
import pandas as pd
from numba import njit
import numpy as np


# Datasets for analysis
file_names = [
    "employees_batch1.csv",
    "employees_batch2.csv",
]


# This function gets compiled by Numba*
# For scalability use @njit(parallel=True)
@njit
def get_analyzed_data(file_name):
    """Read one employees CSV batch and compute its mean bonus and sorted first names.

    Args:
        file_name: Path of the CSV file to read.

    Returns:
        tuple: ``(m, names)`` where ``m`` is the mean of the 'Bonus %' column
        and ``names`` is the 'First Name' column sorted by value.
    """
    # Only the two columns used below are loaded, with explicit dtypes
    df = pd.read_csv(file_name,
                     dtype={'Bonus %': np.float64, 'First Name': str},
                     usecols=['Bonus %', 'First Name'])
    s_bonus = pd.Series(df['Bonus %'])
    s_first_name = pd.Series(df['First Name'])
    m = s_bonus.mean()
    names = s_first_name.sort_values()
    return m, names


# Printing names and their average bonus percent
for file_name in file_names:
    mean_bonus, sorted_first_names = get_analyzed_data(file_name)
    print(file_name)
    print(sorted_first_names)
    print('Average Bonus %:', mean_bonus)
$ python ./basic_workflow_batch.py
employees_batch1.csv
4    CHRISTOPHER
0          EMILY
2          ISAAC
5            MIA
1           NOAH
3            NaN
dtype: object
Average Bonus %: 8.984
employees_batch2.csv
1    ALEXANDER
2       JOSEPH
3       JOSEPH
0       OLIVIA
dtype: object
Average Bonus %: 14.535
Basic workflow in parallel.
import pandas as pd
from numba import njit, prange

# Dataset for analysis
FNAME = "employees.csv"


# This function gets compiled by Numba* and multi-threaded
@njit(parallel=True)
def get_analyzed_data():
    """Read the employees CSV and compute the mean bonus with an explicit prange loop.

    Returns:
        tuple: ``(m, names)`` where ``m`` is the sum of the 'Bonus %' values
        divided by their count and ``names`` is the 'First Name' column
        sorted by value.
    """
    # FNAME is the module-level constant naming the input CSV file
    df = pd.read_csv(FNAME)
    s_bonus = pd.Series(df['Bonus %'])
    s_first_name = pd.Series(df['First Name'])

    # Use explicit loop to compute the mean. It will be compiled as parallel loop
    m = 0.0
    for i in prange(s_bonus.size):
        m += s_bonus.values[i]
    # Divide the accumulated sum by the element count to get the mean
    m /= s_bonus.size

    names = s_first_name.sort_values()
    return m, names


# Printing names and their average bonus percent
mean_bonus, sorted_first_names = get_analyzed_data()
print(sorted_first_names)
print('Average Bonus %:', mean_bonus)
$ python ./basic_workflow_parallel.py
7      ALEXANDER
4    CHRISTOPHER
0          EMILY
2          ISAAC
8         JOSEPH
9         JOSEPH
5            MIA
1           NOAH
6         OLIVIA
3            NaN
dtype: object
Average Bonus %: 11.204399999999998
The index (axis labels) of the Series.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_index():
    """Build a string-labelled Series and return its index (axis labels)."""
    labels = ['one', 'two', 'three', 'four', 'five']
    data = pd.Series(np.arange(5), index=labels)

    # Expect array of 'one' 'two' 'three' 'four' 'five'
    return data.index


print(series_index())
$ python ./series/series_index.py
['one' 'two' 'three' 'four' 'five']
Return Series as ndarray or ndarray-like depending on the dtype.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_values():
    """Return the underlying values of a Series built from ``np.arange(5)``."""
    # Expect array of 0, 1, 2, 3, 4
    return pd.Series(np.arange(5)).values


print(series_values())
$ python ./series/series_values.py
[0 1 2 3 4]
Return a tuple of the shape of the underlying data.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_shape():
    """Return the shape tuple of a ten-element Series."""
    data = pd.Series(np.arange(10))
    dims = data.shape

    return dims  # Expect (10,)


print(series_shape())
$ python ./series/series_shape.py
(10,)
Number of dimensions of the underlying data, by definition 1.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_ndim():
    """Return the number of dimensions of a ten-element Series."""
    # Expect value: 1
    return pd.Series(np.arange(10)).ndim


print(series_ndim())
$ python ./series/series_ndim.py
1
Return the number of elements in the underlying data.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_size():
    """Return the number of elements of a ten-element Series."""
    data = pd.Series(np.arange(10))
    count = data.size

    return count  # Expect value: 10


print(series_size())
$ python ./series/series_size.py
10
Return the transpose, which is by definition self.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_T():
    """Return the transpose of a Series built from ``np.arange(5)``."""
    # Expect array of 0, 1, 2, 3, 4
    return pd.Series(np.arange(5)).T


print(series_T())
$ python ./series/series_T.py
[0 1 2 3 4]
Getting Pandas Series elements. Returns single value.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_getitem_scalar():
    """Index a descending Series (10, 9, ..., 1) with the scalar 0."""
    descending = pd.Series(np.arange(10, 0, -1))
    selected = descending[0]  # Accessing series by scalar index

    return selected


print(series_getitem_scalar())
$ python ./series/series_getitem/series_getitem_scalar_single_result.py
0    10
dtype: int64
Getting Pandas Series elements. Returns multiple values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_getitem_scalar_many_idx():
    """Index a Series whose label 0 occurs several times with the scalar 0."""
    data = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 0, 6, 0])
    matches = data[0]

    return matches


print(series_getitem_scalar_many_idx())
$ python ./series/series_getitem/series_getitem_scalar_multiple_result.py
0    5
0    3
0    1
dtype: int64
Getting Pandas Series elements by slice.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_getitem_slice():
    """Slice a descending Series (10, 9, ..., 1) with ``[3:7]``."""
    descending = pd.Series(np.arange(10, 0, -1))
    window = descending[3:7]  # Accessing series by slice index

    return window


print(series_getitem_slice())
$ python ./series/series_getitem/series_getitem_slice.py
3    7
4    6
5    5
6    4
dtype: int64
Getting Pandas Series elements by array of booleans.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_getitem_array():
    """Select elements of a descending Series (10, 9, ..., 1) with a boolean mask."""
    mask = np.array([True, False, True, True, False] * 2)
    descending = pd.Series(np.arange(10, 0, -1))

    # Accessing series by array
    return descending[mask]


print(series_getitem_array())
$ python ./series/series_getitem/series_getitem_bool_array.py
0    10
2     8
3     7
5     5
7     3
8     2
dtype: int64
Getting Pandas Series elements by another Series.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_getitem_series():
    """Select elements of a descending Series (10, 9, ..., 1) using another Series."""
    selector = pd.Series(np.asarray([1, 6, 7, 8, 9]))
    descending = pd.Series(np.arange(10, 0, -1))

    # Accessing series by series
    return descending[selector]


print(series_getitem_series())
$ python ./series/series_getitem/series_getitem_series.py
1    9
6    4
7    3
8    2
9    1
dtype: int64
Setting Pandas Series elements
import numpy as np
import pandas as pd

from numba import njit


@njit
def series_setitem():
    """Overwrite the element at index 0 of the Series 5, 4, 3, 2, 1 with 0."""
    data = pd.Series(np.arange(5, 0, -1))
    replacement = 0

    data[0] = replacement

    # result Series of 0, 4, 3, 2, 1
    return data


print(series_setitem())
$ python ./series/series_setitem_int.py
0    0
1    4
2    3
3    2
4    1
dtype: int64
Setting Pandas Series elements by slice
import numpy as np
import pandas as pd

from numba import njit


@njit
def series_setitem():
    """Overwrite the slice ``[2:5]`` of the Series 5, 4, 3, 2, 1 with 0."""
    data = pd.Series(np.arange(5, 0, -1))
    replacement = 0

    data[2:5] = replacement

    # result Series of 5, 4, 0, 0, 0
    return data


print(series_setitem())
$ python ./series/series_setitem_slice.py
0    5
1    4
2    0
3    0
4    0
dtype: int64
Setting Pandas Series elements by series
import numpy as np
import pandas as pd

from numba import njit


@njit
def series_setitem():
    """Overwrite the elements of 5, 4, 3, 2, 1 selected by another Series with 0."""
    data = pd.Series(np.arange(5, 0, -1))
    selector = pd.Series(np.asarray([1, 3]))
    replacement = 0

    data[selector] = replacement

    # result Series of 5, 0, 3, 0, 1
    return data


print(series_setitem())
$ python ./series/series_setitem_series.py
0    5
1    0
2    3
3    0
4    1
dtype: int64
Cast a pandas object to a specified dtype.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_astype():
    """Cast a numeric Series containing a NaN to ``str``."""
    numbers = pd.Series([3, -10, np.nan, 0, 92])
    as_text = numbers.astype(str)

    return as_text


print(series_astype())
$ python ./series/series_astype.py
0      3.000000
1    -10.000000
2           nan
3      0.000000
4     92.000000
dtype: object
Make a copy of this object’s indices and data.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_copy():
    """Copy a Series built from ``np.arange(5)`` and return the copy."""
    original = pd.Series(np.arange(5))

    # Expect new series of 0, 1, 2, 3, 4
    return original.copy()


print(series_copy())
$ python ./series/series_copy.py
0    0
1    1
2    2
3    3
4    4
dtype: int64
With a scalar integer. Returns single value.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_at_value():
    """Look up label 4 with ``.at`` on a Series with unique even labels."""
    data = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    found = data.at[4]

    return found  # Expect array: [3]


print(series_at_value())
$ python ./series/series_at/series_at_single_result.py
[3]
With a scalar integer. Returns multiple values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_at_many_idx():
    """Look up label 0 with ``.at`` on a Series where that label repeats."""
    data = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 0, 6, 0])
    found = data.at[0]

    return found  # Expect array: [5 3 1]


print(series_at_many_idx())
$ python ./series/series_at/series_at_multiple_result.py
[5 3 1]
Get value at specified index position.
import numpy as np
import pandas as pd
from numba import njit

from numba import njit


@njit
def series_iat():
    """Read the value at position 4 with ``.iat``."""
    data = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    value = data.iat[4]

    return value  # Expect value: 1


print(series_iat())
$ python ./series/series_iat.py
1
With a scalar integer. Returns single value.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_loc_value():
    """Look up label 4 with ``.loc`` on a Series with unique even labels."""
    data = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    found = data.loc[4]

    return found


print(series_loc_value())
$ python ./series/series_loc/series_loc_single_result.py
4    3
dtype: int64
With a scalar integer. Returns multiple values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_loc_many_idx():
    """Look up label 0 with ``.loc`` on a Series where that label repeats."""
    data = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 0, 6, 0])
    found = data.loc[0]

    return found


print(series_loc_many_idx())
$ python ./series/series_loc/series_loc_multiple_result.py
0    5
0    3
0    1
dtype: int64
With a slice object. Returns multiple values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_loc_slice():
    """Select the label slice ``1:3`` with ``.loc`` on a default-indexed Series."""
    data = pd.Series([5, 4, 3, 2, 1])
    window = data.loc[1:3]

    return window


print(series_loc_slice())
$ python ./series/series_loc/series_loc_slice.py
1    4
2    3
3    2
dtype: int64
With a scalar integer.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_iloc_value():
    """Read the value at position 4 with ``.iloc``."""
    data = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    value = data.iloc[4]

    return value  # Expect value: 1


print(series_iloc_value())
$ python ./series/series_iloc/series_iloc_value.py
1
With a slice object.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_iloc_slice():
    """Select the positional slice ``2:4`` with ``.iloc``."""
    data = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    window = data.iloc[2:4]

    return window


print(series_iloc_slice())
$ python ./series/series_iloc/series_iloc_slice.py
4    3
6    2
dtype: int64
Getting the addition of Series and other
import pandas as pd
from numba import njit


@njit
def series_add():
    """Add two integer Series element-wise with ``Series.add``."""
    left = pd.Series([1, 2, 3])
    right = pd.Series([4, 5, 6])

    # Expect series of 5, 7, 9
    return left.add(right)


print(series_add())
$ python ./series/series_add.py
0    5
1    7
2    9
dtype: int64
Return Subtraction of series and other, element-wise (binary operator sub).
import pandas as pd
from numba import njit


@njit
def series_sub():
    """Subtract one integer Series from another element-wise with ``Series.sub``."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    difference = left.sub(right)

    return difference  # Expect series of 5, 2, 0, -4, -7


print(series_sub())
$ python ./series/series_sub.py
0    5
1    2
2    0
3   -4
4   -7
dtype: int64
Element-wise multiplication of two Series
import pandas as pd
from numba import njit


@njit
def series_mul():
    """Multiply two integer Series element-wise with ``Series.mul``."""
    left = pd.Series([1, 3, 100])
    right = pd.Series([0, 1, 2])

    # Expect series of 0, 3, 200
    return left.mul(right)


print(series_mul())
$ python ./series/series_mul.py
0      0
1      3
2    200
dtype: int64
Element-wise division of one Series by another (binary operator div)
import pandas as pd
from numba import njit


@njit
def series_div():
    """Divide one integer Series by another element-wise with ``Series.div``."""
    numerator = pd.Series([1, 2, 4])
    denominator = pd.Series([4, 4, 16])

    # Expect series of 0.25, 0.50, 0.25
    return numerator.div(denominator)


print(series_div())
$ python ./series/series_div.py
0    0.25
1    0.50
2    0.25
dtype: float64
Element-wise division of one Series by another (binary operator truediv)
import pandas as pd
from numba import njit


@njit
def series_truediv():
    """Divide one integer Series by another element-wise with ``Series.truediv``."""
    numerator = pd.Series([1, 2, 4])
    denominator = pd.Series([4, 4, 16])

    # Expect series of 0.25, 0.50, 0.25
    return numerator.truediv(denominator)


print(series_truediv())
$ python ./series/series_truediv.py
0    0.25
1    0.50
2    0.25
dtype: float64
Return Integer division of series and other, element-wise (binary operator floordiv).
import pandas as pd
from numba import njit


@njit
def series_floordiv():
    """Integer-divide one Series by another element-wise with ``Series.floordiv``."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    quotient = left.floordiv(right)

    return quotient  # Expect series of 0, 2, 1, 0, 0


print(series_floordiv())
$ python ./series/series_floordiv.py
0    0
1    2
2    1
3    0
4    0
dtype: int64
Return Modulo of series and other, element-wise (binary operator mod).
import pandas as pd
from numba import njit


@njit
def series_mod():
    """Compute the element-wise modulo of two Series with ``Series.mod``."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    remainder = left.mod(right)

    return remainder  # Expect series of 0, 0, 0, 2, 1


print(series_mod())
$ python ./series/series_mod.py
0    0
1    0
2    0
3    2
4    1
dtype: int64
Element-wise power of one Series by another (binary operator pow)
import pandas as pd
from numba import njit


@njit
def series_pow():
    """Raise one Series to the power of another element-wise with ``Series.pow``."""
    base = pd.Series([5, 4, 3, 2, 1])
    exponent = pd.Series([0, 2, 3, 6, 8])
    powered = base.pow(exponent)

    return powered  # Expect series of 1, 16, 27, 64, 1


print(series_pow())
$ python ./series/series_pow.py
0     1
1    16
2    27
3    64
4     1
dtype: int64
Element-wise less than of one Series by another (binary operator lt)
import pandas as pd
from numba import njit


@njit
def series_lt():
    """Compare two Series element-wise with ``Series.lt`` (less than)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    flags = left.lt(right)

    return flags  # Expect series of False, False, False, True, True


print(series_lt())
$ python ./series/series_lt.py
0    False
1    False
2    False
3     True
4     True
dtype: bool
Element-wise greater than of one Series by another (binary operator gt)
import pandas as pd
from numba import njit


@njit
def series_gt():
    """Compare two Series element-wise with ``Series.gt`` (greater than)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    flags = left.gt(right)

    return flags  # Expect series of True, True, False, False, False


print(series_gt())
$ python ./series/series_gt.py
0     True
1     True
2    False
3    False
4    False
dtype: bool
Element-wise less than or equal of one Series by another (binary operator le)
import pandas as pd
from numba import njit


@njit
def series_le():
    """Compare two Series element-wise with ``Series.le`` (less than or equal)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    flags = left.le(right)

    return flags  # Expect series of False, False, True, True, True


print(series_le())
$ python ./series/series_le.py
0    False
1    False
2     True
3     True
4     True
dtype: bool
Element-wise greater than or equal of one Series by another (binary operator ge)
import pandas as pd
from numba import njit


@njit
def series_ge():
    """Compare two Series element-wise with ``Series.ge`` (greater than or equal)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    flags = left.ge(right)

    return flags  # Expect series of True, True, True, False, False


print(series_ge())
$ python ./series/series_ge.py
0     True
1     True
2     True
3    False
4    False
dtype: bool
Element-wise not equal of one Series by another (binary operator ne)
import pandas as pd
from numba import njit


@njit
def series_ne():
    """Compare two Series element-wise with ``Series.ne`` (not equal)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    flags = left.ne(right)

    return flags  # Expect series of True, True, False, True, True


print(series_ne())
$ python ./series/series_ne.py
0     True
1     True
2    False
3     True
4     True
dtype: bool
Element-wise equal of one Series by another (binary operator eq)
import pandas as pd
from numba import njit


@njit
def series_eq():
    """Compare two Series element-wise with ``Series.eq`` (equal)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    flags = left.eq(right)

    return flags  # Expect series of False, False, True, False, False


print(series_eq())
$ python ./series/series_eq.py
0    False
1    False
2     True
3    False
4    False
dtype: bool
Square the values by defining a function and passing it as an argument to apply().
import pandas as pd
import numpy as np
from numba import njit


@njit
def series_apply():
    """Square each value of a city-labelled Series with a locally defined function."""
    def square(x):
        return x ** 2

    cities = pd.Series([20, 21, 12],
                       index=['London', 'New York', 'Helsinki'])
    squared = cities.apply(square)

    return squared


print(series_apply())
$ python ./series/series_apply.py
London      400
New York    441
Helsinki    144
dtype: int64
Square the values by passing an anonymous function as an argument to apply().
import pandas as pd
import numpy as np
from numba import njit


@njit
def series_apply():
    """Square each value of a city-labelled Series with an anonymous function."""
    cities = pd.Series([20, 21, 12],
                       index=['London', 'New York', 'Helsinki'])
    squared = cities.apply(lambda x: x ** 2)

    return squared


print(series_apply())
$ python ./series/series_apply_lambda.py
London      400
New York    441
Helsinki    144
dtype: int64
Use a function from the Numpy library.
import pandas as pd
import numpy as np
from numba import njit


@njit
def series_apply():
    """Apply ``np.log`` to each value of a city-labelled Series."""
    cities = pd.Series([20, 21, 12],
                       index=['London', 'New York', 'Helsinki'])
    logs = cities.apply(np.log)

    return logs


print(series_apply())
$ python ./series/series_apply_log.py
London      2.995732
New York    3.044522
Helsinki    2.484907
dtype: float64
map() accepts a function.
import pandas as pd
from numba import njit


@njit
def series_map():
    """Square each value of a float Series by passing a lambda to ``map``."""
    values = pd.Series([1., 2., 3., 4., 5.])
    squared = values.map(lambda x: x ** 2)

    return squared


print(series_map())
$ python ./series/series_map.py
0     1.0
1     4.0
2     9.0
3    16.0
4    25.0
dtype: float64
Return the mean of the values grouped by numpy array.
import pandas as pd
import numpy as np
from numba import njit


@njit
def series_groupby():
    """Group a float Series by a NumPy array of keys and return the group means."""
    amounts = pd.Series([390., 350., 30., 20.])
    keys = np.asarray([0, 1, 0, 1])
    group_means = amounts.groupby(keys).mean()

    # Expect Series of pd.Series([210.0, 185.0], index=[0, 1])
    return group_means


print(series_groupby())
$ python ./series/series_groupby.py
0    210.0
1    185.0
dtype: float64
Calculate the rolling minimum.
import pandas as pd
from numba import njit


@njit
def series_rolling_min():
    """Compute the rolling minimum over a window of 3 for the Series 4, 3, 5, 2, 6."""
    values = pd.Series([4, 3, 5, 2, 6])

    # Expect series of NaN, NaN, 3.0, 2.0, 2.0
    return values.rolling(3).min()


print(series_rolling_min())
$ python ./series/rolling/series_rolling_min.py
0    NaN
1    NaN
2    3.0
3    2.0
4    2.0
dtype: float64
Getting the absolute value of each element in Series
import pandas as pd
from numba import njit


@njit
def series_abs():
    """Return the absolute value of each element of a Series."""
    signed = pd.Series([-1.10, 2, -3.33])

    # Expect series of 1.10, 2.00, 3.33
    return signed.abs()


print(series_abs())
$ python ./series/series_abs.py
0    1.10
1    2.00
2    3.33
dtype: float64
Compute correlation with other Series, excluding missing values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_corr():
    """Compute the correlation between two Series that contain NaN values."""
    first = pd.Series([3.2, -10, np.nan, 0.23, 9.2])
    second = pd.Series([5., 0, 3.3, np.nan, 9.2])
    correlation = first.corr(second)

    return correlation  # Expect value: 0.98673...


print(series_corr())
$ python ./series/series_corr.py
0.9867362434412106
Counting non-NaN values in Series
import pandas as pd
import numpy as np
from numba import njit


@njit
def series_count():
    """Count the non-NaN values of a Series."""
    values = pd.Series([1, 2, np.nan])

    # Expect the number of non-Nan values == '2'
    return values.count()


print(series_count())
$ python ./series/series_count.py
2
Compute covariance with Series, excluding missing values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_cov():
    """Compute the covariance between two Series that contain NaN values."""
    first = pd.Series([3.2, -10, np.nan, 0.23, 9.2])
    second = pd.Series([5., 0, 3.3, np.nan, 9.2])
    covariance = first.cov(second)

    return covariance  # Expect value: 44.639...


print(series_cov())
$ python ./series/series_cov.py
44.63999999999999
Returns cumulative sum over Series.
import pandas as pd
from numba import njit


@njit
def series_cumsum():
    """Return the cumulative sum of the Series 1, 2, 3, 4."""
    values = pd.Series([1, 2, 3, 4])
    running_total = values.cumsum()

    return running_total  # Expect series of 1, 3, 6, 10


print(series_cumsum())
$ python ./series/series_cumsum.py
0     1
1     3
2     6
3    10
dtype: int64
Generate descriptive statistics.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_describe():
    """Generate descriptive statistics for a five-element float Series."""
    sample = pd.Series([5., 0, 3.3, 4.4, 9.2])
    stats = sample.describe()

    return stats


print(series_describe())
$ python ./series/series_describe.py
count    5.000000
mean     4.380000
std      3.315419
min      0.000000
25%      3.300000
50%      4.400000
75%      5.000000
max      9.200000
dtype: float64
Getting the maximum value of Series elements
import pandas as pd
from numba import njit


@njit
def series_max():
    """Return the maximum value of the Series 1, 4, 2, 0."""
    values = pd.Series([1, 4, 2, 0])

    # Expect maximum value 4
    return values.max()


print(series_max())
$ python ./series/series_max.py
4
Return the mean of the values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_mean():
    """Return the mean of a Series that contains a NaN value."""
    values = pd.Series([3.2, -10, np.nan, 0.23, 9.2])
    average = values.mean()

    return average  # Expect value: 0.6575


print(series_mean())
$ python ./series/series_mean.py
0.6575
Return the median of the values for the requested axis.
import pandas as pd
from numba import njit


@njit
def series_median():
    """Return the median of the Series 1, 2, 3, 4."""
    values = pd.Series([1, 2, 3, 4])
    middle = values.median()

    return middle  # Expect value: 2.5


print(series_median())
$ python ./series/series_median.py
2.5
Getting the minimum value of Series elements
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_min():
    """Return the minimum value of a Series that contains a NaN value."""
    values = pd.Series([4, np.nan, 2, 1])
    smallest = values.min()

    return smallest  # Expect minimum value 1.0


print(series_min())
$ python ./series/series_min.py
1.0
Returns the largest n elements.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_nlargest():
    """Return the four largest elements of a Series built from ``np.arange(10)``."""
    values = pd.Series(np.arange(10))
    top = values.nlargest(4)

    return top  # Expect series of 9, 8, 7, 6


print(series_nlargest())
$ python ./series/series_nlargest.py
9    9
8    8
7    7
6    6
dtype: int64
Returns the smallest n elements.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_nsmallest():
    """Return the four smallest elements of a Series built from ``np.arange(10)``."""
    values = pd.Series(np.arange(10))
    bottom = values.nsmallest(4)

    return bottom  # Expect series of 0, 1, 2, 3


print(series_nsmallest())
$ python ./series/series_nsmallest.py
0    0
1    1
2    2
3    3
dtype: int64
Percentage change between the current and a prior element.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_pct_change():
    """Compute the percentage change against the element two periods earlier."""
    values = pd.Series([5., 0, 3.3, np.nan, 9.2])
    change = values.pct_change(periods=2, fill_method=None, limit=None, freq=None)

    return change


print(series_pct_change())
$ python ./series/series_pct_change.py
0         NaN
1         NaN
2   -0.340000
3         NaN
4    1.787879
dtype: float64
Return the product of the values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_prod():
    """Return the product of the values of a Series that contains a NaN value."""
    values = pd.Series([3.2, -10, np.nan, 0.23, 9.2])
    product = values.prod()

    return product  # Expect value: -67.712


print(series_prod())
$ python ./series/series_prod.py
-67.712
Computing quantile for the Series
import pandas as pd
from numba import njit


@njit
def series_quantile():
    """Compute the 0.5 quantile (median) of the Series 1, 2, 3, 4."""
    q = .5  # compute median
    values = pd.Series([1, 2, 3, 4])

    # Expect median value == 2.5
    return values.quantile(q)


print(series_quantile())
$ python ./series/series_quantile.py
2.5
Returns sample standard deviation over Series.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_std():
    """Return the standard deviation of a Series built from ``np.arange(10)``."""
    values = pd.Series(np.arange(10))
    deviation = values.std()

    return deviation  # Expect value: 3.0276503540974917


print(series_std())
$ python ./series/series_std.py
3.0276503540974917
Return the sum of the values for the requested axis.
import pandas as pd
from numba import njit


@njit
def series_sum():
    """Return the sum of the values of the Series 5, 4, 3, 2, 1."""
    series = pd.Series([5, 4, 3, 2, 1])

    return series.sum()  # Expect value: 15 (the sample run prints it as 15.0)


print(series_sum())
$ python ./series/series_sum.py
15.0
Returns unbiased variance over Series.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_var():
    """Return the variance of a Series built from ``np.arange(10)``."""
    values = pd.Series(np.arange(10))
    variance = values.var()

    return variance  # Expect value: 9.16666...


print(series_var())
$ python ./series/series_var.py
9.166666666666666
Getting unique values in Series
import pandas as pd
from numba import njit


@njit
def series_unique():
    """Return the unique values of the Series 2, 1, 3, 3."""
    values = pd.Series([2, 1, 3, 3])

    # Expect array of unique values [1, 2, 3]
    return values.unique()


print(series_unique())
$ python ./series/series_unique.py
[1 2 3]
Return number of unique elements in the object.
import pandas as pd
from numba import njit


@njit
def series_nunique():
    """Return the number of unique elements of the Series 2, 8, 2, 1."""
    values = pd.Series([2, 8, 2, 1])
    distinct = values.nunique()

    return distinct  # Expect value: 3


print(series_nunique())
$ python ./series/series_nunique.py
3
Getting the counts of unique values, excluding NaNs
import pandas as pd
import numpy as np
from numba import njit


@njit
def series_value_counts():
    """Count the occurrences of each value in a Series that contains a NaN."""
    values = pd.Series([3, 1, 2, 3, 4, np.nan])

    return values.value_counts()


print(series_value_counts())
$ python ./series/series_value_counts.py
3.0    2
4.0    1
2.0    1
1.0    1
dtype: int64
Getting the first n rows.
import pandas as pd
from numba import njit


@njit
def series_head():
    """Return the first three rows of a seven-element Series."""
    data = pd.Series([7, 6, 5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8, 10, 12])
    first_rows = data.head(3)

    return first_rows


print(series_head())
$ python ./series/series_head.py
0    7
2    6
4    5
dtype: int64
Getting the row label of the maximum value.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_idxmax():
    """Return the row label of the maximum value of a string-labelled Series."""
    data = pd.Series([4, np.nan, 2, 1], index=['A', 'B', 'C', 'D'])
    label = data.idxmax()

    return label  # Expect index of maximum value A


print(series_idxmax())
$ python ./series/series_idxmax.py
A
Getting the row label of the minimum value.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_idxmin():
    """Return the row label of the minimum value of a string-labelled Series."""
    data = pd.Series([4, np.nan, 2, 1], index=['A', 'B', 'C', 'D'])
    label = data.idxmin()

    return label  # Expect index of minimum value D


print(series_idxmin())
$ python ./series/series_idxmin.py
D
Check whether values are contained in Series.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_isin():
    """Check which elements of a Series are contained in the list ``[4, 1]``."""
    data = pd.Series([4, np.nan, 2, 1])
    membership = data.isin([4, 1])

    return membership  # Expect series of True, False, False, True


print(series_isin())
$ python ./series/series_isin.py
0     True
1    False
2    False
3     True
dtype: bool
Alter Series index labels or name.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_rename():
    """Call ``rename`` on a Series built from ``np.arange(5)`` and return the Series."""
    s = pd.Series(np.arange(5))
    # NOTE(review): the value returned by rename() is not assigned, and the
    # sample output for this example shows no name on the returned Series.
    # Presumably `return s.rename("new_series")` was intended -- confirm.
    s.rename("new_series")

    return s


print(series_rename())
$ python ./series/series_rename.py
0    0
1    1
2    2
3    3
4    4
dtype: int64
Return the elements in the given positional indices along an axis.
import pandas as pd
from numba import njit


@njit
def series_take():
    """Return the elements of the Series 5, 4, 3, 2, 1 at positions 4 and 1."""
    series = pd.Series([5, 4, 3, 2, 1])

    return series.take([4, 1])  # Expect series of 1, 4 (labels 4, 1)


print(series_take())
$ python ./series/series_take.py
4    1
1    4
dtype: int64
Detect missing values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_isna():
    """Flag the missing values of a Series that contains a NaN."""
    data = pd.Series([4, np.nan, 2, 1])
    missing = data.isna()

    return missing  # Expect series of False, True, False, False


print(series_isna())
$ python ./series/series_isna.py
0    False
1     True
2    False
3    False
dtype: bool
Detect existing (non-missing) values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_notna():
    """Flag the existing (non-missing) values of a Series that contains a NaN."""
    data = pd.Series([4, np.nan, 2, 1])
    present = data.notna()

    return present  # Expect series of True, False, True, True


print(series_notna())
$ python ./series/series_notna.py
0     True
1    False
2     True
3     True
dtype: bool
Return a new Series with missing values removed.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_dropna():
    """Return a new Series with the missing values removed."""
    data = pd.Series([4, np.nan, 2, 1])
    cleaned = data.dropna()

    return cleaned


print(series_dropna())
$ python ./series/series_dropna.py
0    4.0
2    2.0
3    1.0
dtype: float64
Fill NA/NaN values using the specified method.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_fillna():
    """Replace the NaN values of a Series with 0."""
    data = pd.Series([4, np.nan, 2, 1])
    filled = data.fillna(0)

    return filled


print(series_fillna())
$ python ./series/series_fillna.py
0    4.0
1    0.0
2    2.0
3    1.0
dtype: float64
Override ndarray.argsort.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_argsort():
    """Return the argsort of a Series that contains a NaN value."""
    data = pd.Series([3, -10, np.nan, 0, 92])
    order = data.argsort()

    return order  # Expect series of 1, 2, -1, 0, 3


print(series_argsort())
$ python ./series/series_argsort.py
0    1
1    2
2   -1
3    0
4    3
dtype: int64
Sort by the values.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_sort_values():
    """Sort a Series that contains a NaN value by its values."""
    data = pd.Series([3, -10, np.nan, 0, 92])
    ordered = data.sort_values()

    return ordered


print(series_sort_values())
$ python ./series/series_sort_values.py
1   -10.0
3     0.0
0     3.0
4    92.0
2     NaN
dtype: float64
Concatenate two or more Series.
import pandas as pd
from numba import njit


@njit
def series_append():
    """Concatenate two string Series with ``Series.append``."""
    head = pd.Series(['one', 'two', 'three'])
    tail = pd.Series(['four', 'five', 'six'])
    combined = head.append(tail)

    return combined


print(series_append())
$ python ./series/series_append.py
0      one
1      two
2    three
0     four
1     five
2      six
dtype: object
Shift index by desired number of periods with an optional time freq.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_shift():
    """Shift a Series with ``Series.shift`` called without arguments."""
    data = pd.Series([3, -10, np.nan, 0, 92])
    shifted = data.shift()

    return shifted


print(series_shift())
$ python ./series/series_shift.py
0     NaN
1     3.0
2   -10.0
3     NaN
4     0.0
dtype: float64
Convert strings in the Series to be capitalized.
import pandas as pd
from numba import njit


@njit
def series_str_capitalize():
    """Capitalize each string of a Series."""
    words = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])

    # Expect series of 'Lower', 'Capitals', 'This is a sentence', 'Swapcase'
    return words.str.capitalize()


print(series_str_capitalize())
$ python ./series/str/series_str_capitalize.py
0                 Lower
1              Capitals
2    This is a sentence
3              Swapcase
dtype: object
Convert strings in the Series to be casefolded.
import pandas as pd
from numba import njit


@njit
def series_str_casefold():
    """Casefold each string of a Series."""
    words = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])

    # Expect series of 'lower', 'capitals', 'this is a sentence', 'swapcase'
    return words.str.casefold()


print(series_str_casefold())
$ python ./series/str/series_str_casefold.py
0                 lower
1              capitals
2    this is a sentence
3              swapcase
dtype: object
Filling left and right side of strings in the Series with an additional character
import pandas as pd
from numba import njit


@njit
def series_str_center():
    """Center every string of a sample Series in a width of 5, padding with '*'."""
    words = pd.Series(['dog', 'foo', 'bar'])

    # Expect series of '*dog*', '*foo*', '*bar*'
    return words.str.center(5, '*')


print(series_str_center())
$ python ./series/str/series_str_center.py
0    *dog*
1    *foo*
2    *bar*
dtype: object
Test if the end of each string element matches a string
import pandas as pd
from numba import njit


@njit
def series_str_endswith():
    """Test whether each string of a sample Series ends with 'bar'."""
    words = pd.Series(['foo', 'bar', 'foobar'])

    # Expect series of False, True, True
    return words.str.endswith('bar')


print(series_str_endswith())
$ python ./series/str/series_str_endswith.py
0    False
1     True
2     True
dtype: bool
Return the lowest index in each string in the Series at which the substring is found
import pandas as pd
from numba import njit


@njit
def series_str_find():
    """Locate 'bar' inside each string of a sample Series."""
    words = pd.Series(['foo', 'bar', 'foobar'])

    # Expect series of -1, 0, 3 ('foo' does not contain 'bar', hence -1)
    return words.str.find('bar')


print(series_str_find())
$ python ./series/str/series_str_find.py
0   -1
1    0
2    3
dtype: int64
Compute the length of each element in the Series
import pandas as pd
from numba import njit


@njit
def series_str_len():
    """Compute the length of each string of a sample Series."""
    words = pd.Series(['foo', 'bar', 'foobar'])

    # Expect series of 3, 3, 6
    return words.str.len()


print(series_str_len())
$ python ./series/str/series_str_len.py
0    3
1    3
2    6
dtype: int64
Filling right side of strings in the Series with an additional character
import pandas as pd
from numba import njit


@njit
def series_str_ljust():
    """Left-justify each string of a sample Series in a width of 5, padding with '*'."""
    words = pd.Series(['dog', 'foo', 'bar'])

    # Expect series of 'dog**', 'foo**', 'bar**'
    return words.str.ljust(5, '*')


print(series_str_ljust())
$ python ./series/str/series_str_ljust.py
0    dog**
1    foo**
2    bar**
dtype: object
Convert strings in the Series to lowercase.
import pandas as pd
from numba import njit


@njit
def series_str_lower():
    """Convert each string of a sample Series to lowercase."""
    words = pd.Series(['DOG', 'foo', 'BaR'])

    # Expect series of 'dog', 'foo', 'bar'
    return words.str.lower()


print(series_str_lower())
$ python ./series/str/series_str_lower.py
0    dog
1    foo
2    bar
dtype: object
Remove leading characters.
import pandas as pd
from numba import njit


@njit
def series_str_lstrip():
    """Strip the leading characters '1', '2', '3' and '.' from each sample string."""
    entries = pd.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t'])
    stripped = entries.str.lstrip('123.')

    # Only the left side is trimmed; the space after the dot and the tail remain.
    return stripped


print(series_str_lstrip())
$ python ./series/str/series_str_lstrip.py
0     Ant.  
1     Bee!\n
2     Cat?\t
dtype: object
Filling left side of strings in the Series with an additional character
import pandas as pd
from numba import njit


@njit
def series_str_rjust():
    """Right-justify each string of a sample Series in a width of 5, padding with '*'."""
    words = pd.Series(['dog', 'foo', 'bar'])

    # Expect series of '**dog', '**foo', '**bar'
    return words.str.rjust(5, '*')


print(series_str_rjust())
$ python ./series/str/series_str_rjust.py
0    **dog
1    **foo
2    **bar
dtype: object
Remove trailing characters.
import pandas as pd
from numba import njit


@njit
def series_str_rstrip():
    """Strip trailing punctuation and whitespace characters from each sample string."""
    entries = pd.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t'])
    stripped = entries.str.rstrip('.!? \n\t')

    # Expect series of '1. Ant', '2. Bee', '3. Cat'
    return stripped


print(series_str_rstrip())
$ python ./series/str/series_str_rstrip.py
0    1. Ant
1    2. Bee
2    3. Cat
dtype: object
Test if the start of each string element matches a string
import pandas as pd
from numba import njit


@njit
def series_str_startswith():
    """Test whether each string of a sample Series starts with 'foo'."""
    words = pd.Series(['foo', 'bar', 'foobar'])

    # Expect series of True, False, True
    return words.str.startswith('foo')


print(series_str_startswith())
$ python ./series/str/series_str_startswith.py
0     True
1    False
2     True
dtype: bool
Remove leading and trailing characters.
import pandas as pd
from numba import njit


@njit
def series_str_strip():
    """Strip digits, punctuation and whitespace from both ends of each sample string."""
    entries = pd.Series(['1. Ant.  ', '2. Bee!\n', '3. Cat?\t'])
    stripped = entries.str.strip('123.!? \n\t')

    # Expect series of 'Ant', 'Bee', 'Cat'
    return stripped


print(series_str_strip())
$ python ./series/str/series_str_strip.py
0    Ant
1    Bee
2    Cat
dtype: object
Convert strings in the Series to be swapcased.
import pandas as pd
from numba import njit


@njit
def series_str_swapcase():
    """Swap the case of every character in each string of a sample Series."""
    words = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])

    # Expect series of 'LOWER', 'capitals', 'THIS IS A SENTENCE', 'sWaPcAsE'
    return words.str.swapcase()


print(series_str_swapcase())
$ python ./series/str/series_str_swapcase.py
0                 LOWER
1              capitals
2    THIS IS A SENTENCE
3              sWaPcAsE
dtype: object
Convert strings in the Series to titlecase.
import pandas as pd
from numba import njit


@njit
def series_str_title():
    """Convert each string of a sample Series to titlecase."""
    words = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])

    # Expect series of 'Lower', 'Capitals', 'This Is A Sentence', 'Swapcase'
    return words.str.title()


print(series_str_title())
$ python ./series/str/series_str_title.py
0                 Lower
1              Capitals
2    This Is A Sentence
3              Swapcase
dtype: object
Convert strings in the Series to upper case.
import pandas as pd
from numba import njit


@njit
def series_str_upper():
    """Convert each string of a sample Series to uppercase."""
    words = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
    shouted = words.str.upper()

    # Expect series of 'LOWER', 'CAPITALS', 'THIS IS A SENTENCE', 'SWAPCASE'
    return shouted


print(series_str_upper())
$ python ./series/str/series_str_upper.py
0                 LOWER
1              CAPITALS
2    THIS IS A SENTENCE
3              SWAPCASE
dtype: object
Pad strings in the Series by prepending ‘0’ characters
import pandas as pd
from numba import njit


@njit
def series_str_zfill():
    """Pad each string of a sample Series on the left with '0' up to a width of 5."""
    words = pd.Series(['dog', 'foo', 'bar'])

    # Expect series of '00dog', '00foo', '00bar'
    return words.str.zfill(5)


print(series_str_zfill())
$ python ./series/str/series_str_zfill.py
0    00dog
1    00foo
2    00bar
dtype: object
Check if all the characters in the text are alphanumeric
import pandas as pd
from numba import njit


@njit
def series_str_isalnum():
    """Check which strings of a sample Series are entirely alphanumeric."""
    names = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])

    # Expect series of True, False, True, False
    return names.str.isalnum()


print(series_str_isalnum())
$ python ./series/str/series_str_isalnum.py
0     True
1    False
2     True
3    False
dtype: bool
Check whether all characters in each string are alphabetic
import pandas as pd
from numba import njit


@njit
def series_str_isalpha():
    """Check which strings of a sample Series are entirely alphabetic."""
    names = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])

    # Expect series of True, False, True, False
    return names.str.isalpha()


print(series_str_isalpha())
$ python ./series/str/series_str_isalpha.py
0     True
1    False
2     True
3    False
dtype: bool
Check whether all characters in each string in the Series are digits.
import pandas as pd
from numba import njit


@njit
def series_str_isdigit():
    """Check which strings of a sample Series consist only of digits."""
    samples = pd.Series(['23', '³', '⅕', ''])

    # Expect series of True, True, False, False
    return samples.str.isdigit()


print(series_str_isdigit())
$ python ./series/str/series_str_isdigit.py
0     True
1     True
2    False
3    False
dtype: bool
Check if all the characters in the text are whitespace
import pandas as pd
from numba import njit


@njit
def series_str_isspace():
    """Check which strings of a sample Series consist only of whitespace."""
    samples = pd.Series([' ', ' c ', '  b ', '     a     '])

    # Expect series of True, False, False, False
    return samples.str.isspace()


print(series_str_isspace())
$ python ./series/str/series_str_isspace.py
0     True
1    False
2    False
3    False
dtype: bool
Check whether all characters in each string are lowercase.
import pandas as pd
from numba import njit


@njit
def series_str_islower():
    """Check which strings of a sample Series are entirely lowercase."""
    names = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])

    # Expect series of True, False, False, False
    return names.str.islower()


print(series_str_islower())
$ python ./series/str/series_str_islower.py
0     True
1    False
2    False
3    False
dtype: bool
Check whether all characters in each string are uppercase.
import pandas as pd
from numba import njit


@njit
def series_str_isupper():
    """Check which strings of a sample Series are entirely uppercase."""
    words = pd.Series(['FOO', 'BAr', 'FooBar'])

    # Expect series of True, False, False
    return words.str.isupper()


print(series_str_isupper())
$ python ./series/str/series_str_isupper.py
0     True
1    False
2    False
dtype: bool
Check if each word starts with an upper case letter
import pandas as pd
from numba import njit


@njit
def series_str_istitle():
    """Check which strings of a sample Series are titlecased."""
    names = pd.Series(['Cat', 'dog', 'Bird'])

    # Expect series of True, False, True
    return names.str.istitle()


print(series_str_istitle())
$ python ./series/str/series_str_istitle.py
0     True
1    False
2     True
dtype: bool
Check whether all characters in each string are numeric.
import pandas as pd
from numba import njit


@njit
def series_str_isnumeric():
    """Check which strings of a sample Series consist only of numeric characters."""
    samples = pd.Series(['one', 'one1', '1', ''])

    # Expect series of False, False, True, False
    return samples.str.isnumeric()


print(series_str_isnumeric())
$ python ./series/str/series_str_isnumeric.py
0    False
1    False
2     True
3    False
dtype: bool
Check whether all characters in each string are decimal.
import pandas as pd
from numba import njit


@njit
def series_str_isdecimal():
    """Check which strings of a sample Series consist only of decimal characters."""
    samples = pd.Series(['23', '³', '⅕', ''])

    # Expect series of True, False, False, False
    return samples.str.isdecimal()


print(series_str_isdecimal())
$ python ./series/str/series_str_isdecimal.py
0     True
1    False
2    False
3    False
dtype: bool
The index (row labels) of the DataFrame.
import pandas as pd
from numba import njit


@njit
def dataframe_index():
    """Return the row labels of a small DataFrame built with an explicit index."""
    frame = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['a', 'b'])

    # Index values are 'a' and 'b'
    return frame.index


print(dataframe_index())
$ python ./dataframe/dataframe_index.py
['a' 'b']
The values data of the DataFrame.
import pandas as pd
from numba import njit


@njit
def dataframe_values():
    """Return the underlying values of a small integer DataFrame."""
    frame = pd.DataFrame({'age': [3, 29], 'height': [94, 170], 'weight': [31, 115]})

    # Values laid out row by row: [[3, 94, 31], [29, 170, 115]]
    return frame.values


print(dataframe_values())
$ python ./dataframe/dataframe_values.py
[[  3  94  31]
 [ 29 170 115]]
Getting a Pandas DataFrame column through attribute access.
import pandas as pd
from numba import njit


@njit
def dataframe_getitem():
    """Select column 'C' of a sample DataFrame through attribute access."""
    frame = pd.DataFrame({'A': [0, 1, 2, 3, 4],
                          'B': [1, 2, 3, 4, 5],
                          'C': [2, 3, 4, 5, 6]})
    column = frame.C

    return column


print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_attr.py
0    2
1    3
2    4
3    5
4    6
Name: C, dtype: int64
Getting Pandas DataFrame column where key is a string.
import pandas as pd
from numba import njit


@njit
def dataframe_getitem():
    """Select column 'A' of a sample DataFrame using a string key."""
    frame = pd.DataFrame({'A': [0, 1, 2, 3, 4],
                          'B': [1, 2, 3, 4, 5],
                          'C': [2, 3, 4, 5, 6]})
    column = frame['A']

    return column


print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem.py
0    0
1    1
2    2
3    3
4    4
Name: A, dtype: int64
Getting slice of Pandas DataFrame.
import pandas as pd
from numba import njit


@njit
def dataframe_getitem():
    """Select the rows at positions 1 and 2 of a sample DataFrame with a slice."""
    frame = pd.DataFrame({'A': [0, 1, 2, 3, 4],
                          'B': [1, 2, 3, 4, 5],
                          'C': [2, 3, 4, 5, 6]})
    rows = frame[1:3]

    return rows


print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_slice.py
   A  B  C
1  1  2  3
2  2  3  4
Getting Pandas DataFrame elements where key is a tuple of strings.
import pandas as pd
from numba import njit


@njit
def dataframe_getitem():
    """Select columns 'A' and 'C' of a sample DataFrame using a tuple of strings."""
    frame = pd.DataFrame({'A': [0, 1, 2, 3, 4],
                          'B': [1, 2, 3, 4, 5],
                          'C': [2, 3, 4, 5, 6]})
    subset = frame[('A', 'C')]

    return subset


print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_tuple.py
   A  C
0  0  2
1  1  3
2  2  4
3  3  5
4  4  6
Getting Pandas DataFrame elements where key is an array of booleans.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_getitem():
    """Filter the rows of a sample DataFrame with a boolean NumPy array."""
    frame = pd.DataFrame({'A': [0, 1, 2, 3, 4],
                          'B': [1, 2, 3, 4, 5],
                          'C': [2, 3, 4, 5, 6]})
    # True at positions 1 and 4, so those two rows are kept.
    mask = np.array([False, True, False, False, True])
    selected = frame[mask]

    return selected


print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_array.py
   A  B  C
1  1  2  3
4  4  5  6
Getting Pandas DataFrame elements where key is series of booleans.
import pandas as pd
from numba import njit


@njit
def dataframe_getitem():
    """Filter the rows of a sample DataFrame with a boolean Series."""
    frame = pd.DataFrame({'A': [0, 1, 2, 3, 4],
                          'B': [1, 2, 3, 4, 5],
                          'C': [2, 3, 4, 5, 6]})
    # True at labels 0 and 2, so those two rows are kept.
    mask = pd.Series([True, False, True, False, False])
    selected = frame[mask]

    return selected


print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_series.py
   A  B  C
0  0  1  2
2  2  3  4
Make a copy of this object’s indices and data.
import pandas as pd
from numba import njit


@njit
def dataframe_copy():
    """Return a deep copy of a small two-column DataFrame."""
    original = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7]})

    return original.copy(deep=True)


print(dataframe_copy())
$ python ./dataframe/dataframe_copy.py
     A  B
0  1.0  4
1  2.0  5
2  3.0  6
3  1.0  7
Detect missing values.

import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_isna():
    """Flag the missing cells (``np.nan`` in 'A', ``None`` in 'C') of a sample DataFrame."""
    frame = pd.DataFrame({'A': [1.0, np.nan, 3.0, 1.0], 'B': [4, 5, 6, 7], 'C': [None, 'b', 'c', 'd']})
    missing = frame.isna()

    return missing


print(dataframe_isna())
$ python ./dataframe/dataframe_isna.py
       A      B      C
0  False  False   True
1   True  False  False
2  False  False  False
3  False  False  False
Return the first n rows.

import pandas as pd
from numba import njit


@njit
def dataframe_head():
    """Return the first six rows of a nine-row, single-column DataFrame."""
    animals = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
                                       'monkey', 'parrot', 'shark', 'whale', 'zebra']})
    first_rows = animals.head(n=6)

    return first_rows


print(dataframe_head())
$ python ./dataframe/dataframe_head.py
      animal
0  alligator
1        bee
2     falcon
3       lion
4     monkey
5     parrot
Get value at specified index position.
import pandas as pd
from numba import njit


@njit
def dataframe_iat():
    """Read a single cell of a sample DataFrame by integer row/column position."""
    frame = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7], 'C': ['a', 'b', 'c', 'd']})
    cell = frame.iat[1, 2]  # row 1 of column 'C' -> 'b'

    return cell


print(dataframe_iat())
$ python ./dataframe/dataframe_iat.py
b
Groupby and calculate the minimum in each group.
import pandas as pd
from numba import njit


@njit
def df_groupby_min():
    """Group a sample DataFrame by column 'A' and take the minimum of each group."""
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})

    # Expect DataFrame of
    # {'B': [0, 1, 2], 'C': [1, 2, 3]} with index=[1, 2, 3]
    return frame.groupby('A').min()


print(df_groupby_min())
$ python ./dataframe/groupby/dataframe_groupby_min.py
   B  C
1  0  1
2  1  2
3  2  3
Calculate the rolling minimum.
import pandas as pd
from numba import njit


@njit
def df_rolling_min():
    """Compute the minimum over a rolling window of 3 rows for each column."""
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, 3.0, 2.0, 2.0], 'B': [NaN, NaN, -5.0, -5.0, -6.0]}
    return frame.rolling(3).min()


print(df_rolling_min())
$ python ./dataframe/rolling/dataframe_rolling_min.py
     A    B
0  NaN  NaN
1  NaN  NaN
2  3.0 -5.0
3  2.0 -5.0
4  2.0 -6.0
Count non-NA cells for each column or row.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_count():
    """Count the non-NA cells in each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Column "C" holds one NaN, so its count is 3; np.inf still counts.
    per_column = frame.count()

    return per_column


print(dataframe_count())
$ python ./dataframe/dataframe_count.py
A    4
B    4
C    3
dtype: int64
Return the maximum of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_max():
    """Return the maximum of each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Expect A: 0.6, B: 6.0, C: inf
    column_maxima = frame.max()

    return column_maxima


print(dataframe_max())
$ python ./dataframe/dataframe_max.py
A    0.6
B    6.0
C    inf
dtype: float64
Return the mean of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_mean():
    """Return the mean of each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Expect A: 0.25, B: 2.5, C: inf
    column_means = frame.mean()

    return column_means


print(dataframe_mean())
$ python ./dataframe/dataframe_mean.py
A    0.25
B    2.50
C     inf
dtype: float64
Return the median of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_median():
    """Return the median of each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Expect A: 0.2, B: 2.0, C: 1.0
    column_medians = frame.median()

    return column_medians


print(dataframe_median())
$ python ./dataframe/dataframe_median.py
A    0.2
B    2.0
C    1.0
dtype: float64
Return the minimum of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_min():
    """Return the minimum of each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Expect A: 0.0, B: 0.0, C: -1.0
    column_minima = frame.min()

    return column_minima


print(dataframe_min())
$ python ./dataframe/dataframe_min.py
A    0.0
B    0.0
C   -1.0
dtype: float64
Percentage change between the current and a prior element.
import pandas as pd
from numba import njit


@njit
def dataframe_pct_change():
    """Compute the row-to-row percentage change of each column."""
    frame = pd.DataFrame({"A": [14, 4, 5, 4, 1, 55],
                          "B": [5, 2, 54, 3, 2, 32],
                          "C": [20, 20, 7, 21, 8, 5],
                          "D": [14, 3, 6, 2, 6, 4]})

    # The first row has no predecessor, so it is NaN in every column.
    return frame.pct_change()


print(dataframe_pct_change())
$ python ./dataframe/dataframe_pct_change.py
           A          B         C         D
0        NaN        NaN       NaN       NaN
1  -0.714286  -0.600000  0.000000 -0.785714
2   0.250000  26.000000 -0.650000  1.000000
3  -0.200000  -0.944444  2.000000 -0.666667
4  -0.750000  -0.333333 -0.619048  2.000000
5  54.000000  15.000000 -0.375000 -0.333333
Return the product of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_prod():
    """Return the product of each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Expect A: 0.0, B: 0.0, C: -inf
    column_products = frame.prod()

    return column_products


print(dataframe_prod())
$ python ./dataframe/dataframe_prod.py
A    0.0
B    0.0
C   -inf
dtype: float64
Return the sum of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_sum():
    """Return the sum of each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Expect A: 1.0, B: 10.0, C: inf
    column_sums = frame.sum()

    return column_sums


print(dataframe_sum())
$ python ./dataframe/dataframe_sum.py
A     1.0
B    10.0
C     inf
dtype: float64
Return sample standard deviation over columns.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_std():
    """Return the sample standard deviation of each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Expect A: 0.251661, B: 2.516611, C: NaN (the column contains np.inf)
    column_std = frame.std()

    return column_std


print(dataframe_std())
$ python ./dataframe/dataframe_std.py
A    0.251661
B    2.516611
C         NaN
dtype: float64
Return unbiased variance over requested axis.
import pandas as pd
import numpy as np
from numba import njit


@njit
def dataframe_var():
    """Return the unbiased variance of each column of a sample DataFrame."""
    frame = pd.DataFrame({"A": [0.2, 0.0, 0.6, 0.2],
                          "B": [2, 0, 6, 2],
                          "C": [-1, np.nan, 1, np.inf]})
    # Expect A: 0.063333, B: 6.333333, C: NaN (the column contains np.inf)
    column_var = frame.var()

    return column_var


print(dataframe_var())
$ python ./dataframe/dataframe_var.py
A    0.063333
B    6.333333
C         NaN
dtype: float64
Drop specified columns from DataFrame.
import pandas as pd
from numba import njit


@njit
def dataframe_drop():
    """Drop column 'A' from a three-column sample DataFrame."""
    frame = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7], 'C': ['a', 'b', 'c', 'd']})
    remaining = frame.drop(columns='A')

    # Columns 'B' and 'C' remain.
    return remaining


print(dataframe_drop())
$ python ./dataframe/dataframe_drop.py
   B  C
0  4  a
1  5  b
2  6  c
3  7  d
Return the first n rows.

import pandas as pd
from numba import njit


@njit
def dataframe_head():
    """Return the first six rows of a nine-row, single-column DataFrame."""
    animals = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
                                       'monkey', 'parrot', 'shark', 'whale', 'zebra']})
    first_rows = animals.head(n=6)

    return first_rows


print(dataframe_head())
$ python ./dataframe/dataframe_head.py
      animal
0  alligator
1        bee
2     falcon
3       lion
4     monkey
5     parrot
Appending rows of other to the end of caller, returning a new object. Columns in other that are not in the caller are added as new columns.
import pandas as pd
from numba import njit


@njit
def dataframe_append():
    """Append the rows of a second DataFrame whose columns only partly overlap the first."""
    first = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
    second = pd.DataFrame({'B': [5, 6], 'C': [7, 8]})

    # Only 'B' is shared; 'A' and 'C' are NaN for the rows that lack them.
    return first.append(second)


print(dataframe_append())
$ python ./dataframe/dataframe_append.py
     A  B    C
0  1.0  3  NaN
1  2.0  4  NaN
0  NaN  5  7.0
1  NaN  6  8.0
Count of any non-NaN observations inside the window.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_rolling_count():
    """Count the non-NaN observations inside a rolling window of 3 elements."""
    values = pd.Series([4, 3, 2, np.nan, 6])

    # Expect series of 1.0, 2.0, 3.0, 2.0, 2.0
    return values.rolling(3).count()


print(series_rolling_count())
$ python ./series/rolling/series_rolling_count.py
0    1.0
1    2.0
2    3.0
3    2.0
4    2.0
dtype: float64
Count of any non-NaN observations inside the window.
import numpy as np
import pandas as pd
from numba import njit


@njit
def df_rolling_count():
    """Count the non-NaN observations inside a rolling window of 3 rows, per column."""
    frame = pd.DataFrame({'A': [4, 3, 2, np.nan, 6], 'B': [4, np.nan, 2, np.nan, 6]})

    # Expect DataFrame of
    # {'A': [1.0, 2.0, 3.0, 2.0, 2.0], 'B': [1.0, 1.0, 2.0, 1.0, 2.0]}
    return frame.rolling(3).count()


print(df_rolling_count())
$ python ./dataframe/rolling/dataframe_rolling_count.py
     A    B
0  1.0  1.0
1  2.0  1.0
2  3.0  2.0
3  2.0  1.0
4  2.0  2.0
Calculate rolling sum
import pandas as pd
from numba import njit


@njit
def series_rolling_sum():
    """Compute the sum over a rolling window of 3 elements."""
    values = pd.Series([4, 3, 5, 2, 6])

    # Expect series of NaN, NaN, 12.0, 10.0, 13.0
    return values.rolling(3).sum()


print(series_rolling_sum())
$ python ./series/rolling/series_rolling_sum.py
0     NaN
1     NaN
2    12.0
3    10.0
4    13.0
dtype: float64
Calculate rolling sum
import pandas as pd
from numba import njit


@njit
def df_rolling_sum():
    """Compute the sum over a rolling window of 3 rows for each column."""
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, 12.0, 10.0, 13.0], 'B': [NaN, NaN, -12.0, -10.0, -13.0]}
    return frame.rolling(3).sum()


print(df_rolling_sum())
$ python ./dataframe/rolling/dataframe_rolling_sum.py
      A     B
0   NaN   NaN
1   NaN   NaN
2  12.0 -12.0
3  10.0 -10.0
4  13.0 -13.0
Calculate the rolling mean of the values.
import pandas as pd
from numba import njit


@njit
def series_rolling_mean():
    """Compute the mean over a rolling window of 3 elements."""
    values = pd.Series([4, 3, 5, 2, 6])

    # Expect series of NaN, NaN, 4.000000, 3.333333, 4.333333
    return values.rolling(3).mean()


print(series_rolling_mean())
$ python ./series/rolling/series_rolling_mean.py
0         NaN
1         NaN
2    4.000000
3    3.333333
4    4.333333
dtype: float64
Calculate the rolling mean of the values.
import pandas as pd
from numba import njit


@njit
def df_rolling_mean():
    """Compute the mean over a rolling window of 3 rows for each column."""
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, 4.000000, 3.333333, 4.333333],
    #  'B': [NaN, NaN, -4.000000, -3.333333, -4.333333]}
    return frame.rolling(3).mean()


print(df_rolling_mean())
$ python ./dataframe/rolling/dataframe_rolling_mean.py
          A         B
0       NaN       NaN
1       NaN       NaN
2  4.000000 -4.000000
3  3.333333 -3.333333
4  4.333333 -4.333333
Calculate the rolling median.
import pandas as pd
from numba import njit


@njit
def series_rolling_median():
    """Compute the median over a rolling window of 3 elements."""
    values = pd.Series([4, 3, 5, 2, 6])

    # Expect series of NaN, NaN, 4.0, 3.0, 5.0
    return values.rolling(3).median()


print(series_rolling_median())
$ python ./series/rolling/series_rolling_median.py
0    NaN
1    NaN
2    4.0
3    3.0
4    5.0
dtype: float64
Calculate the rolling median.
import pandas as pd
from numba import njit


@njit
def df_rolling_median():
    """Compute the median over a rolling window of 3 rows for each column."""
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, 4.0, 3.0, 5.0], 'B': [NaN, NaN, -4.0, -3.0, -5.0]}
    return frame.rolling(3).median()


print(df_rolling_median())
$ python ./dataframe/rolling/dataframe_rolling_median.py
     A    B
0  NaN  NaN
1  NaN  NaN
2  4.0 -4.0
3  3.0 -3.0
4  5.0 -5.0
Calculate unbiased rolling variance.
import pandas as pd
from numba import njit


@njit
def series_rolling_var():
    """Compute the unbiased variance over a rolling window of 3 elements."""
    values = pd.Series([4, 3, 5, 2, 6])

    # Expect series of NaN, NaN, 1.000000, 2.333333, 4.333333
    return values.rolling(3).var()


print(series_rolling_var())
$ python ./series/rolling/series_rolling_var.py
0         NaN
1         NaN
2    1.000000
3    2.333333
4    4.333333
dtype: float64
Calculate unbiased rolling variance.
import pandas as pd
from numba import njit


@njit
def df_rolling_var():
    """Compute the unbiased variance over a rolling window of 3 rows for each column."""
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, 1.000000, 2.333333, 4.333333],
    #  'B': [NaN, NaN, 1.000000, 2.333333, 4.333333]}
    return frame.rolling(3).var()


print(df_rolling_var())
$ python ./dataframe/rolling/dataframe_rolling_var.py
          A         B
0       NaN       NaN
1       NaN       NaN
2  1.000000  1.000000
3  2.333333  2.333333
4  4.333333  4.333333
Calculate rolling standard deviation.
import pandas as pd
from numba import njit


@njit
def series_rolling_std():
    """Compute the standard deviation over a rolling window of 3 elements."""
    values = pd.Series([4, 3, 5, 2, 6])

    # Expect series of NaN, NaN, 1.000000, 1.527525, 2.081666
    return values.rolling(3).std()


print(series_rolling_std())
$ python ./series/rolling/series_rolling_std.py
0         NaN
1         NaN
2    1.000000
3    1.527525
4    2.081666
dtype: float64
Calculate rolling standard deviation.
import pandas as pd
from numba import njit


@njit
def df_rolling_std():
    """Compute the standard deviation over a rolling window of 3 rows for each column."""
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, 1.000000, 1.527525, 2.081666],
    #  'B': [NaN, NaN, 1.000000, 1.527525, 2.081666]}
    return frame.rolling(3).std()


print(df_rolling_std())
$ python ./dataframe/rolling/dataframe_rolling_std.py
          A         B
0       NaN       NaN
1       NaN       NaN
2  1.000000  1.000000
3  1.527525  1.527525
4  2.081666  2.081666
Calculate the rolling minimum.
import pandas as pd
from numba import njit


@njit
def series_rolling_min():
    """Compute the minimum over a rolling window of 3 elements."""
    values = pd.Series([4, 3, 5, 2, 6])

    # Expect series of NaN, NaN, 3.0, 2.0, 2.0
    return values.rolling(3).min()


print(series_rolling_min())
$ python ./series/rolling/series_rolling_min.py
0    NaN
1    NaN
2    3.0
3    2.0
4    2.0
dtype: float64
Calculate the rolling minimum.
import pandas as pd
from numba import njit


@njit
def df_rolling_min():
    """Compute the minimum over a rolling window of 3 rows for each column."""
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, 3.0, 2.0, 2.0], 'B': [NaN, NaN, -5.0, -5.0, -6.0]}
    return frame.rolling(3).min()


print(df_rolling_min())
$ python ./dataframe/rolling/dataframe_rolling_min.py
     A    B
0  NaN  NaN
1  NaN  NaN
2  3.0 -5.0
3  2.0 -5.0
4  2.0 -6.0
Calculate the rolling maximum.
import pandas as pd
from numba import njit


@njit
def series_rolling_max():
    """Compute the maximum over a rolling window of 3 elements."""
    values = pd.Series([4, 3, 5, 2, 6])

    # Expect series of NaN, NaN, 5.0, 5.0, 6.0
    return values.rolling(3).max()


print(series_rolling_max())
$ python ./series/rolling/series_rolling_max.py
0    NaN
1    NaN
2    5.0
3    5.0
4    6.0
dtype: float64
Calculate the rolling maximum.
import pandas as pd
from numba import njit


@njit
def df_rolling_max():
    """Compute the maximum over a rolling window of 3 rows for each column."""
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, 5.0, 5.0, 6.0], 'B': [NaN, NaN, -3.0, -2.0, -2.0]}
    return frame.rolling(3).max()


print(df_rolling_max())
$ python ./dataframe/rolling/dataframe_rolling_max.py
     A    B
0  NaN  NaN
1  NaN  NaN
2  5.0 -3.0
3  5.0 -2.0
4  6.0 -2.0
Calculate rolling correlation.
import pandas as pd
from numba import njit


@njit
def series_rolling_corr():
    """Compute the correlation of two Series over a rolling window of 4 elements."""
    left = pd.Series([3, 3, 3, 5, 8])
    right = pd.Series([3, 4, 4, 4, 8])

    # Expect series of NaN, NaN, NaN, 0.333333, 0.916949
    return left.rolling(4).corr(right)


print(series_rolling_corr())
$ python ./series/rolling/series_rolling_corr.py
0         NaN
1         NaN
2         NaN
3    0.333333
4    0.916949
dtype: float64
Calculate rolling correlation.
import pandas as pd
from numba import njit


@njit
def df_rolling_corr():
    """Compute the column-wise correlation of two DataFrames over a rolling window of 4 rows."""
    left = pd.DataFrame({'A': [3, 3, 3, 5, 8], 'B': [-3, -3, -3, -5, -8]})
    right = pd.DataFrame({'A': [3, 4, 4, 4, 8], 'B': [-3, -4, -4, -4, -8]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, NaN, 0.333333, 0.916949],
    #  'B': [NaN, NaN, NaN, 0.333333, 0.916949]}
    return left.rolling(4).corr(right)


print(df_rolling_corr())
$ python ./dataframe/rolling/dataframe_rolling_corr.py
          A         B
0       NaN       NaN
1       NaN       NaN
2       NaN       NaN
3  0.333333  0.333333
4  0.916949  0.916949
Calculate rolling covariance.
import pandas as pd
from numba import njit


@njit
def series_rolling_cov():
    """Compute the covariance of two Series over a rolling window of 4 elements."""
    left = pd.Series([3, 3, 3, 5, 8])
    right = pd.Series([3, 4, 4, 4, 8])

    # Expect series of NaN, NaN, NaN, 0.166667, 4.333333
    return left.rolling(4).cov(right)


print(series_rolling_cov())
$ python ./series/rolling/series_rolling_cov.py
0         NaN
1         NaN
2         NaN
3    0.166667
4    4.333333
dtype: float64
Calculate rolling covariance.
import pandas as pd
from numba import njit


@njit
def df_rolling_cov():
    """Compute the column-wise covariance of two DataFrames over a rolling window of 4 rows."""
    left = pd.DataFrame({'A': [3, 3, 3, 5, 8], 'B': [-3, -3, -3, -5, -8]})
    right = pd.DataFrame({'A': [3, 4, 4, 4, 8], 'B': [-3, -4, -4, -4, -8]})

    # Expect DataFrame of
    # {'A': [NaN, NaN, NaN, 0.166667, 4.333333],
    #  'B': [NaN, NaN, NaN, 0.166667, 4.333333]}
    return left.rolling(4).cov(right)


print(df_rolling_cov())
$ python ./dataframe/rolling/dataframe_rolling_cov.py
          A         B
0       NaN       NaN
1       NaN       NaN
2       NaN       NaN
3  0.166667  0.166667
4  4.333333  4.333333
Unbiased rolling skewness.
import pandas as pd
from numba import njit


@njit
def series_rolling_skew():
    """Compute the unbiased skewness of a Series over a rolling window of 3.

    Expected result: NaN, NaN, 0.000000, 0.935220, -1.293343
    """
    values = pd.Series([4, 3, 5, 2, 6])
    windows = values.rolling(3)

    return windows.skew()


print(series_rolling_skew())
$ python ./series/rolling/series_rolling_skew.py
0         NaN
1         NaN
2    0.000000
3    0.935220
4   -1.293343
dtype: float64
Calculate unbiased rolling skewness.
import pandas as pd
from numba import njit


@njit
def df_rolling_skew():
    """Compute per-column unbiased skewness over a rolling window of 3.

    Expected result:
        {'A': [NaN, NaN, 0.000000, 0.935220, -1.293343],
         'B': [NaN, NaN, 0.000000, -0.935220, 1.293343]}
    """
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
    windows = frame.rolling(3)

    return windows.skew()


print(df_rolling_skew())
$ python ./dataframe/rolling/dataframe_rolling_skew.py
          A         B
0       NaN       NaN
1       NaN       NaN
2  0.000000  0.000000
3  0.935220 -0.935220
4 -1.293343  1.293343
Calculate unbiased rolling kurtosis.
import pandas as pd
from numba import njit


@njit
def series_rolling_kurt():
    """Compute the unbiased kurtosis of a Series over a rolling window of 4.

    Expected result: NaN, NaN, NaN, -1.2, -3.3
    """
    values = pd.Series([4, 3, 5, 2, 6])
    windows = values.rolling(4)

    return windows.kurt()


print(series_rolling_kurt())
$ python ./series/rolling/series_rolling_kurt.py
0    NaN
1    NaN
2    NaN
3   -1.2
4   -3.3
dtype: float64
Calculate unbiased rolling kurtosis.
import pandas as pd
from numba import njit


@njit
def df_rolling_kurt():
    """Compute per-column unbiased kurtosis over a rolling window of 4.

    Expected result:
        {'A': [NaN, NaN, NaN, -1.2, -3.3], 'B': [NaN, NaN, NaN, -1.2, -3.3]}
    """
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
    windows = frame.rolling(4)

    return windows.kurt()


print(df_rolling_kurt())
$ python ./dataframe/rolling/dataframe_rolling_kurt.py
     A    B
0  NaN  NaN
1  NaN  NaN
2  NaN  NaN
3 -1.2 -1.2
4 -3.3 -3.3
Apply a user-defined function to each rolling window.
import numpy as np
import pandas as pd
from numba import njit


@njit
def series_rolling_apply():
    """Apply a median function to each rolling window (size 3) of a Series.

    Expected result: NaN, NaN, 4.0, 3.0, 5.0
    """
    # Function evaluated on every complete window.
    def window_median(window):
        return np.median(window)

    values = pd.Series([4, 3, 5, 2, 6])

    return values.rolling(3).apply(window_median)


print(series_rolling_apply())
$ python ./series/rolling/series_rolling_apply.py
0    NaN
1    NaN
2    4.0
3    3.0
4    5.0
dtype: float64
Apply a user-defined function to each rolling window.
import numpy as np
import pandas as pd
from numba import njit


@njit
def df_rolling_apply():
    """Apply a median function to each rolling window (size 3) of every column.

    Expected result:
        {'A': [NaN, NaN, 4.0, 3.0, 5.0], 'B': [NaN, NaN, -4.0, -3.0, -5.0]}
    """
    # Function evaluated on every complete window of each column.
    def window_median(window):
        return np.median(window)

    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    return frame.rolling(3).apply(window_median)


print(df_rolling_apply())
$ python ./dataframe/rolling/dataframe_rolling_apply.py
     A    B
0  NaN  NaN
1  NaN  NaN
2  4.0 -4.0
3  3.0 -3.0
4  5.0 -5.0
Calculate the rolling quantile.
import pandas as pd
from numba import njit


@njit
def series_rolling_quantile():
    """Compute the 0.25 quantile of a Series over a rolling window of 3.

    Expected result: NaN, NaN, 3.5, 2.5, 3.5
    """
    values = pd.Series([4, 3, 5, 2, 6])
    windows = values.rolling(3)

    return windows.quantile(0.25)


print(series_rolling_quantile())
$ python ./series/rolling/series_rolling_quantile.py
0    NaN
1    NaN
2    3.5
3    2.5
4    3.5
dtype: float64
Calculate the rolling quantile.
import pandas as pd
from numba import njit


@njit
def df_rolling_quantile():
    """Compute the per-column 0.25 quantile over a rolling window of 3.

    Expected result:
        {'A': [NaN, NaN, 3.5, 2.5, 3.5], 'B': [NaN, NaN, -4.5, -4.0, -5.5]}
    """
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
    windows = frame.rolling(3)

    return windows.quantile(0.25)


print(df_rolling_quantile())
$ python ./dataframe/rolling/dataframe_rolling_quantile.py
     A    B
0  NaN  NaN
1  NaN  NaN
2  3.5 -4.5
3  2.5 -4.0
4  3.5 -5.5
Compute count of group, excluding missing values.
import pandas as pd
import numpy as np
from numba import njit


@njit
def df_groupby_count():
    """Count the non-missing values of each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [1, 3, 3], 'C': [0, 3, 4]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, np.nan, np.nan, 2, 4, 3, 2, np.inf],
                          'C': [np.nan, 2, 3, np.nan, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    # NaN entries are skipped; the np.inf entry in 'B' is still counted.
    return grouped.count()


print(df_groupby_count())
$ python ./dataframe/groupby/dataframe_groupby_count.py
   B  C
1  1  0
2  3  3
3  3  4
Compute max of group values.
import pandas as pd
from numba import njit


@njit
def df_groupby_max():
    """Take the maximum of each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [0, 3, 5], 'C': [4, 9, 8]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    return grouped.max()


print(df_groupby_max())
$ python ./dataframe/groupby/dataframe_groupby_max.py
   B  C
1  0  4
2  3  9
3  5  8
Compute mean of groups, excluding missing values.
import pandas as pd
from numba import njit


@njit
def df_groupby_mean():
    """Average each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [0.0, 2.0, 3.5], 'C': [2.500000, 5.333333, 6.000000]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    return grouped.mean()


print(df_groupby_mean())
$ python ./dataframe/groupby/dataframe_groupby_mean.py
     B         C
1  0.0  2.500000
2  2.0  5.333333
3  3.5  6.000000
Compute median of groups, excluding missing values.
import pandas as pd
from numba import njit


@njit
def df_groupby_median():
    """Take the median of each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [0.0, 3.0, 3.5], 'C': [2.5, 5.0, 6.5]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 3, 4, 3, 2, 4],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    return grouped.median()


print(df_groupby_median())
$ python ./dataframe/groupby/dataframe_groupby_median.py
     B    C
1  0.0  2.5
2  3.0  5.0
3  3.5  6.5
Compute min of group values.
import pandas as pd
from numba import njit


@njit
def df_groupby_min():
    """Take the minimum of each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [0, 1, 2], 'C': [1, 2, 3]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    return grouped.min()


print(df_groupby_min())
$ python ./dataframe/groupby/dataframe_groupby_min.py
   B  C
1  0  1
2  1  2
3  2  3
Compute prod of group values.
import pandas as pd
from numba import njit


@njit
def df_groupby_prod():
    """Multiply together the values of each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [0, 6, 120], 'C': [4, 90, 1008]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    return grouped.prod()


print(df_groupby_prod())
$ python ./dataframe/groupby/dataframe_groupby_prod.py
     B     C
1    0     4
2    6    90
3  120  1008
Compute standard deviation of groups, excluding missing values.
import pandas as pd
from numba import njit


@njit
def df_groupby_std():
    """Compute the standard deviation of each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [0.000000, 1.000000, 1.290994],
         'C': [2.121320, 3.511885, 2.160247]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    return grouped.std()


print(df_groupby_std())
$ python ./dataframe/groupby/dataframe_groupby_std.py
          B         C
1  0.000000  2.121320
2  1.000000  3.511885
3  1.290994  2.160247
Compute sum of groups, excluding missing values.
import pandas as pd
from numba import njit


@njit
def df_groupby_sum():
    """Sum each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [0, 6, 14], 'C': [5, 16, 24]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    return grouped.sum()


print(df_groupby_sum())
$ python ./dataframe/groupby/dataframe_groupby_sum.py
      B     C
1   0.0   5.0
2   6.0  16.0
3  14.0  24.0
Compute variance of groups, excluding missing values.
import pandas as pd
from numba import njit


@njit
def df_groupby_var():
    """Compute the variance of each column within groups of 'A'.

    Expected result, with index=[1, 2, 3]:
        {'B': [0.000000, 1.000000, 1.666667],
         'C': [4.500000, 12.333333, 4.666667]}
    """
    frame = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
                          'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
                          'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
    grouped = frame.groupby('A')

    return grouped.var()


print(df_groupby_var())
$ python ./dataframe/groupby/dataframe_groupby_var.py
          B          C
1  0.000000   4.500000
2  1.000000  12.333333
3  1.666667   4.666667