List of examples
Basic workflow.
import pandas as pd
from numba import njit
# Dataset for analysis
FNAME = "employees.csv"
# This function gets compiled by Numba*
@njit
def get_analyzed_data():
    """Read FNAME and return (mean of 'Bonus %', sorted 'First Name' Series)."""
    frame = pd.read_csv(FNAME)
    bonus = pd.Series(frame['Bonus %'])
    first_names = pd.Series(frame['First Name'])
    mean_value = bonus.mean()
    ordered_names = first_names.sort_values()
    return mean_value, ordered_names
# Printing names and their average bonus percent
mean_bonus, sorted_first_names = get_analyzed_data()
print(sorted_first_names)
print('Average Bonus %:', mean_bonus)
$ python ./basic_workflow.py
7 ALEXANDER
4 CHRISTOPHER
0 EMILY
2 ISAAC
8 JOSEPH
9 JOSEPH
5 MIA
1 NOAH
6 OLIVIA
3 NaN
dtype: object
Average Bonus %: 11.204399999999998
Expanded basic workflow.
import pandas as pd
from numba import njit
import numpy as np
# Datasets for analysis
file_names = [
"employees_batch1.csv",
"employees_batch2.csv",
]
# This function gets compiled by Numba*
# For scalability use @njit(parallel=True)
@njit
def get_analyzed_data(file_name):
    """Read two columns of `file_name`; return (mean bonus, sorted first names).

    Only the 'Bonus %' (float64) and 'First Name' (str) columns are loaded.
    """
    frame = pd.read_csv(
        file_name,
        dtype={'Bonus %': np.float64, 'First Name': str},
        usecols=['Bonus %', 'First Name'],
    )
    bonus = pd.Series(frame['Bonus %'])
    first_names = pd.Series(frame['First Name'])
    mean_value = bonus.mean()
    ordered_names = first_names.sort_values()
    return mean_value, ordered_names
# Printing names and their average bonus percent
for file_name in file_names:
mean_bonus, sorted_first_names = get_analyzed_data(file_name)
print(file_name)
print(sorted_first_names)
print('Average Bonus %:', mean_bonus)
$ python ./basic_workflow_batch.py
employees_batch1.csv
4 CHRISTOPHER
0 EMILY
2 ISAAC
5 MIA
1 NOAH
3 NaN
dtype: object
Average Bonus %: 8.984
employees_batch2.csv
1 ALEXANDER
2 JOSEPH
3 JOSEPH
0 OLIVIA
dtype: object
Average Bonus %: 14.535
Basic workflow in parallel.
import pandas as pd
from numba import njit, prange
# Dataset for analysis
FNAME = "employees.csv"
# This function gets compiled by Numba* and multi-threaded
@njit(parallel=True)
def get_analyzed_data():
    """Read FNAME and return (mean of 'Bonus %', sorted 'First Name' Series).

    The mean is accumulated in an explicit prange loop instead of
    calling Series.mean().
    """
    frame = pd.read_csv(FNAME)
    bonus = pd.Series(frame['Bonus %'])
    first_names = pd.Series(frame['First Name'])

    # Explicit loop over prange: it will be compiled as a parallel loop
    count = bonus.size
    total = 0.0
    for idx in prange(count):
        total += bonus.values[idx]
    mean_value = total / count

    ordered_names = first_names.sort_values()
    return mean_value, ordered_names
# Printing names and their average bonus percent
mean_bonus, sorted_first_names = get_analyzed_data()
print(sorted_first_names)
print('Average Bonus %:', mean_bonus)
$ python ./basic_workflow_parallel.py
7 ALEXANDER
4 CHRISTOPHER
0 EMILY
2 ISAAC
8 JOSEPH
9 JOSEPH
5 MIA
1 NOAH
6 OLIVIA
3 NaN
dtype: object
Average Bonus %: 11.204399999999998
The index (axis labels) of the Series.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_index():
    """Build a string-labelled Series of 0..4 and return its index."""
    labelled = pd.Series(np.arange(5), index=['one', 'two', 'three', 'four', 'five'])
    index = labelled.index
    return index  # Expect array of 'one' 'two' 'three' 'four' 'five'
print(series_index())
$ python ./series/series_index.py
['one' 'two' 'three' 'four' 'five']
Return Series as ndarray or ndarray-like depending on the dtype.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_values():
    """Return the underlying values of a Series built from np.arange(5)."""
    data = np.arange(5)
    return pd.Series(data).values  # Expect array of 0, 1, 2, 3, 4
print(series_values())
$ python ./series/series_values.py
[0 1 2 3 4]
Return a tuple of the shape of the underlying data.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_shape():
    """Return the shape tuple of a ten-element Series."""
    data = np.arange(10)
    return pd.Series(data).shape  # Expect (10,)
print(series_shape())
$ python ./series/series_shape.py
(10,)
Number of dimensions of the underlying data, by definition 1.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_ndim():
    """Return the number of dimensions of a ten-element Series."""
    data = np.arange(10)
    return pd.Series(data).ndim  # Expect value: 1
print(series_ndim())
$ python ./series/series_ndim.py
1
Return the number of elements in the underlying data.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_size():
    """Return the element count of a Series built from np.arange(10)."""
    data = np.arange(10)
    return pd.Series(data).size  # Expect value: 10
print(series_size())
$ python ./series/series_size.py
10
Return the transpose, which is by definition self.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_T():
    """Return the transpose (`.T`) of a Series built from np.arange(5)."""
    data = np.arange(5)
    return pd.Series(data).T  # Expect array of 0, 1, 2, 3, 4
print(series_T())
$ python ./series/series_T.py
[0 1 2 3 4]
Getting Pandas Series elements. Returns single value.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_getitem_scalar():
    """Index a Series of 10, 9, ..., 1 with the scalar label 0."""
    countdown = pd.Series(np.arange(10, 0, -1))  # Series of 10, 9, ..., 1
    picked = countdown[0]  # Accessing series by scalar index
    return picked
print(series_getitem_scalar())
$ python ./series/series_getitem/series_getitem_scalar_single_result.py
0 10
dtype: int64
Getting Pandas Series elements. Returns multiple values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_getitem_scalar_many_idx():
    """Index a Series whose label 0 occurs three times with the scalar 0."""
    repeated = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 0, 6, 0])
    matches = repeated[0]  # label 0 is present at positions 0, 2 and 4
    return matches
print(series_getitem_scalar_many_idx())
$ python ./series/series_getitem/series_getitem_scalar_multiple_result.py
0 5
0 3
0 1
dtype: int64
Getting Pandas Series elements by slice.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_getitem_slice():
    """Slice positions 3..6 out of a Series of 10, 9, ..., 1."""
    countdown = pd.Series(np.arange(10, 0, -1))  # Series of 10, 9, ..., 1
    window = countdown[3:7]  # Accessing series by slice index
    return window
print(series_getitem_slice())
$ python ./series/series_getitem/series_getitem_slice.py
3 7
4 6
5 5
6 4
dtype: int64
Getting Pandas Series elements by array of booleans.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_getitem_array():
    """Select elements of a Series of 10, 9, ..., 1 with a boolean mask."""
    countdown = pd.Series(np.arange(10, 0, -1))  # Series of 10, 9, ..., 1
    mask = np.array([True, False, True, True, False] * 2)
    selected = countdown[mask]  # Accessing series by array
    return selected
print(series_getitem_array())
$ python ./series/series_getitem/series_getitem_bool_array.py
0 10
2 8
3 7
5 5
7 3
8 2
dtype: int64
Getting Pandas Series elements by another Series.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_getitem_series():
    """Select elements of a Series of 10, 9, ..., 1 using another Series."""
    countdown = pd.Series(np.arange(10, 0, -1))  # Series of 10, 9, ..., 1
    wanted = pd.Series(np.asarray([1, 6, 7, 8, 9]))
    selected = countdown[wanted]  # Accessing series by series
    return selected
print(series_getitem_series())
$ python ./series/series_getitem/series_getitem_series.py
1 9
6 4
7 3
8 2
9 1
dtype: int64
Setting Pandas Series elements
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_setitem():
    """Overwrite the element at label 0 of a Series of 5, 4, 3, 2, 1 with 0."""
    target = pd.Series(np.arange(5, 0, -1))  # Series of 5, 4, 3, 2, 1
    replacement = 0
    target[0] = replacement

    return target  # result Series of 0, 4, 3, 2, 1
print(series_setitem())
$ python ./series/series_setitem_int.py
0 0
1 4
2 3
3 2
4 1
dtype: int64
Setting Pandas Series elements by slice
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_setitem():
    """Overwrite positions 2..4 of a Series of 5, 4, 3, 2, 1 with 0."""
    target = pd.Series(np.arange(5, 0, -1))  # Series of 5, 4, 3, 2, 1
    replacement = 0
    target[2:5] = replacement

    return target  # result Series of 5, 4, 0, 0, 0
print(series_setitem())
$ python ./series/series_setitem_slice.py
0 5
1 4
2 0
3 0
4 0
dtype: int64
Setting Pandas Series elements by series
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_setitem():
    """Overwrite the elements selected by another Series with 0."""
    target = pd.Series(np.arange(5, 0, -1))  # Series of 5, 4, 3, 2, 1
    selector = pd.Series(np.asarray([1, 3]))
    replacement = 0
    target[selector] = replacement

    return target  # result Series of 5, 0, 3, 0, 1
print(series_setitem())
$ python ./series/series_setitem_series.py
0 5
1 0
2 3
3 0
4 1
dtype: int64
Cast a pandas object to a specified dtype.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_astype():
    """Cast a numeric Series containing NaN to str."""
    numbers = pd.Series([3, -10, np.nan, 0, 92])
    as_text = numbers.astype(str)
    return as_text
print(series_astype())
$ python ./series/series_astype.py
0 3.000000
1 -10.000000
2 nan
3 0.000000
4 92.000000
dtype: object
Make a copy of this object’s indices and data.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_copy():
    """Copy a Series built from np.arange(5) and return the copy."""
    original = pd.Series(np.arange(5))
    return original.copy()  # Expect new series of 0, 1, 2, 3, 4
print(series_copy())
$ python ./series/series_copy.py
0 0
1 1
2 2
3 3
4 4
dtype: int64
With a scalar integer. Returns single value.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_at_value():
    """Look up label 4 with `.at` on a Series with unique even labels."""
    labelled = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    found = labelled.at[4]
    return found  # Expect array: [3]
print(series_at_value())
$ python ./series/series_at/series_at_single_result.py
[3]
With a scalar integer. Returns multiple values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_at_many_idx():
    """Look up the repeated label 0 with `.at`."""
    repeated = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 0, 6, 0])
    found = repeated.at[0]
    return found  # Expect array: [5 3 1]
print(series_at_many_idx())
$ python ./series/series_at/series_at_multiple_result.py
[5 3 1]
Get value at specified index position.
import numpy as np
import pandas as pd
from numba import njit
from numba import njit
@njit
def series_iat():
    """Read the element at integer position 4 with `.iat`."""
    labelled = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    last_value = labelled.iat[4]
    return last_value  # Expect value: 1
print(series_iat())
$ python ./series/series_iat.py
1
With a scalar integer. Returns single value.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_loc_value():
    """Look up label 4 with `.loc` on a Series with unique even labels."""
    labelled = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    found = labelled.loc[4]
    return found
print(series_loc_value())
$ python ./series/series_loc/series_loc_single_result.py
4 3
dtype: int64
With a scalar integer. Returns multiple values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_loc_many_idx():
    """Look up the repeated label 0 with `.loc`."""
    repeated = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 0, 6, 0])
    found = repeated.loc[0]
    return found
print(series_loc_many_idx())
$ python ./series/series_loc/series_loc_multiple_result.py
0 5
0 3
0 1
dtype: int64
With a slice object. Returns multiple values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_loc_slice():
    """Select the label range 1:3 with `.loc` on a default-indexed Series."""
    values = pd.Series([5, 4, 3, 2, 1])
    window = values.loc[1:3]
    return window
print(series_loc_slice())
$ python ./series/series_loc/series_loc_slice.py
1 4
2 3
3 2
dtype: int64
With a scalar integer.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_iloc_value():
    """Read the element at integer position 4 with `.iloc`."""
    labelled = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    last_value = labelled.iloc[4]
    return last_value  # Expect value: 1
print(series_iloc_value())
$ python ./series/series_iloc/series_iloc_value.py
1
With a slice object.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_iloc_slice():
    """Select integer positions 2..3 with `.iloc`."""
    labelled = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
    window = labelled.iloc[2:4]
    return window
print(series_iloc_slice())
$ python ./series/series_iloc/series_iloc_slice.py
4 3
6 2
dtype: int64
Getting the addition of Series and other
import pandas as pd
from numba import njit
@njit
def series_add():
    """Add two integer Series element-wise with Series.add."""
    left = pd.Series([1, 2, 3])
    right = pd.Series([4, 5, 6])

    return left.add(right)  # Expect series of 5, 7, 9
print(series_add())
$ python ./series/series_add.py
0 5
1 7
2 9
dtype: int64
Return Subtraction of series and other, element-wise (binary operator sub).
import pandas as pd
from numba import njit
@njit
def series_sub():
    """Subtract one integer Series from another with Series.sub."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    difference = left.sub(right)

    return difference  # Expect series of 5, 2, 0, -4, -7
print(series_sub())
$ python ./series/series_sub.py
0 5
1 2
2 0
3 -4
4 -7
dtype: int64
Element-wise multiplication of two Series
import pandas as pd
from numba import njit
@njit
def series_mul():
    """Multiply two integer Series element-wise with Series.mul."""
    left = pd.Series([1, 3, 100])
    right = pd.Series([0, 1, 2])

    return left.mul(right)  # Expect series of 0, 3, 200
print(series_mul())
$ python ./series/series_mul.py
0 0
1 3
2 200
dtype: int64
Element-wise division of one Series by another (binary operator div)
import pandas as pd
from numba import njit
@njit
def series_div():
    """Divide one integer Series by another with Series.div."""
    numerator = pd.Series([1, 2, 4])
    denominator = pd.Series([4, 4, 16])

    return numerator.div(denominator)  # Expect series of 0.25, 0.50, 0.25
print(series_div())
$ python ./series/series_div.py
0 0.25
1 0.50
2 0.25
dtype: float64
Element-wise division of one Series by another (binary operator truediv)
import pandas as pd
from numba import njit
@njit
def series_truediv():
    """Divide one integer Series by another with Series.truediv."""
    numerator = pd.Series([1, 2, 4])
    denominator = pd.Series([4, 4, 16])

    return numerator.truediv(denominator)  # Expect series of 0.25, 0.50, 0.25
print(series_truediv())
$ python ./series/series_truediv.py
0 0.25
1 0.50
2 0.25
dtype: float64
Return Integer division of series and other, element-wise (binary operator floordiv).
import pandas as pd
from numba import njit
@njit
def series_floordiv():
    """Integer-divide one Series by another with Series.floordiv."""
    numerator = pd.Series([5, 4, 3, 2, 1])
    denominator = pd.Series([0, 2, 3, 6, 8])
    quotient = numerator.floordiv(denominator)

    return quotient  # Expect series of 0, 2, 1, 0, 0
print(series_floordiv())
$ python ./series/series_floordiv.py
0 0
1 2
2 1
3 0
4 0
dtype: int64
Return Modulo of series and other, element-wise (binary operator mod).
import pandas as pd
from numba import njit
@njit
def series_mod():
    """Compute the element-wise modulo of two Series with Series.mod."""
    numerator = pd.Series([5, 4, 3, 2, 1])
    denominator = pd.Series([0, 2, 3, 6, 8])
    remainder = numerator.mod(denominator)

    return remainder  # Expect series of 0, 0, 0, 2, 1
print(series_mod())
$ python ./series/series_mod.py
0 0
1 0
2 0
3 2
4 1
dtype: int64
Element-wise power of one Series by another (binary operator pow)
import pandas as pd
from numba import njit
@njit
def series_pow():
    """Raise one Series to the power of another with Series.pow."""
    base = pd.Series([5, 4, 3, 2, 1])
    exponent = pd.Series([0, 2, 3, 6, 8])
    powered = base.pow(exponent)

    return powered  # Expect series of 1, 16, 27, 64, 1
print(series_pow())
$ python ./series/series_pow.py
0 1
1 16
2 27
3 64
4 1
dtype: int64
Element-wise less than of one Series by another (binary operator lt)
import pandas as pd
from numba import njit
@njit
def series_lt():
    """Compare two Series element-wise with Series.lt (less than)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    is_less = left.lt(right)

    return is_less  # Expect series of False, False, False, True, True
print(series_lt())
$ python ./series/series_lt.py
0 False
1 False
2 False
3 True
4 True
dtype: bool
Element-wise greater than of one Series by another (binary operator gt)
import pandas as pd
from numba import njit
@njit
def series_gt():
    """Compare two Series element-wise with Series.gt (greater than)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    is_greater = left.gt(right)

    return is_greater  # Expect series of True, True, False, False, False
print(series_gt())
$ python ./series/series_gt.py
0 True
1 True
2 False
3 False
4 False
dtype: bool
Element-wise less than or equal of one Series by another (binary operator le)
import pandas as pd
from numba import njit
@njit
def series_le():
    """Compare two Series element-wise with Series.le (less than or equal)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    is_less_or_equal = left.le(right)

    return is_less_or_equal  # Expect series of False, False, True, True, True
print(series_le())
$ python ./series/series_le.py
0 False
1 False
2 True
3 True
4 True
dtype: bool
Element-wise greater than or equal of one Series by another (binary operator ge)
import pandas as pd
from numba import njit
@njit
def series_ge():
    """Compare two Series element-wise with Series.ge (greater than or equal)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    is_greater_or_equal = left.ge(right)

    return is_greater_or_equal  # Expect series of True, True, True, False, False
print(series_ge())
$ python ./series/series_ge.py
0 True
1 True
2 True
3 False
4 False
dtype: bool
Element-wise not equal of one Series by another (binary operator ne)
import pandas as pd
from numba import njit
@njit
def series_ne():
    """Compare two Series element-wise with Series.ne (not equal)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    differs = left.ne(right)

    return differs  # Expect series of True, True, False, True, True
print(series_ne())
$ python ./series/series_ne.py
0 True
1 True
2 False
3 True
4 True
dtype: bool
Element-wise equal of one Series by another (binary operator eq)
import pandas as pd
from numba import njit
@njit
def series_eq():
    """Compare two Series element-wise with Series.eq (equal)."""
    left = pd.Series([5, 4, 3, 2, 1])
    right = pd.Series([0, 2, 3, 6, 8])
    matches = left.eq(right)

    return matches  # Expect series of False, False, True, False, False
print(series_eq())
$ python ./series/series_eq.py
0 False
1 False
2 True
3 False
4 False
dtype: bool
Square the values by defining a function and passing it as an argument to apply().
import pandas as pd
import numpy as np
from numba import njit
@njit
def series_apply():
    """Square each value of a city-labelled Series via apply() and a named function."""
    temperatures = pd.Series([20, 21, 12],
                             index=['London', 'New York', 'Helsinki'])

    # Named helper passed to apply(); it is called once per element.
    def square(value):
        return value ** 2

    squared = temperatures.apply(square)
    return squared
print(series_apply())
$ python ./series/series_apply.py
London 400
New York 441
Helsinki 144
dtype: int64
Square the values by passing an anonymous function as an argument to apply().
import pandas as pd
import numpy as np
from numba import njit
@njit
def series_apply():
    """Square each value of a city-labelled Series via apply() and a lambda."""
    temperatures = pd.Series([20, 21, 12],
                             index=['London', 'New York', 'Helsinki'])

    squared = temperatures.apply(lambda value: value ** 2)
    return squared
print(series_apply())
$ python ./series/series_apply_lambda.py
London 400
New York 441
Helsinki 144
dtype: int64
Use a function from the Numpy library.
import pandas as pd
import numpy as np
from numba import njit
@njit
def series_apply():
    """Apply np.log to each value of a city-labelled Series."""
    temperatures = pd.Series([20, 21, 12],
                             index=['London', 'New York', 'Helsinki'])

    logged = temperatures.apply(np.log)
    return logged
print(series_apply())
$ python ./series/series_apply_log.py
London 2.995732
New York 3.044522
Helsinki 2.484907
dtype: float64
map() accepts a function.
import pandas as pd
from numba import njit
@njit
def series_map():
    """Square each value of a float Series via map() and a lambda."""
    numbers = pd.Series([1., 2., 3., 4., 5.])
    squared = numbers.map(lambda value: value ** 2)
    return squared
print(series_map())
$ python ./series/series_map.py
0 1.0
1 4.0
2 9.0
3 16.0
4 25.0
dtype: float64
Return the mean of the values grouped by numpy array.
import pandas as pd
import numpy as np
from numba import njit
@njit
def series_groupby():
    """Group a float Series by a NumPy key array and return per-group means."""
    speeds = pd.Series([390., 350., 30., 20.])
    keys = np.asarray([0, 1, 0, 1])
    grouped = speeds.groupby(keys)

    # Expect Series of pd.Series([210.0, 185.0], index=[0, 1])
    return grouped.mean()
print(series_groupby())
$ python ./series/series_groupby.py
0 210.0
1 185.0
dtype: float64
Calculate the rolling minimum.
import pandas as pd
from numba import njit
@njit
def series_rolling_min():
    """Compute the minimum over a rolling window of size 3."""
    values = pd.Series([4, 3, 5, 2, 6])  # Series of 4, 3, 5, 2, 6
    window = values.rolling(3)

    return window.min()  # Expect series of NaN, NaN, 3.0, 2.0, 2.0
print(series_rolling_min())
$ python ./series/rolling/series_rolling_min.py
0 NaN
1 NaN
2 3.0
3 2.0
4 2.0
dtype: float64
Getting the absolute value of each element in Series
import pandas as pd
from numba import njit
@njit
def series_abs():
    """Return the element-wise absolute values of a Series."""
    signed = pd.Series([-1.10, 2, -3.33])

    return signed.abs()  # Expect series of 1.10, 2.00, 3.33
print(series_abs())
$ python ./series/series_abs.py
0 1.10
1 2.00
2 3.33
dtype: float64
Compute correlation with other Series, excluding missing values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_corr():
    """Compute the correlation of two float Series that each contain a NaN."""
    first = pd.Series([3.2, -10, np.nan, 0.23, 9.2])
    second = pd.Series([5., 0, 3.3, np.nan, 9.2])
    correlation = first.corr(second)

    return correlation  # Expect value: 0.98673...
print(series_corr())
$ python ./series/series_corr.py
0.9867362434412106
Counting non-NaN values in Series
import pandas as pd
import numpy as np
from numba import njit
@njit
def series_count():
    """Count the non-NaN values of a three-element Series with one NaN."""
    values = pd.Series([1, 2, np.nan])

    return values.count()  # Expect the number of non-Nan values == '2'
print(series_count())
$ python ./series/series_count.py
2
Compute covariance with Series, excluding missing values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_cov():
    """Compute the covariance of two float Series that each contain a NaN."""
    first = pd.Series([3.2, -10, np.nan, 0.23, 9.2])
    second = pd.Series([5., 0, 3.3, np.nan, 9.2])
    covariance = first.cov(second)

    return covariance  # Expect value: 44.639...
print(series_cov())
$ python ./series/series_cov.py
44.63999999999999
Returns cumulative sum over Series.
import pandas as pd
from numba import njit
@njit
def series_cumsum():
    """Return the cumulative sum of the Series 1, 2, 3, 4."""
    values = pd.Series([1, 2, 3, 4])
    running_total = values.cumsum()

    return running_total  # Expect series of 1, 3, 6, 10
print(series_cumsum())
$ python ./series/series_cumsum.py
0 1
1 3
2 6
3 10
dtype: int64
Generate descriptive statistics.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_describe():
    """Return the describe() summary of a five-element float Series."""
    values = pd.Series([5., 0, 3.3, 4.4, 9.2])
    summary = values.describe()

    return summary
print(series_describe())
$ python ./series/series_describe.py
count 5.000000
mean 4.380000
std 3.315419
min 0.000000
25% 3.300000
50% 4.400000
75% 5.000000
max 9.200000
dtype: float64
Getting the maximum value of Series elements
import pandas as pd
from numba import njit
@njit
def series_max():
    """Return the largest value of the Series 1, 4, 2, 0."""
    values = pd.Series([1, 4, 2, 0])

    return values.max()  # Expect maximum value 4
print(series_max())
$ python ./series/series_max.py
4
Return the mean of the values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_mean():
    """Return the mean of a float Series that contains a NaN."""
    values = pd.Series([3.2, -10, np.nan, 0.23, 9.2])
    average = values.mean()

    return average  # Expect value: 0.6575
print(series_mean())
$ python ./series/series_mean.py
0.6575
Return the median of the values.
$ python ./series/series_median.py
2.5
Getting the minimum value of Series elements
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_min():
    """Return the smallest value of a Series that contains a NaN."""
    values = pd.Series([4, np.nan, 2, 1])
    smallest = values.min()

    return smallest  # Expect minimum value 1.0
print(series_min())
$ python ./series/series_min.py
1.0
Returns the largest n elements.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_nlargest():
    """Return the four largest elements of a Series built from np.arange(10)."""
    values = pd.Series(np.arange(10))
    top_four = values.nlargest(4)

    return top_four  # Expect series of 9, 8, 7, 6
print(series_nlargest())
$ python ./series/series_nlargest.py
9 9
8 8
7 7
6 6
dtype: int64
Returns the smallest n elements.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_nsmallest():
    """Return the four smallest elements of a Series built from np.arange(10)."""
    values = pd.Series(np.arange(10))
    bottom_four = values.nsmallest(4)

    return bottom_four  # Expect series of 0, 1, 2, 3
print(series_nsmallest())
$ python ./series/series_nsmallest.py
0 0
1 1
2 2
3 3
dtype: int64
Percentage change between the current and a prior element.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_pct_change():
    """Compute the percentage change against the element two positions earlier."""
    values = pd.Series([5., 0, 3.3, np.nan, 9.2])
    change = values.pct_change(periods=2, fill_method=None, limit=None, freq=None)

    return change
print(series_pct_change())
$ python ./series/series_pct_change.py
0 NaN
1 NaN
2 -0.340000
3 NaN
4 1.787879
dtype: float64
Return the product of the values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_prod():
    """Return the product of a float Series that contains a NaN."""
    values = pd.Series([3.2, -10, np.nan, 0.23, 9.2])
    product = values.prod()

    return product  # Expect value: -67.712
print(series_prod())
$ python ./series/series_prod.py
-67.712
Computing quantile for the Series
import pandas as pd
from numba import njit
@njit
def series_quantile():
    """Compute the 0.5 quantile (the median) of the Series 1, 2, 3, 4."""
    values = pd.Series([1, 2, 3, 4])
    half = .5  # the 0.5 quantile is the median

    return values.quantile(half)  # Expect median value == 2.5
print(series_quantile())
$ python ./series/series_quantile.py
2.5
Returns sample standard deviation over Series.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_std():
    """Return the standard deviation of a Series built from np.arange(10)."""
    values = pd.Series(np.arange(10))
    deviation = values.std()

    return deviation  # Expect value: 3.0276503540974917
print(series_std())
$ python ./series/series_std.py
3.0276503540974917
Return the sum of the values for the requested axis.
import pandas as pd
from numba import njit
@njit
def series_sum():
    """Return the sum of the Series 5, 4, 3, 2, 1."""
    values = pd.Series([5, 4, 3, 2, 1])
    total = values.sum()

    return total  # Expect value: 15 (the recorded run prints it as 15.0)
print(series_sum())
$ python ./series/series_sum.py
15.0
Returns unbiased variance over Series.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_var():
    """Return the variance of a Series built from np.arange(10)."""
    values = pd.Series(np.arange(10))
    variance = values.var()

    return variance  # Expect value: 9.16666...
print(series_var())
$ python ./series/series_var.py
9.166666666666666
Getting unique values in Series
import pandas as pd
from numba import njit
@njit
def series_unique():
    """Return the distinct values of the Series 2, 1, 3, 3."""
    values = pd.Series([2, 1, 3, 3])

    return values.unique()  # Expect array of unique values [1, 2, 3]
print(series_unique())
$ python ./series/series_unique.py
[1 2 3]
Return number of unique elements in the object.
import pandas as pd
from numba import njit
@njit
def series_nunique():
    """Return how many distinct values the Series 2, 8, 2, 1 holds."""
    values = pd.Series([2, 8, 2, 1])
    distinct_count = values.nunique()

    return distinct_count  # Expect value: 3
print(series_nunique())
$ python ./series/series_nunique.py
3
Getting the number of occurrences of each unique value, excluding NaNs
import pandas as pd
import numpy as np
from numba import njit
@njit
def series_value_counts():
    """Count the occurrences of each value in a Series that contains a NaN."""
    values = pd.Series([3, 1, 2, 3, 4, np.nan])

    return values.value_counts()
print(series_value_counts())
$ python ./series/series_value_counts.py
3.0 2
4.0 1
2.0 1
1.0 1
dtype: int64
Getting the first n rows.
import pandas as pd
from numba import njit
@njit
def series_head():
    """Return the first three rows of a seven-element Series."""
    values = pd.Series([7, 6, 5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8, 10, 12])
    first_rows = values.head(3)

    return first_rows
print(series_head())
$ python ./series/series_head.py
0 7
2 6
4 5
dtype: int64
Getting the row label of the maximum value.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_idxmax():
    """Return the label of the maximum value of a letter-labelled Series."""
    values = pd.Series([4, np.nan, 2, 1], index=['A', 'B', 'C', 'D'])
    label = values.idxmax()

    return label  # Expect index of maximum value A
print(series_idxmax())
$ python ./series/series_idxmax.py
A
Getting the row label of the minimum value.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_idxmin():
    """Return the label of the minimum value of a letter-labelled Series."""
    values = pd.Series([4, np.nan, 2, 1], index=['A', 'B', 'C', 'D'])
    label = values.idxmin()

    return label  # Expect index of minimum value D
print(series_idxmin())
$ python ./series/series_idxmin.py
D
Check whether values are contained in Series.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_isin():
    """Test which elements of a Series are contained in the list [4, 1]."""
    values = pd.Series([4, np.nan, 2, 1])
    membership = values.isin([4, 1])

    return membership  # Expect series of True, False, False, True
print(series_isin())
$ python ./series/series_isin.py
0 True
1 False
2 False
3 True
dtype: bool
Alter Series index labels or name.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_rename():
    """Call rename("new_series") on a Series of 0..4 and return the original Series."""
    series = pd.Series(np.arange(5))
    # NOTE(review): the value returned by rename() is discarded, so the Series
    # returned below is the original one; the recorded output accordingly shows
    # no name. Presumably `return series.rename("new_series")` was intended --
    # confirm, and regenerate the expected output if this is changed.
    series.rename("new_series")

    return series
print(series_rename())
$ python ./series/series_rename.py
0 0
1 1
2 2
3 3
4 4
dtype: int64
Return the elements in the given positional indices along an axis.
import pandas as pd
from numba import njit
@njit
def series_take():
    """Return the elements at integer positions 4 and 1 of the Series 5, 4, 3, 2, 1."""
    series = pd.Series([5, 4, 3, 2, 1])

    # Positions 4 and 1 hold the values 1 and 4; their index labels are 4 and 1.
    return series.take([4, 1])  # Expect series of 1, 4 (index 4, 1)
print(series_take())
$ python ./series/series_take.py
4 1
1 4
dtype: int64
Detect missing values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_isna():
    """Flag the missing values of a Series that contains one NaN."""
    values = pd.Series([4, np.nan, 2, 1])
    missing = values.isna()

    return missing  # Expect series of False, True, False, False
print(series_isna())
$ python ./series/series_isna.py
0 False
1 True
2 False
3 False
dtype: bool
Detect existing (non-missing) values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_notna():
    """Flag the non-missing values of a Series that contains one NaN."""
    values = pd.Series([4, np.nan, 2, 1])
    present = values.notna()

    return present  # Expect series of True, False, True, True
print(series_notna())
$ python ./series/series_notna.py
0 True
1 False
2 True
3 True
dtype: bool
Return a new Series with missing values removed.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_dropna():
    """Return a Series with the NaN entry removed."""
    values = pd.Series([4, np.nan, 2, 1])
    without_missing = values.dropna()

    return without_missing
print(series_dropna())
$ python ./series/series_dropna.py
0 4.0
2 2.0
3 1.0
dtype: float64
Fill NA/NaN values using the specified method.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_fillna():
    """Return a Series with the NaN entry replaced by 0."""
    values = pd.Series([4, np.nan, 2, 1])
    filled = values.fillna(0)

    return filled
print(series_fillna())
$ python ./series/series_fillna.py
0 4.0
1 0.0
2 2.0
3 1.0
dtype: float64
Return the integer indices that would sort the Series values (overrides ndarray.argsort).
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_argsort():
    """Return the argsort() result of a Series that contains a NaN."""
    values = pd.Series([3, -10, np.nan, 0, 92])
    order = values.argsort()

    return order  # Expect series of 1, 2, -1, 0, 3
print(series_argsort())
$ python ./series/series_argsort.py
0 1
1 2
2 -1
3 0
4 3
dtype: int64
Sort by the values.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_sort_values():
    """Return a Series that contains a NaN, sorted by value."""
    values = pd.Series([3, -10, np.nan, 0, 92])
    ordered = values.sort_values()

    return ordered
print(series_sort_values())
$ python ./series/series_sort_values.py
1 -10.0
3 0.0
0 3.0
4 92.0
2 NaN
dtype: float64
Concatenate two or more Series.
import pandas as pd
from numba import njit
@njit
def series_append():
    """Concatenate two string Series with Series.append."""
    head = pd.Series(['one', 'two', 'three'])
    tail = pd.Series(['four', 'five', 'six'])
    combined = head.append(tail)

    return combined
print(series_append())
$ python ./series/series_append.py
0 one
1 two
2 three
0 four
1 five
2 six
dtype: object
Shift index by desired number of periods with an optional time freq.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_shift():
    """Shift a Series that contains a NaN using shift()'s default arguments."""
    values = pd.Series([3, -10, np.nan, 0, 92])
    shifted = values.shift()

    return shifted
print(series_shift())
$ python ./series/series_shift.py
0 NaN
1 3.0
2 -10.0
3 NaN
4 0.0
dtype: float64
Convert strings in the Series to be capitalized.
import pandas as pd
from numba import njit
@njit
def series_str_capitalize():
    """Capitalize every string of a four-element Series."""
    words = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])

    # Expect series of 'Lower', 'Capitals', 'This is a sentence', 'Swapcase'
    return words.str.capitalize()
print(series_str_capitalize())
$ python ./series/str/series_str_capitalize.py
0 Lower
1 Capitals
2 This is a sentence
3 Swapcase
dtype: object
Convert strings in the Series to be casefolded.
import pandas as pd
from numba import njit
@njit
def series_str_casefold():
    """Casefold every string of a four-element Series."""
    words = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])

    # Expect series of 'lower', 'capitals', 'this is a sentence', 'swapcase'
    return words.str.casefold()
print(series_str_casefold())
$ python ./series/str/series_str_casefold.py
0 lower
1 capitals
2 this is a sentence
3 swapcase
dtype: object
Filling left and right side of strings in the Series with an additional character
import pandas as pd
from numba import njit
@njit
def series_str_center():
    """Center every string to width 5, padding both sides with '*'."""
    words = pd.Series(['dog', 'foo', 'bar'])  # Series of 'dog', 'foo', 'bar'

    return words.str.center(5, '*')  # Expect series of '*dog*', '*foo*', '*bar*'
print(series_str_center())
$ python ./series/str/series_str_center.py
0 *dog*
1 *foo*
2 *bar*
dtype: object
Test if the end of each string element matches a string
import pandas as pd
from numba import njit
@njit
def series_str_endswith():
    """Test whether each string of the Series ends with 'bar'."""
    words = pd.Series(['foo', 'bar', 'foobar'])  # Series of 'foo', 'bar', 'foobar'

    return words.str.endswith('bar')  # Expect series of False, True, True
print(series_str_endswith())
$ python ./series/str/series_str_endswith.py
0 False
1 True
2 True
dtype: bool
Return the lowest index at which the substring is found in each string in the Series
import pandas as pd
from numba import njit
@njit
def series_str_find():
    """Find the position of 'bar' within each string of the Series."""
    words = pd.Series(['foo', 'bar', 'foobar'])  # Series of 'foo', 'bar', 'foobar'

    return words.str.find('bar')  # Expect series of -1, 0, 3
print(series_str_find())
$ python ./series/str/series_str_find.py
0 -1
1 0
2 3
dtype: int64
Compute the length of each element in the Series
import pandas as pd
from numba import njit
@njit
def series_str_len():
    """Return the length of each string of the Series."""
    words = pd.Series(['foo', 'bar', 'foobar'])  # Series of 'foo', 'bar', 'foobar'

    return words.str.len()  # Expect series of 3, 3, 6
print(series_str_len())
$ python ./series/str/series_str_len.py
0 3
1 3
2 6
dtype: int64
Filling right side of strings in the Series with an additional character
import pandas as pd
from numba import njit
@njit
def series_str_ljust():
    """Left-justify every string to width 5, padding the right side with '*'."""
    words = pd.Series(['dog', 'foo', 'bar'])  # Series of 'dog', 'foo', 'bar'

    return words.str.ljust(5, '*')  # Expect series of 'dog**', 'foo**', 'bar**'
print(series_str_ljust())
$ python ./series/str/series_str_ljust.py
0 dog**
1 foo**
2 bar**
dtype: object
Convert strings in the Series to lowercase.
import pandas as pd
from numba import njit
@njit
def series_str_lower():
    """Convert every string of the Series to lowercase."""
    words = pd.Series(['DOG', 'foo', 'BaR'])

    return words.str.lower()
print(series_str_lower())
$ python ./series/str/series_str_lower.py
0 dog
1 foo
2 bar
dtype: object
Remove leading characters.
import pandas as pd
from numba import njit
@njit
def series_str_lstrip():
    """Strip the leading characters '1', '2', '3' and '.' from each string."""
    entries = pd.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t'])
    stripped = entries.str.lstrip('123.')

    return stripped
print(series_str_lstrip())
$ python ./series/str/series_str_lstrip.py
0 Ant.
1 Bee!\n
2 Cat?\t
dtype: object
Filling left side of strings in the Series with an additional character
import pandas as pd
from numba import njit
@njit
def series_str_rjust():
    """Right-justify every string to width 5, padding the left side with '*'."""
    words = pd.Series(['dog', 'foo', 'bar'])  # Series of 'dog', 'foo', 'bar'

    return words.str.rjust(5, '*')  # Expect series of '**dog', '**foo', '**bar'
print(series_str_rjust())
$ python ./series/str/series_str_rjust.py
0 **dog
1 **foo
2 **bar
dtype: object
Remove trailing characters.
import pandas as pd
from numba import njit
@njit
def series_str_rstrip():
series = pd.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t'])
return series.str.rstrip('.!? \n\t')
print(series_str_rstrip())
$ python ./series/str/series_str_rstrip.py
0 1. Ant
1 2. Bee
2 3. Cat
dtype: object
Test if the start of each string element matches a string
import pandas as pd
from numba import njit
@njit
def series_str_startswith():
series = pd.Series(['foo', 'bar', 'foobar']) # Series of 'foo', 'bar', 'foobar'
out_series = series.str.startswith('foo')
return out_series # Expect series of True, False, True
print(series_str_startswith())
$ python ./series/str/series_str_startswith.py
0 True
1 False
2 True
dtype: bool
Remove leading and trailing characters.
import pandas as pd
from numba import njit
@njit
def series_str_strip():
series = pd.Series(['1. Ant. ', '2. Bee!\n', '3. Cat?\t'])
return series.str.strip('123.!? \n\t')
print(series_str_strip())
$ python ./series/str/series_str_strip.py
0 Ant
1 Bee
2 Cat
dtype: object
Convert strings in the Series to be swapcased.
import pandas as pd
from numba import njit
@njit
def series_str_swapcase():
series = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
out_series = series.str.swapcase()
return out_series # Expect series of 'LOWER', 'capitals', 'THIS IS A SENTENCE', 'sWaPcAsE'
print(series_str_swapcase())
$ python ./series/str/series_str_swapcase.py
0 LOWER
1 capitals
2 THIS IS A SENTENCE
3 sWaPcAsE
dtype: object
Convert strings in the Series to titlecase.
import pandas as pd
from numba import njit
@njit
def series_str_title():
series = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
out_series = series.str.title()
return out_series # Expect series of 'Lower', 'Capitals', 'This Is A Sentence', 'Swapcase'
print(series_str_title())
$ python ./series/str/series_str_title.py
0 Lower
1 Capitals
2 This Is A Sentence
3 Swapcase
dtype: object
Convert strings in the Series to upper case.
import pandas as pd
from numba import njit
@njit
def series_str_upper():
series = pd.Series(['lower', 'CAPITALS', 'this is a sentence', 'SwApCaSe'])
return series.str.upper()
print(series_str_upper())
$ python ./series/str/series_str_upper.py
0 LOWER
1 CAPITALS
2 THIS IS A SENTENCE
3 SWAPCASE
dtype: object
Pad strings in the Series by prepending ‘0’ characters
import pandas as pd
from numba import njit
@njit
def series_str_zfill():
series = pd.Series(['dog', 'foo', 'bar']) # Series of 'dog', 'foo', 'bar'
out_series = series.str.zfill(5)
return out_series # Expect series of '00dog', '00foo', '00bar'
print(series_str_zfill())
$ python ./series/str/series_str_zfill.py
0 00dog
1 00foo
2 00bar
dtype: object
Check if all the characters in the text are alphanumeric
import pandas as pd
from numba import njit
@njit
def series_str_isalnum():
series = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
out_series = series.str.isalnum()
return out_series # Expect series of True, False, True, False
print(series_str_isalnum())
$ python ./series/str/series_str_isalnum.py
0 True
1 False
2 True
3 False
dtype: bool
Check whether all characters in each string are alphabetic
import pandas as pd
from numba import njit
@njit
def series_str_isalpha():
series = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
out_series = series.str.isalpha()
return out_series # Expect series of True, False, True, False
print(series_str_isalpha())
$ python ./series/str/series_str_isalpha.py
0 True
1 False
2 True
3 False
dtype: bool
Check whether all characters in each string in the Series are digits.
import pandas as pd
from numba import njit
@njit
def series_str_isdigit():
series = pd.Series(['23', '³', '⅕', ''])
out_series = series.str.isdigit()
return out_series # Expect series of True, True, False, False
print(series_str_isdigit())
$ python ./series/str/series_str_isdigit.py
0 True
1 True
2 False
3 False
dtype: bool
Check if all the characters in the text are whitespace
import pandas as pd
from numba import njit
@njit
def series_str_isspace():
series = pd.Series([' ', ' c ', ' b ', ' a '])
out_series = series.str.isspace()
return out_series # Expect series of True, False, False, False
print(series_str_isspace())
$ python ./series/str/series_str_isspace.py
0 True
1 False
2 False
3 False
dtype: bool
Check whether all characters in each string are lowercase.
import pandas as pd
from numba import njit
@njit
def series_str_islower():
series = pd.Series(['leopard', 'Golden Eagle', 'SNAKE', ''])
out_series = series.str.islower()
return out_series # Expect series of True, False, False, False
print(series_str_islower())
$ python ./series/str/series_str_islower.py
0 True
1 False
2 False
3 False
dtype: bool
Check whether all characters in each string are uppercase.
import pandas as pd
from numba import njit
@njit
def series_str_isupper():
series = pd.Series(['FOO', 'BAr', 'FooBar']) # Series of 'FOO', 'BAr', 'FooBar'
out_series = series.str.isupper()
return out_series # Expect series of True, False, False
print(series_str_isupper())
$ python ./series/str/series_str_isupper.py
0 True
1 False
2 False
dtype: bool
Check whether each word starts with an uppercase letter
import pandas as pd
from numba import njit
@njit
def series_str_istitle():
series = pd.Series(['Cat', 'dog', 'Bird'])
out_series = series.str.istitle()
return out_series # Expect series of True, False, True
print(series_str_istitle())
$ python ./series/str/series_str_istitle.py
0 True
1 False
2 True
dtype: bool
Check whether all characters in each string are numeric.
import pandas as pd
from numba import njit
@njit
def series_str_isnumeric():
series = pd.Series(['one', 'one1', '1', ''])
out_series = series.str.isnumeric()
return out_series # Expect series of False, False, True, False
print(series_str_isnumeric())
$ python ./series/str/series_str_isnumeric.py
0 False
1 False
2 True
3 False
dtype: bool
Check whether all characters in each string are decimal.
import pandas as pd
from numba import njit
@njit
def series_str_isdecimal():
series = pd.Series(['23', '³', '⅕', ''])
out_series = series.str.isdecimal()
return out_series # Expect series of True, False, False, False
print(series_str_isdecimal())
$ python ./series/str/series_str_isdecimal.py
0 True
1 False
2 False
3 False
dtype: bool
The index (row labels) of the DataFrame.
import pandas as pd
from numba import njit
@njit
def dataframe_index():
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}, index=['a', 'b'])
result = df.index
return result # Numpy array of index values ['a', 'b']
print(dataframe_index())
$ python ./dataframe/dataframe_index.py
['a' 'b']
The values data of the DataFrame.
import pandas as pd
from numba import njit
@njit
def dataframe_values():
df = pd.DataFrame({'age': [3, 29], 'height': [94, 170], 'weight': [31, 115]})
result = df.values
return result # Numpy array of dataframe values: array([[3, 94, 31], [29, 170, 115]], dtype=int64)
print(dataframe_values())
$ python ./dataframe/dataframe_values.py
[[ 3 94 31]
[ 29 170 115]]
Getting a Pandas DataFrame column through attribute access.
import pandas as pd
from numba import njit
@njit
def dataframe_getitem():
df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
'B': [1, 2, 3, 4, 5],
'C': [2, 3, 4, 5, 6]})
return df.C
print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_attr.py
0 2
1 3
2 4
3 5
4 6
Name: C, dtype: int64
Getting Pandas DataFrame column where key is a string.
import pandas as pd
from numba import njit
@njit
def dataframe_getitem():
df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
'B': [1, 2, 3, 4, 5],
'C': [2, 3, 4, 5, 6]})
return df['A']
print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem.py
0 0
1 1
2 2
3 3
4 4
Name: A, dtype: int64
Getting slice of Pandas DataFrame.
import pandas as pd
from numba import njit
@njit
def dataframe_getitem():
df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
'B': [1, 2, 3, 4, 5],
'C': [2, 3, 4, 5, 6]})
return df[1:3]
print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_slice.py
A B C
1 1 2 3
2 2 3 4
Getting Pandas DataFrame elements where key is a tuple of strings.
import pandas as pd
from numba import njit
@njit
def dataframe_getitem():
df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
'B': [1, 2, 3, 4, 5],
'C': [2, 3, 4, 5, 6]})
return df[('A', 'C')]
print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_tuple.py
A C
0 0 2
1 1 3
2 2 4
3 3 5
4 4 6
Getting Pandas DataFrame elements where key is an array of booleans.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_getitem():
df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
'B': [1, 2, 3, 4, 5],
'C': [2, 3, 4, 5, 6]})
arr = np.array([False, True, False, False, True])
return df[arr]
print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_array.py
A B C
1 1 2 3
4 4 5 6
Getting Pandas DataFrame elements where key is series of booleans.
import pandas as pd
from numba import njit
@njit
def dataframe_getitem():
df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
'B': [1, 2, 3, 4, 5],
'C': [2, 3, 4, 5, 6]})
val = pd.Series([True, False, True, False, False])
return df[val]
print(dataframe_getitem())
$ python ./dataframe/getitem/df_getitem_series.py
A B C
0 0 1 2
2 2 3 4
Make a copy of this object’s indices and data.
import pandas as pd
from numba import njit
@njit
def dataframe_copy():
df = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7]})
new_df = df.copy(deep=True)
return new_df
print(dataframe_copy())
$ python ./dataframe/dataframe_copy.py
A B
0 1.0 4
1 2.0 5
2 3.0 6
3 1.0 7
Detect missing values.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_isna():
df = pd.DataFrame({'A': [1.0, np.nan, 3.0, 1.0], 'B': [4, 5, 6, 7], 'C': [None, 'b', 'c', 'd']})
return df.isna()
print(dataframe_isna())
$ python ./dataframe/dataframe_isna.py
A B C
0 False False True
1 True False False
2 False False False
3 False False False
Return the first n rows.
import pandas as pd
from numba import njit
@njit
def dataframe_head():
df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
'monkey', 'parrot', 'shark', 'whale', 'zebra']})
return df.head(n=6)
print(dataframe_head())
$ python ./dataframe/dataframe_head.py
animal
0 alligator
1 bee
2 falcon
3 lion
4 monkey
5 parrot
Get value at specified index position.
import pandas as pd
from numba import njit
@njit
def dataframe_iat():
df = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7], 'C': ['a', 'b', 'c', 'd']})
return df.iat[1, 2] # value b
print(dataframe_iat())
$ python ./dataframe/dataframe_iat.py
b
Groupby and calculate the minimum in each group.
import pandas as pd
from numba import njit
# Example: minimum of each column within the groups defined by column 'A'.
@njit
def df_groupby_min():
# Column 'A' carries the group keys (1, 2, 3); 'B' and 'C' are aggregated.
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').min()
# Expect DataFrame of
# {'B': [0, 1, 2], 'C': [1, 2, 3]} with index=[1, 2, 3]
return out_df
print(df_groupby_min())
$ python ./dataframe/groupby/dataframe_groupby_min.py
B C
1 0 1
2 1 2
3 2 3
Calculate the rolling minimum.
import pandas as pd
from numba import njit
@njit
def df_rolling_min():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).min()
# Expect DataFrame of
# {'A': [NaN, NaN, 3.0, 2.0, 2.0], 'B': [NaN, NaN, -5.0, -5.0, -6.0]}
return out_df
print(df_rolling_min())
$ python ./dataframe/rolling/dataframe_rolling_min.py
A B
0 NaN NaN
1 NaN NaN
2 3.0 -5.0
3 2.0 -5.0
4 2.0 -6.0
Count non-NA cells for each column or row.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_count():
df = pd.DataFrame({"A": [.2, .0, .6, .2],
"B": [2, 0, 6, 2],
"C": [-1, np.nan, 1, np.inf]})
return df.count()
print(dataframe_count())
$ python ./dataframe/dataframe_count.py
A 4
B 4
C 3
dtype: int64
Return the maximum of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_max():
df = pd.DataFrame({"A": [.2, .0, .6, .2],
"B": [2, 0, 6, 2],
"C": [-1, np.nan, 1, np.inf]})
return df.max()
print(dataframe_max())
$ python ./dataframe/dataframe_max.py
A 0.6
B 6.0
C inf
dtype: float64
Return the mean of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_mean():
df = pd.DataFrame({"A": [.2, .0, .6, .2],
"B": [2, 0, 6, 2],
"C": [-1, np.nan, 1, np.inf]})
return df.mean()
print(dataframe_mean())
$ python ./dataframe/dataframe_mean.py
A 0.25
B 2.50
C inf
dtype: float64
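Return the median of the values for the columns. (The source of this example is missing from the listing; only its output is shown.)
$ python ./dataframe/dataframe_median.py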
A 0.2
B 2.0
C 1.0
dtype: float64
Return the minimum of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_min():
df = pd.DataFrame({"A": [.2, .0, .6, .2],
"B": [2, 0, 6, 2],
"C": [-1, np.nan, 1, np.inf]})
return df.min()
print(dataframe_min())
$ python ./dataframe/dataframe_min.py
A 0.0
B 0.0
C -1.0
dtype: float64
Percentage change between the current and a prior element.
import pandas as pd
from numba import njit
@njit
def dataframe_pct_change():
df = pd.DataFrame({"A": [14, 4, 5, 4, 1, 55],
"B": [5, 2, 54, 3, 2, 32],
"C": [20, 20, 7, 21, 8, 5],
"D": [14, 3, 6, 2, 6, 4]})
out_df = df.pct_change()
return out_df
print(dataframe_pct_change())
$ python ./dataframe/dataframe_pct_change.py
A B C D
0 NaN NaN NaN NaN
1 -0.714286 -0.600000 0.000000 -0.785714
2 0.250000 26.000000 -0.650000 1.000000
3 -0.200000 -0.944444 2.000000 -0.666667
4 -0.750000 -0.333333 -0.619048 2.000000
5 54.000000 15.000000 -0.375000 -0.333333
Return the product of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_prod():
df = pd.DataFrame({"A": [.2, .0, .6, .2],
"B": [2, 0, 6, 2],
"C": [-1, np.nan, 1, np.inf]})
return df.prod()
print(dataframe_prod())
$ python ./dataframe/dataframe_prod.py
A 0.0
B 0.0
C -inf
dtype: float64
Return the sum of the values for the columns.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_sum():
df = pd.DataFrame({"A": [.2, .0, .6, .2],
"B": [2, 0, 6, 2],
"C": [-1, np.nan, 1, np.inf]})
return df.sum()
print(dataframe_sum())
$ python ./dataframe/dataframe_sum.py
A 1.0
B 10.0
C inf
dtype: float64
Return sample standard deviation over columns.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_std():
df = pd.DataFrame({"A": [.2, .0, .6, .2],
"B": [2, 0, 6, 2],
"C": [-1, np.nan, 1, np.inf]})
return df.std()
print(dataframe_std())
$ python ./dataframe/dataframe_std.py
A 0.251661
B 2.516611
C NaN
dtype: float64
Return unbiased variance over requested axis.
import pandas as pd
import numpy as np
from numba import njit
@njit
def dataframe_var():
df = pd.DataFrame({"A": [.2, .0, .6, .2],
"B": [2, 0, 6, 2],
"C": [-1, np.nan, 1, np.inf]})
return df.var()
print(dataframe_var())
$ python ./dataframe/dataframe_var.py
A 0.063333
B 6.333333
C NaN
dtype: float64
Drop specified columns from DataFrame.
import pandas as pd
from numba import njit
@njit
def dataframe_drop():
df = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7], 'C': ['a', 'b', 'c', 'd']})
return df.drop(columns='A')
print(dataframe_drop())
$ python ./dataframe/dataframe_drop.py
B C
0 4 a
1 5 b
2 6 c
3 7 d
Return the first n rows.
import pandas as pd
from numba import njit
@njit
def dataframe_head():
df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
'monkey', 'parrot', 'shark', 'whale', 'zebra']})
return df.head(n=6)
print(dataframe_head())
$ python ./dataframe/dataframe_head.py
animal
0 alligator
1 bee
2 falcon
3 lion
4 monkey
5 parrot
Appending rows of other to the end of caller, returning a new object. Columns in other that are not
in the caller are added as new columns.
import pandas as pd
from numba import njit
@njit
def dataframe_append():
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'B': [5, 6], 'C': [7, 8]})
result = df.append(df2)
return result
print(dataframe_append())
$ python ./dataframe/dataframe_append.py
A B C
0 1.0 3 NaN
1 2.0 4 NaN
0 NaN 5 7.0
1 NaN 6 8.0
Count of any non-NaN observations inside the window.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_rolling_count():
series = pd.Series([4, 3, 2, np.nan, 6]) # Series of 4, 3, 2, np.nan, 6
out_series = series.rolling(3).count()
return out_series # Expect series of 1.0, 2.0, 3.0, 2.0, 2.0
print(series_rolling_count())
$ python ./series/rolling/series_rolling_count.py
0 1.0
1 2.0
2 3.0
3 2.0
4 2.0
dtype: float64
Count of any non-NaN observations inside the window.
import numpy as np
import pandas as pd
from numba import njit
@njit
def df_rolling_count():
df = pd.DataFrame({'A': [4, 3, 2, np.nan, 6], 'B': [4, np.nan, 2, np.nan, 6]})
out_df = df.rolling(3).count()
# Expect DataFrame of
# {'A': [1.0, 2.0, 3.0, 2.0, 2.0], 'B': [1.0, 1.0, 2.0, 1.0, 2.0]}
return out_df
print(df_rolling_count())
$ python ./dataframe/rolling/dataframe_rolling_count.py
A B
0 1.0 1.0
1 2.0 1.0
2 3.0 2.0
3 2.0 1.0
4 2.0 2.0
Calculate rolling sum
import pandas as pd
from numba import njit
@njit
def series_rolling_sum():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(3).sum()
return out_series # Expect series of NaN, NaN, 12.0, 10.0, 13.0
print(series_rolling_sum())
$ python ./series/rolling/series_rolling_sum.py
0 NaN
1 NaN
2 12.0
3 10.0
4 13.0
dtype: float64
Calculate rolling sum
import pandas as pd
from numba import njit
@njit
def df_rolling_sum():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).sum()
# Expect DataFrame of
# {'A': [NaN, NaN, 12.0, 10.0, 13.0], 'B': [NaN, NaN, -12.0, -10.0, -13.0]}
return out_df
print(df_rolling_sum())
$ python ./dataframe/rolling/dataframe_rolling_sum.py
A B
0 NaN NaN
1 NaN NaN
2 12.0 -12.0
3 10.0 -10.0
4 13.0 -13.0
Calculate the rolling mean of the values.
import pandas as pd
from numba import njit
@njit
def series_rolling_mean():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(3).mean()
return out_series # Expect series of NaN, NaN, 4.000000, 3.333333, 4.333333
print(series_rolling_mean())
$ python ./series/rolling/series_rolling_mean.py
0 NaN
1 NaN
2 4.000000
3 3.333333
4 4.333333
dtype: float64
Calculate the rolling mean of the values.
import pandas as pd
from numba import njit
@njit
def df_rolling_mean():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).mean()
# Expect DataFrame of
# {'A': [NaN, NaN, 4.000000, 3.333333, 4.333333],
# 'B': [NaN, NaN, -4.000000, -3.333333, -4.333333]}
return out_df
print(df_rolling_mean())
$ python ./dataframe/rolling/dataframe_rolling_mean.py
A B
0 NaN NaN
1 NaN NaN
2 4.000000 -4.000000
3 3.333333 -3.333333
4 4.333333 -4.333333
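Calculate the rolling median. (The source of this example is missing from the listing; only its output is shown.)
$ python ./series/rolling/series_rolling_median.py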
0 NaN
1 NaN
2 4.0
3 3.0
4 5.0
dtype: float64
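Calculate the rolling median. (The source of this example is missing from the listing; only its output is shown.)
$ python ./dataframe/rolling/dataframe_rolling_median.py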
A B
0 NaN NaN
1 NaN NaN
2 4.0 -4.0
3 3.0 -3.0
4 5.0 -5.0
Calculate unbiased rolling variance.
import pandas as pd
from numba import njit
@njit
def series_rolling_var():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(3).var()
return out_series # Expect series of NaN, NaN, 1.000000, 2.333333, 4.333333
print(series_rolling_var())
$ python ./series/rolling/series_rolling_var.py
0 NaN
1 NaN
2 1.000000
3 2.333333
4 4.333333
dtype: float64
Calculate unbiased rolling variance.
import pandas as pd
from numba import njit
@njit
def df_rolling_var():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).var()
# Expect DataFrame of
# {'A': [NaN, NaN, 1.000000, 2.333333, 4.333333],
# 'B': [NaN, NaN, 1.000000, 2.333333, 4.333333]}
return out_df
print(df_rolling_var())
$ python ./dataframe/rolling/dataframe_rolling_var.py
A B
0 NaN NaN
1 NaN NaN
2 1.000000 1.000000
3 2.333333 2.333333
4 4.333333 4.333333
Calculate rolling standard deviation.
import pandas as pd
from numba import njit
@njit
def series_rolling_std():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(3).std()
return out_series # Expect series of NaN, NaN, 1.000000, 1.527525, 2.081666
print(series_rolling_std())
$ python ./series/rolling/series_rolling_std.py
0 NaN
1 NaN
2 1.000000
3 1.527525
4 2.081666
dtype: float64
Calculate rolling standard deviation.
import pandas as pd
from numba import njit
@njit
def df_rolling_std():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).std()
# Expect DataFrame of
# {'A': [NaN, NaN, 1.000000, 1.527525, 2.081666],
# 'B': [NaN, NaN, 1.000000, 1.527525, 2.081666]}
return out_df
print(df_rolling_std())
$ python ./dataframe/rolling/dataframe_rolling_std.py
A B
0 NaN NaN
1 NaN NaN
2 1.000000 1.000000
3 1.527525 1.527525
4 2.081666 2.081666
Calculate the rolling minimum.
import pandas as pd
from numba import njit
@njit
def series_rolling_min():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(3).min()
return out_series # Expect series of NaN, NaN, 3.0, 2.0, 2.0
print(series_rolling_min())
$ python ./series/rolling/series_rolling_min.py
0 NaN
1 NaN
2 3.0
3 2.0
4 2.0
dtype: float64
Calculate the rolling minimum.
import pandas as pd
from numba import njit
@njit
def df_rolling_min():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).min()
# Expect DataFrame of
# {'A': [NaN, NaN, 3.0, 2.0, 2.0], 'B': [NaN, NaN, -5.0, -5.0, -6.0]}
return out_df
print(df_rolling_min())
$ python ./dataframe/rolling/dataframe_rolling_min.py
A B
0 NaN NaN
1 NaN NaN
2 3.0 -5.0
3 2.0 -5.0
4 2.0 -6.0
Calculate the rolling maximum.
import pandas as pd
from numba import njit
@njit
def series_rolling_max():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(3).max()
return out_series # Expect series of NaN, NaN, 5.0, 5.0, 6.0
print(series_rolling_max())
$ python ./series/rolling/series_rolling_max.py
0 NaN
1 NaN
2 5.0
3 5.0
4 6.0
dtype: float64
Calculate the rolling maximum.
import pandas as pd
from numba import njit
@njit
def df_rolling_max():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).max()
# Expect DataFrame of
# {'A': [NaN, NaN, 5.0, 5.0, 6.0], 'B': [NaN, NaN, -3.0, -2.0, -2.0]}
return out_df
print(df_rolling_max())
$ python ./dataframe/rolling/dataframe_rolling_max.py
A B
0 NaN NaN
1 NaN NaN
2 5.0 -3.0
3 5.0 -2.0
4 6.0 -2.0
Calculate rolling correlation.
import pandas as pd
from numba import njit
@njit
def series_rolling_corr():
series = pd.Series([3, 3, 3, 5, 8]) # Series of 3, 3, 3, 5, 8
other = pd.Series([3, 4, 4, 4, 8]) # Series of 3, 4, 4, 4, 8
out_series = series.rolling(4).corr(other)
return out_series # Expect series of NaN, NaN, NaN, 0.333333, 0.916949
print(series_rolling_corr())
$ python ./series/rolling/series_rolling_corr.py
0 NaN
1 NaN
2 NaN
3 0.333333
4 0.916949
dtype: float64
Calculate rolling correlation.
import pandas as pd
from numba import njit
@njit
def df_rolling_corr():
df = pd.DataFrame({'A': [3, 3, 3, 5, 8], 'B': [-3, -3, -3, -5, -8]})
other = pd.DataFrame({'A': [3, 4, 4, 4, 8], 'B': [-3, -4, -4, -4, -8]})
out_df = df.rolling(4).corr(other)
# Expect DataFrame of
# {'A': [NaN, NaN, NaN, 0.333333, 0.916949],
# 'B': [NaN, NaN, NaN, 0.333333, 0.916949]}
return out_df
print(df_rolling_corr())
$ python ./dataframe/rolling/dataframe_rolling_corr.py
A B
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 0.333333 0.333333
4 0.916949 0.916949
Calculate rolling covariance.
import pandas as pd
from numba import njit
@njit
def series_rolling_cov():
series = pd.Series([3, 3, 3, 5, 8]) # Series of 3, 3, 3, 5, 8
other = pd.Series([3, 4, 4, 4, 8]) # Series of 3, 4, 4, 4, 8
out_series = series.rolling(4).cov(other)
return out_series # Expect series of NaN, NaN, NaN, 0.166667, 4.333333
print(series_rolling_cov())
$ python ./series/rolling/series_rolling_cov.py
0 NaN
1 NaN
2 NaN
3 0.166667
4 4.333333
dtype: float64
Calculate rolling covariance.
import pandas as pd
from numba import njit
@njit
def df_rolling_cov():
df = pd.DataFrame({'A': [3, 3, 3, 5, 8], 'B': [-3, -3, -3, -5, -8]})
other = pd.DataFrame({'A': [3, 4, 4, 4, 8], 'B': [-3, -4, -4, -4, -8]})
out_df = df.rolling(4).cov(other)
# Expect DataFrame of
# {'A': [NaN, NaN, NaN, 0.166667, 4.333333],
# 'B': [NaN, NaN, NaN, 0.166667, 4.333333]}
return out_df
print(df_rolling_cov())
$ python ./dataframe/rolling/dataframe_rolling_cov.py
A B
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 0.166667 0.166667
4 4.333333 4.333333
Unbiased rolling skewness.
import pandas as pd
from numba import njit
@njit
def series_rolling_skew():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(3).skew()
return out_series # Expect series of NaN, NaN, 0.000000, 0.935220, -1.293343
print(series_rolling_skew())
$ python ./series/rolling/series_rolling_skew.py
0 NaN
1 NaN
2 0.000000
3 0.935220
4 -1.293343
dtype: float64
Unbiased rolling skewness.
import pandas as pd
from numba import njit
@njit
def df_rolling_skew():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).skew()
# Expect DataFrame of
# {'A': [NaN, NaN, 0.000000, 0.935220, -1.293343],
# 'B': [NaN, NaN, 0.000000, -0.935220, 1.293343]}
return out_df
print(df_rolling_skew())
$ python ./dataframe/rolling/dataframe_rolling_skew.py
A B
0 NaN NaN
1 NaN NaN
2 0.000000 0.000000
3 0.935220 -0.935220
4 -1.293343 1.293343
Calculate unbiased rolling kurtosis.
import pandas as pd
from numba import njit
@njit
def series_rolling_kurt():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(4).kurt()
return out_series # Expect series of NaN, NaN, NaN, -1.2, -3.3
print(series_rolling_kurt())
$ python ./series/rolling/series_rolling_kurt.py
0 NaN
1 NaN
2 NaN
3 -1.2
4 -3.3
dtype: float64
Calculate unbiased rolling kurtosis.
import pandas as pd
from numba import njit
@njit
def df_rolling_kurt():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(4).kurt()
# Expect DataFrame of
# {'A': [NaN, NaN, NaN, -1.2, -3.3], 'B': [NaN, NaN, NaN, -1.2, -3.3]}
return out_df
print(df_rolling_kurt())
$ python ./dataframe/rolling/dataframe_rolling_kurt.py
A B
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 -1.2 -1.2
4 -3.3 -3.3
Apply a custom function to each rolling window.
import numpy as np
import pandas as pd
from numba import njit
@njit
def series_rolling_apply():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
def get_median(x):
return np.median(x)
out_series = series.rolling(3).apply(get_median)
return out_series # Expect series of NaN, NaN, 4.0, 3.0, 5.0
print(series_rolling_apply())
$ python ./series/rolling/series_rolling_apply.py
0 NaN
1 NaN
2 4.0
3 3.0
4 5.0
dtype: float64
Apply a custom function to each rolling window.
import numpy as np
import pandas as pd
from numba import njit
@njit
def df_rolling_apply():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
def get_median(x):
return np.median(x)
out_df = df.rolling(3).apply(get_median)
# Expect DataFrame of
# {'A': [NaN, NaN, 4.0, 3.0, 5.0], 'B': [NaN, NaN, -4.0, -3.0, -5.0]}
return out_df
print(df_rolling_apply())
$ python ./dataframe/rolling/dataframe_rolling_apply.py
A B
0 NaN NaN
1 NaN NaN
2 4.0 -4.0
3 3.0 -3.0
4 5.0 -5.0
Calculate the rolling quantile.
import pandas as pd
from numba import njit
@njit
def series_rolling_quantile():
series = pd.Series([4, 3, 5, 2, 6]) # Series of 4, 3, 5, 2, 6
out_series = series.rolling(3).quantile(0.25)
return out_series # Expect series of NaN, NaN, 3.5, 2.5, 3.5
print(series_rolling_quantile())
$ python ./series/rolling/series_rolling_quantile.py
0 NaN
1 NaN
2 3.5
3 2.5
4 3.5
dtype: float64
Calculate the rolling quantile.
import pandas as pd
from numba import njit
@njit
def df_rolling_quantile():
df = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})
out_df = df.rolling(3).quantile(0.25)
# Expect DataFrame of
# {'A': [NaN, NaN, 3.5, 2.5, 3.5], 'B': [NaN, NaN, -4.5, -4.0, -5.5]}
return out_df
print(df_rolling_quantile())
$ python ./dataframe/rolling/dataframe_rolling_quantile.py
A B
0 NaN NaN
1 NaN NaN
2 3.5 -4.5
3 2.5 -4.0
4 3.5 -5.5
Compute count of group, excluding missing values.
import pandas as pd
import numpy as np
from numba import njit
# Example: number of non-missing values per column within each group of 'A'.
@njit
def df_groupby_count():
# 'B' and 'C' contain np.nan entries (not counted); 'B' also holds np.inf,
# which is counted (group 2 of 'B' is 1, 2, inf -> 3).
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, np.nan, np.nan, 2, 4, 3, 2, np.inf],
'C': [np.nan, 2, 3, np.nan, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').count()
# Expect DataFrame of
# {'B': [1, 3, 3], 'C': [0, 3, 4]} with index=[1, 2, 3]
return out_df
print(df_groupby_count())
$ python ./dataframe/groupby/dataframe_groupby_count.py
B C
1 1 0
2 3 3
3 3 4
Compute max of group values.
import pandas as pd
from numba import njit
# Example: maximum of each column within the groups defined by column 'A'.
@njit
def df_groupby_max():
# Column 'A' carries the group keys (1, 2, 3); 'B' and 'C' are aggregated.
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').max()
# Expect DataFrame of
# {'B': [0, 3, 5], 'C': [4, 9, 8]} with index=[1, 2, 3]
return out_df
print(df_groupby_max())
$ python ./dataframe/groupby/dataframe_groupby_max.py
B C
1 0 4
2 3 9
3 5 8
Compute mean of groups, excluding missing values.
import pandas as pd
from numba import njit
# Example: mean of each column within the groups defined by column 'A'.
@njit
def df_groupby_mean():
# Column 'A' carries the group keys (1, 2, 3); 'B' and 'C' are aggregated.
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').mean()
# Expect DataFrame of
# {'B': [0.0, 2.0, 3.5], 'C': [2.500000, 5.333333, 6.000000]} with index=[1, 2, 3]
return out_df
print(df_groupby_mean())
$ python ./dataframe/groupby/dataframe_groupby_mean.py
B C
1 0.0 2.500000
2 2.0 5.333333
3 3.5 6.000000
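Compute median of groups, excluding missing values. (The source of this example is missing from the listing; only its output is shown.)
$ python ./dataframe/groupby/dataframe_groupby_median.py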
B C
1 0.0 2.5
2 3.0 5.0
3 3.5 6.5
Compute min of group values.
import pandas as pd
from numba import njit
# Example: minimum of each column within the groups defined by column 'A'.
@njit
def df_groupby_min():
# Column 'A' carries the group keys (1, 2, 3); 'B' and 'C' are aggregated.
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').min()
# Expect DataFrame of
# {'B': [0, 1, 2], 'C': [1, 2, 3]} with index=[1, 2, 3]
return out_df
print(df_groupby_min())
$ python ./dataframe/groupby/dataframe_groupby_min.py
B C
1 0 1
2 1 2
3 2 3
Compute prod of group values.
import pandas as pd
from numba import njit
# Example: product of each column within the groups defined by column 'A'.
@njit
def df_groupby_prod():
# Column 'A' carries the group keys (1, 2, 3); 'B' and 'C' are aggregated.
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').prod()
# Expect DataFrame of
# {'B': [0, 6, 120], 'C': [4, 90, 1008]} with index=[1, 2, 3]
return out_df
print(df_groupby_prod())
$ python ./dataframe/groupby/dataframe_groupby_prod.py
B C
1 0 4
2 6 90
3 120 1008
Compute standard deviation of groups, excluding missing values.
import pandas as pd
from numba import njit
# Example: standard deviation of each column within the groups defined by 'A'.
@njit
def df_groupby_std():
# Column 'A' carries the group keys (1, 2, 3); 'B' and 'C' are aggregated.
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').std()
# Expect DataFrame of
# {'B': [0.000000, 1.000000, 1.290994], 'C': [2.121320, 3.511885, 2.160247]} with index=[1, 2, 3]
return out_df
print(df_groupby_std())
$ python ./dataframe/groupby/dataframe_groupby_std.py
B C
1 0.000000 2.121320
2 1.000000 3.511885
3 1.290994 2.160247
Compute sum of groups, excluding missing values.
import pandas as pd
from numba import njit
# Example: sum of each column within the groups defined by column 'A'.
@njit
def df_groupby_sum():
# Column 'A' carries the group keys (1, 2, 3); 'B' and 'C' are aggregated.
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').sum()
# Expect DataFrame of
# {'B': [0, 6, 14], 'C': [5, 16, 24]} with index=[1, 2, 3]
# NOTE(review): the captured run prints these sums as floats (0.0, 6.0, 14.0 /
# 5.0, 16.0, 24.0) - confirm the intended result dtype.
return out_df
print(df_groupby_sum())
$ python ./dataframe/groupby/dataframe_groupby_sum.py
B C
1 0.0 5.0
2 6.0 16.0
3 14.0 24.0
Compute variance of groups, excluding missing values.
import pandas as pd
from numba import njit
# Example: variance of each column within the groups defined by column 'A'.
@njit
def df_groupby_var():
# Column 'A' carries the group keys (1, 2, 3); 'B' and 'C' are aggregated.
df = pd.DataFrame({'A': [1, 2, 3, 1, 2, 3, 3, 3, 2],
'B': [0, 1, 5, 0, 2, 4, 3, 2, 3],
'C': [1, 2, 3, 4, 5, 6, 7, 8, 9]})
out_df = df.groupby('A').var()
# Expect DataFrame of
# {'B': [0.000000, 1.000000, 1.666667], 'C': [4.500000, 12.333333, 4.666667]} with index=[1, 2, 3]
return out_df
print(df_groupby_var())
$ python ./dataframe/groupby/dataframe_groupby_var.py
B C
1 0.000000 4.500000
2 1.000000 12.333333
3 1.666667 4.666667