import pandas as pd
import numpy as np
# With no explicit dtype, the text values are stored with dtype `object`
# and the missing entry is shown as NaN.
pd.Series(['a', 'b', np.nan])

0      a
1      b
2    NaN
dtype: object


# Requesting dtype='string' gives the dedicated `string` dtype, and the
# missing entry is shown as <NA> instead of NaN.
pd.Series(['a', 'b', np.nan], dtype='string')

0       a
1       b
2    <NA>
dtype: string


# Sample data for the case/length examples: mixed-case words plus one
# missing value, stored with the dedicated string dtype.
s = pd.Series(
    data=["A", "B", "Aaba", np.nan, "CABA", "dog"],
    dtype="string",
)


# Lower-case every element; the missing value stays <NA>.
s.str.lower()

0       a
1       b
2    aaba
3    <NA>
4    caba
5     dog
dtype: string


# Upper-case every element; the missing value stays <NA>.
s.str.upper()

0       A
1       B
2    AABA
3    <NA>
4    CABA
5     DOG
dtype: string


# Length of each string; the result has dtype Int64, with <NA> where the
# input value is missing.
s.str.len()

0       1
1       1
2       4
3    <NA>
4       4
5       3
dtype: Int64


# Same words as before, but padded with leading and/or trailing spaces so
# the strip/lstrip/rstrip examples have something to remove.
s2 = pd.Series(
    data=["A ", " B", " Aaba ", np.nan, "CABA ", " dog"],
    dtype="string",
)


# Remove the padding from both ends of each element; `.values` exposes the
# underlying StringArray so the remaining spaces are easy to see.
s2.str.strip().values

<StringArray>
['A', 'B', 'Aaba', <NA>, 'CABA', 'dog']
Length: 6, dtype: string


# Remove only the leading padding; trailing spaces are kept.
s2.str.lstrip().values

<StringArray>
['A ', 'B', 'Aaba ', <NA>, 'CABA ', 'dog']
Length: 6, dtype: string


# Remove only the trailing padding; leading spaces are kept.
s2.str.rstrip().values

<StringArray>
['A', ' B', ' Aaba', <NA>, 'CABA', ' dog']
Length: 6, dtype: string


# Build a small frame whose column labels have inconsistent case and
# trailing spaces.
d = [{'Col 1 ': 'a', 'col 2 ': 'b'}, {'Col 1 ': 'c', 'col 2 ': 'd'}]
df = pd.DataFrame(d)
df


# Normalise the labels: trim surrounding whitespace, lower-case, then turn
# the inner spaces into underscores. `regex=False` makes the space a literal
# match explicitly, so the result does not depend on which default the
# installed pandas version uses for `regex` (and avoids the FutureWarning
# older versions emit for single-character patterns).
df.columns = (
    df.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
)
df


# Space-separated tokens (plus one missing value) used by the split,
# replace and contains examples.
s3 = pd.Series(
    data=["z y z", "c d e", np.nan, "f g z"],
    dtype="string",
)


# Split each string into a list of tokens (no separator given, so the
# spaces between tokens are the delimiter). The result has dtype `object`
# and the missing value stays <NA>.
s3.str.split()

0    [z, y, z]
1    [c, d, e]
2         <NA>
3    [f, g, z]
dtype: object


# Same split, but with expand=True so the tokens are returned as separate
# columns (addressed later as s4[0], s4[1]) instead of one list per row.
s4 = s3.str.split(expand=True)
s4


# Replace every 'z' with 'a'; the missing value stays <NA>.
# `regex=False` states that 'z' is a literal substring, so the call does not
# depend on the pandas-version-specific default of `regex`.
s3.str.replace('z', 'a', regex=False)

0    a y a
1    c d e
2     <NA>
3    f g a
dtype: string


# Test whether each element contains 'z'. The result has dtype `boolean`,
# with <NA> where the input value is missing.
s3.str.contains('z')

0     True
1    False
2     <NA>
3     True
dtype: boolean


# Join columns 0 and 1 element-wise, separated by ", ", and store the
# result under key 3 of s4; then display the updated frame.
s4[3] = s4[0].str.cat(s4[1], sep=", ")
s4

Working With Strings In Pandas

String methods

	Col 1	col 2
0	a	b
1	c	d

	col_1	col_2
0	a	b
1	c	d