# pasting data from the clipboard

# Excel - Ctrl + V

# Pandas we use the read_clipboard function
import pandas as pd
pd.read_clipboard()


# Importing a CSV

# Excel - File > Open > select file > 

# Pandas we use the read_csv function
df = pd.read_csv(r'C:\file_path...')


# I will generate some random data for us to work with and printout the datframe.

import pandas as pd
import numpy as np
df = pd.DataFrame(np.random.randint(0,100,size=(6, 4)), columns=list('ABCD'))
df


# getting column and row counts

# Excel - select row one to get column count. select the index column to get a row count.

# Pandas we use the dataframe shape attribute which return the (row count, column count)
df.shape

(6, 4)


# Excel - Alt + Ctrl + T

# Pandas has a number of way to filter data
# df.filter(['A', 'B'], axis=1)
df[(df['A'] < 60) & (df['A'] > 10)]

# Other options are the popular loc() and iloc() functions also the powerful query() and isin() function. There are a bunch of other functions that can be used for filtering like between, notnull. There are also string methods for filter like str.contains


# lets make a second dataframe

df2 = pd.DataFrame(np.random.randint(0,100,size=(6, 4)), columns=list('EFGH'))

# lets add some string columns
df2['str'] = pd.Series(['a ','a ','b ','b ','c ','c '])
df2['str2'] = pd.Series(['a ','a ','b ','b ','c ','c '])
df2


# Excel = VLOOKUP (lookup_value, table_array, column_index_num, [range_lookup])

# Pandas merge or concat can be used depending on the situation.
df3 = pd.concat([df, df2], axis=1)
df3
# When merging data in Pandas this page is helpful  https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html


# Excel - Alt > N > V

# Pandas use the pivot_table function
df3.pivot_table(df3, columns = 'str')


# Both Excel and Python have the len() function.
# Excel - LEN()

# Python - note len counts spaces.
df3['str'].str.len()

0    2
1    2
2    2
3    2
4    2
5    2
Name: str, dtype: int64


# Currently we have white space at the end of each string.
df3['str'].values

array(['a ', 'a ', 'b ', 'b ', 'c ', 'c '], dtype=object)


# Removing white space at the start and end of strings.

# Excel - TRIM()

# Pandas
df3['str'].str.strip().values

array(['a', 'a', 'b', 'b', 'c', 'c'], dtype=object)


# Lets use another string function to convert all strings to upper case

# Excel - UPPER()

# Pandas
df3['str'].str.upper()

0    A 
1    A 
2    B 
3    B 
4    C 
5    C 
Name: str, dtype: object


# Excel - IF()

# Pandas - filter for a.
df3[df3['str'] == 'a ']


# Excel - 'string'&'string'

# Pandas
df3['str'] + df3['str2']

0    a a 
1    a a 
2    b b 
3    b b 
4    c c 
5    c c 
dtype: object


# Excel we can use Sumifs, Countifs to group the data and get counts and sums.

# Pandas
df3.groupby('str').sum()

	A	B	C	D
0	0	87	6	94
1	87	5	94	16
2	64	1	18	14
3	34	39	62	12
4	7	64	6	20
5	50	90	72	36

	A	B	C	D
3	34	39	62	12
5	50	90	72	36

	E	F	G	H	str	str2
0	49	89	52	93	a	a
1	68	82	51	55	a	a
2	42	46	43	5	b	b
3	14	12	42	15	b	b
4	96	46	18	32	c	c
5	59	83	97	99	c	c

	A	B	C	D	E	F	G	H	str	str2
0	0	87	6	94	49	89	52	93	a	a
1	87	5	94	16	68	82	51	55	a	a
2	64	1	18	14	42	46	43	5	b	b
3	34	39	62	12	14	12	42	15	b	b
4	7	64	6	20	96	46	18	32	c	c
5	50	90	72	36	59	83	97	99	c	c

str	a	b	c
A	43.5	49.0	28.5
B	46.0	20.0	77.0
C	50.0	40.0	39.0
D	55.0	13.0	28.0
E	58.5	28.0	77.5
F	85.5	29.0	64.5
G	51.5	42.5	57.5
H	74.0	10.0	65.5

Excel Vs Pandas

Pandas vs Excel - why it might be time to use Pandas¶

When to use Pandas¶

Advantages of Pandas¶

Compare some common functions excel and Pandas¶

Importing data¶

Filtering data¶

Vlookup¶

Pivot Table¶

Len¶

Strip¶

Upper¶

If function¶

Concatenate¶

Groupby¶

	E	F	G	H	str	str2
0	49	89	52	93	a	a
1	68	82	51	55	a	a
2	42	46	43	5	b	b
3	14	12	42	15	b	b
4	96	46	18	32	c	c
5	59	83	97	99	c	c

	A	B	C	D	E	F	G	H	str	str2
0	0	87	6	94	49	89	52	93	a	a
1	87	5	94	16	68	82	51	55	a	a
2	64	1	18	14	42	46	43	5	b	b
3	34	39	62	12	14	12	42	15	b	b
4	7	64	6	20	96	46	18	32	c	c
5	50	90	72	36	59	83	97	99	c	c

	A	B	C	D	E	F	G	H
str
a	87	92	100	110	117	171	103	148
b	98	40	80	26	56	58	85	20
c	57	154	78	56	155	129	115	131

	E	F	G	H	str	str2
0	49	89	52	93	a	a
1	68	82	51	55	a	a
2	42	46	43	5	b	b
3	14	12	42	15	b	b
4	96	46	18	32	c	c
5	59	83	97	99	c	c

	A	B	C	D	E	F	G	H	str	str2
0	0	87	6	94	49	89	52	93	a	a
1	87	5	94	16	68	82	51	55	a	a
2	64	1	18	14	42	46	43	5	b	b
3	34	39	62	12	14	12	42	15	b	b
4	7	64	6	20	96	46	18	32	c	c
5	50	90	72	36	59	83	97	99	c	c

	A	B	C	D	E	F	G	H
str
a	87	92	100	110	117	171	103	148
b	98	40	80	26	56	58	85	20
c	57	154	78	56	155	129	115	131

	E	F	G	H	str	str2
0	49	89	52	93	a	a
1	68	82	51	55	a	a
2	42	46	43	5	b	b
3	14	12	42	15	b	b
4	96	46	18	32	c	c
5	59	83	97	99	c	c

	A	B	C	D	E	F	G	H	str	str2
0	0	87	6	94	49	89	52	93	a	a
1	87	5	94	16	68	82	51	55	a	a
2	64	1	18	14	42	46	43	5	b	b
3	34	39	62	12	14	12	42	15	b	b
4	7	64	6	20	96	46	18	32	c	c
5	50	90	72	36	59	83	97	99	c	c

	A	B	C	D	E	F	G	H
str
a	87	92	100	110	117	171	103	148
b	98	40	80	26	56	58	85	20
c	57	154	78	56	155	129	115	131