import sqlalchemy as sqla
import pandas as pd
Connect to MySql with Pandas
mysql
database
pandas
This blog show how to connect to MySql database via Pandas
Import packages
Connect to a database
= sqla.create_engine('mysql+mysqlconnector://root:@localhost:3306/farmers_market') db
Get all the tables in the database
= pd.read_sql('show tables from farmers_market', db)
db_tables db_tables
Tables_in_farmers_market | |
---|---|
0 | booth |
1 | customer |
2 | customer_purchases |
3 | market_date_info |
4 | product |
5 | product_category |
6 | vendor |
7 | vendor_booth_assignments |
8 | vendor_inventory |
9 | zip_data |
Get information about the schema of each table
= db_tables['Tables_in_farmers_market']
tables
for table in tables:
= pd.read_sql('describe {}'.format(table), db)
desc print('*'*30, table, '*'*30)
print(desc, '\n')
****************************** booth ******************************
Field Type Null Key Default Extra
0 booth_number int(11) NO PRI None
1 booth_price_level varchar(45) NO None
2 booth_description varchar(255) NO None
3 booth_type varchar(45) NO None
****************************** customer ******************************
Field Type Null Key Default Extra
0 customer_id int(11) NO PRI None
1 customer_first_name varchar(45) YES None
2 customer_last_name varchar(45) YES None
3 customer_zip varchar(45) YES None
****************************** customer_purchases ******************************
Field Type Null Key Default Extra
0 product_id int(11) NO PRI None
1 vendor_id int(11) NO PRI None
2 market_date date NO PRI None
3 customer_id int(11) NO PRI None
4 quantity decimal(16,2) YES None
5 cost_to_customer_per_qty decimal(16,2) YES None
6 transaction_time time NO PRI None
****************************** market_date_info ******************************
Field Type Null Key Default Extra
0 market_date date NO PRI None
1 market_day varchar(45) YES None
2 market_week varchar(45) YES None
3 market_year varchar(45) YES None
4 market_start_time varchar(45) YES None
5 market_end_time varchar(45) YES None
6 special_notes blob YES None
7 market_season varchar(45) YES None
8 market_min_temp varchar(200) YES None
9 market_max_temp varchar(45) YES None
10 market_rain_flag int(11) YES None
11 market_snow_flag int(11) YES None
****************************** product ******************************
Field Type Null Key Default Extra
0 product_id int(11) NO PRI None
1 product_name varchar(45) YES None
2 product_size varchar(45) YES None
3 product_category_id int(11) NO PRI None
4 product_qty_type varchar(45) YES None
****************************** product_category ******************************
Field Type Null Key Default Extra
0 product_category_id int(11) NO PRI None auto_increment
1 product_category_name varchar(45) YES None
****************************** vendor ******************************
Field Type Null Key Default Extra
0 vendor_id int(11) NO PRI None auto_increment
1 vendor_name varchar(45) NO UNI None
2 vendor_type varchar(45) NO None
3 vendor_owner_first_name varchar(45) NO None
4 vendor_owner_last_name varchar(45) NO None
****************************** vendor_booth_assignments ******************************
Field Type Null Key Default Extra
0 vendor_id int(11) NO PRI None
1 booth_number int(11) NO PRI None
2 market_date date NO PRI None
****************************** vendor_inventory ******************************
Field Type Null Key Default Extra
0 market_date date NO PRI None
1 quantity decimal(16,2) YES None
2 vendor_id int(11) NO PRI None
3 product_id int(11) NO PRI None
4 original_price decimal(16,2) YES None
****************************** zip_data ******************************
Field Type Null Key Default Extra
0 zip_code_5 char(5) NO PRI None
1 median_household_income float YES None
2 percent_high_income float YES None
3 percent_under_18 float YES None
4 percent_over_65 float YES None
5 people_per_sq_mile float YES None
6 latitude float YES None
7 longitude float YES None
= pd.read_sql('select * from product', db)
product product.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 product_id 23 non-null int64
1 product_name 23 non-null object
2 product_size 22 non-null object
3 product_category_id 23 non-null int64
4 product_qty_type 21 non-null object
dtypes: int64(2), object(3)
memory usage: 1.0+ KB
'select product_id from product limit 5', db) pd.read_sql(
product_id | |
---|---|
0 | 1 |
1 | 2 |
2 | 3 |
3 | 9 |
4 | 12 |