Connect to MySql with Pandas

mysql
database
pandas
This blog show how to connect to MySql database via Pandas
Author

Youfeng Zhou

Published

August 19, 2022

Import packages

import sqlalchemy as sqla
import pandas as pd

Connect to a database

db = sqla.create_engine('mysql+mysqlconnector://root:@localhost:3306/farmers_market')

Get all the tables in the database

db_tables = pd.read_sql('show tables from farmers_market', db)
db_tables
Tables_in_farmers_market
0 booth
1 customer
2 customer_purchases
3 market_date_info
4 product
5 product_category
6 vendor
7 vendor_booth_assignments
8 vendor_inventory
9 zip_data

Get information about the schema of each table

tables = db_tables['Tables_in_farmers_market']

for table in tables:
    desc = pd.read_sql('describe {}'.format(table), db)
    print('*'*30, table, '*'*30)
    print(desc, '\n')
****************************** booth ******************************
               Field          Type Null  Key Default Extra
0       booth_number       int(11)   NO  PRI    None      
1  booth_price_level   varchar(45)   NO         None      
2  booth_description  varchar(255)   NO         None      
3         booth_type   varchar(45)   NO         None       

****************************** customer ******************************
                 Field         Type Null  Key Default Extra
0          customer_id      int(11)   NO  PRI    None      
1  customer_first_name  varchar(45)  YES         None      
2   customer_last_name  varchar(45)  YES         None      
3         customer_zip  varchar(45)  YES         None       

****************************** customer_purchases ******************************
                      Field           Type Null  Key Default Extra
0                product_id        int(11)   NO  PRI    None      
1                 vendor_id        int(11)   NO  PRI    None      
2               market_date           date   NO  PRI    None      
3               customer_id        int(11)   NO  PRI    None      
4                  quantity  decimal(16,2)  YES         None      
5  cost_to_customer_per_qty  decimal(16,2)  YES         None      
6          transaction_time           time   NO  PRI    None       

****************************** market_date_info ******************************
                Field          Type Null  Key Default Extra
0         market_date          date   NO  PRI    None      
1          market_day   varchar(45)  YES         None      
2         market_week   varchar(45)  YES         None      
3         market_year   varchar(45)  YES         None      
4   market_start_time   varchar(45)  YES         None      
5     market_end_time   varchar(45)  YES         None      
6       special_notes          blob  YES         None      
7       market_season   varchar(45)  YES         None      
8     market_min_temp  varchar(200)  YES         None      
9     market_max_temp   varchar(45)  YES         None      
10   market_rain_flag       int(11)  YES         None      
11   market_snow_flag       int(11)  YES         None       

****************************** product ******************************
                 Field         Type Null  Key Default Extra
0           product_id      int(11)   NO  PRI    None      
1         product_name  varchar(45)  YES         None      
2         product_size  varchar(45)  YES         None      
3  product_category_id      int(11)   NO  PRI    None      
4     product_qty_type  varchar(45)  YES         None       

****************************** product_category ******************************
                   Field         Type Null  Key Default           Extra
0    product_category_id      int(11)   NO  PRI    None  auto_increment
1  product_category_name  varchar(45)  YES         None                 

****************************** vendor ******************************
                     Field         Type Null  Key Default           Extra
0                vendor_id      int(11)   NO  PRI    None  auto_increment
1              vendor_name  varchar(45)   NO  UNI    None                
2              vendor_type  varchar(45)   NO         None                
3  vendor_owner_first_name  varchar(45)   NO         None                
4   vendor_owner_last_name  varchar(45)   NO         None                 

****************************** vendor_booth_assignments ******************************
          Field     Type Null  Key Default Extra
0     vendor_id  int(11)   NO  PRI    None      
1  booth_number  int(11)   NO  PRI    None      
2   market_date     date   NO  PRI    None       

****************************** vendor_inventory ******************************
            Field           Type Null  Key Default Extra
0     market_date           date   NO  PRI    None      
1        quantity  decimal(16,2)  YES         None      
2       vendor_id        int(11)   NO  PRI    None      
3      product_id        int(11)   NO  PRI    None      
4  original_price  decimal(16,2)  YES         None       

****************************** zip_data ******************************
                     Field     Type Null  Key Default Extra
0               zip_code_5  char(5)   NO  PRI    None      
1  median_household_income    float  YES         None      
2      percent_high_income    float  YES         None      
3         percent_under_18    float  YES         None      
4          percent_over_65    float  YES         None      
5       people_per_sq_mile    float  YES         None      
6                 latitude    float  YES         None      
7                longitude    float  YES         None       
product = pd.read_sql('select * from product', db)
product.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 5 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   product_id           23 non-null     int64 
 1   product_name         23 non-null     object
 2   product_size         22 non-null     object
 3   product_category_id  23 non-null     int64 
 4   product_qty_type     21 non-null     object
dtypes: int64(2), object(3)
memory usage: 1.0+ KB
pd.read_sql('select product_id from product limit 5', db)
product_id
0 1
1 2
2 3
3 9
4 12