From dict of Series or dicts

In [1]:
import numpy as np
import pandas as pd
In [2]:
d = {'one': pd.Series([2., 3., 4.], index=['p', 'q', 'r']),
     'two': pd.Series([2., 3., 4., 5.], index=['p', 'q', 'r', 's'])}
In [3]:
df = pd.DataFrame(d)
df
Out[3]:
one two
p 2.0 2.0
q 3.0 3.0
r 4.0 4.0
s NaN 5.0
In [4]:
pd.DataFrame(d, index=['s', 'q', 'p'])
Out[4]:
one two
s NaN 5.0
q 3.0 3.0
p 2.0 2.0
In [5]:
pd.DataFrame(d, index=['s', 'q', 'p'], columns=['two', 'three'])
Out[5]:
two three
s 5.0 NaN
q 3.0 NaN
p 2.0 NaN

The row and column labels can be accessed through the index and columns attributes, respectively:

In [6]:
df.index
Out[6]:
Index(['p', 'q', 'r', 's'], dtype='object')
In [7]:
df.columns
Out[7]:
Index(['one', 'two'], dtype='object')

From dict of ndarrays / lists

The ndarrays must all be the same length.
If an index is passed, it must clearly also be the same length as the arrays.
If no index is passed, the result will be range(n), where n is the array length.

In [8]:
d = {'one': [4., 5., 6., 7.],
     'two': [7., 6., 5., 4.]}
In [9]:
pd.DataFrame(d)
Out[9]:
one two
0 4.0 7.0
1 5.0 6.0
2 6.0 5.0
3 7.0 4.0
In [10]:
pd.DataFrame(d, index=['w', 'x', 'y', 'z'])
Out[10]:
one two
w 4.0 7.0
x 5.0 6.0
y 6.0 5.0
z 7.0 4.0

From structured or record array:

In [11]:
data = np.zeros((2, ), dtype=[('P', 'i4'), ('Q', 'f4'), ('R', 'a10')])
In [12]:
data[:] = [(2, 3., 'Best'), (3, 4., "Friend")]
In [13]:
pd.DataFrame(data)
Out[13]:
P Q R
0 2 3.0 b'Best'
1 3 4.0 b'Friend'
In [14]:
pd.DataFrame(data, index=['first', 'second'])
Out[14]:
P Q R
first 2 3.0 b'Best'
second 3 4.0 b'Friend'
In [15]:
pd.DataFrame(data, columns=['R', 'P', 'Q'])
Out[15]:
R P Q
0 b'Best' 2 3.0
1 b'Friend' 3 4.0

From a list of dicts

In [16]:
data2 = [{'p': 2, 'q': 4}, {'p': 5, 'q': 10, 'r': 15}]
In [17]:
pd.DataFrame(data2)
Out[17]:
p q r
0 2 4 NaN
1 5 10 15.0
In [18]:
pd.DataFrame(data2, index=['first', 'second'])
Out[18]:
p q r
first 2 4 NaN
second 5 10 15.0
In [19]:
pd.DataFrame(data2, columns=['p', 'q'])
Out[19]:
p q
0 2 4
1 5 10

From a dict of tuples
You can automatically create a MultiIndexed frame by passing a tuples dictionary.

In [20]:
pd.DataFrame({('p', 'q'): {('P', 'Q'): 2, ('P', 'R'): 1},
               ('p', 'p'): {('P', 'R'): 4, ('P', 'Q'): 3},
               ('p', 'r'): {('P', 'Q'): 6, ('P', 'R'): 5},
               ('q', 'p'): {('P', 'R'): 8, ('P', 'Q'): 7},
               ('q', 'q'): {('P', 'S'): 10, ('P', 'Q'): 9}})
Out[20]:
p q
q p r p q
P Q 2.0 3.0 6.0 7.0 9.0
R 1.0 4.0 5.0 8.0 NaN
S NaN NaN NaN NaN 10.0

Missing data

To construct a DataFrame with missing data, we use np.nan to represent missing values.
Alternatively, you may pass a numpy.MaskedArray as the data argument to the DataFrame
constructor, and its masked entries will be considered missing.

In [21]:
df = pd.DataFrame(np.random.randn(4, 3), index=['a', 'b', 'c', 'd'],
                  columns=['one', 'two', 'three'])
In [22]:
df['four'] = 'bar'
In [23]:
df['five'] = df['one'] > 0
df
Out[23]:
one two three four five
a 0.374851 1.575271 -2.633000 bar True
b -0.722451 -0.446845 0.933967 bar False
c -1.386522 1.575828 2.371845 bar False
d 1.422895 -1.145874 1.347855 bar True
In [24]:
df2 = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g'])
df2
Out[24]:
one two three four five
a 0.374851 1.575271 -2.633000 bar True
b -0.722451 -0.446845 0.933967 bar False
c -1.386522 1.575828 2.371845 bar False
d 1.422895 -1.145874 1.347855 bar True
e NaN NaN NaN NaN NaN
f NaN NaN NaN NaN NaN
g NaN NaN NaN NaN NaN

Alternate constructors
DataFrame.from_dict
DataFrame.from_dict takes a dict of dicts or a dict of array-like sequences and returns a DataFrame.
It operates like the DataFrame constructor except for the orient parameter which is 'columns' by default,
but which can be set to 'index' in order to use the dict keys as row labels.

In [25]:
pd.DataFrame.from_dict(dict([('P', [2, 3, 4]), ('Q', [5, 6, 7])]))
Out[25]:
P Q
0 2 5
1 3 6
2 4 7

If you pass orient='index', the keys will be the row labels. In this case, you can also pass the desired
column names:

In [26]:
pd.DataFrame.from_dict(dict([('P', [2, 3, 4]), ('Q', [5, 6, 7])]),
                       orient='index', columns=['one', 'two', 'three'])
Out[26]:
one two three
P 2 3 4
Q 5 6 7

DataFrame.from_records
DataFrame.from_records takes a list of tuples or an ndarray with structured dtype.

In [27]:
data
Out[27]:
array([(2, 3., b'Best'), (3, 4., b'Friend')],
      dtype=[('P', '<i4'), ('Q', '<f4'), ('R', 'S10')])
In [28]:
pd.DataFrame.from_records(data, index='R')
Out[28]:
P Q
R
b'Best' 2 3.0
b'Friend' 3 4.0

Column selection, addition, deletion

In [29]:
df['one']
Out[29]:
a    0.374851
b   -0.722451
c   -1.386522
d    1.422895
Name: one, dtype: float64
In [30]:
df['three'] = df['one'] * df['two']
In [31]:
df['flag'] = df['one'] > 2
df
Out[31]:
one two three four five flag
a 0.374851 1.575271 0.590491 bar True False
b -0.722451 -0.446845 0.322824 bar False False
c -1.386522 1.575828 -2.184920 bar False False
d 1.422895 -1.145874 -1.630458 bar True False

Columns can be deleted or popped like with a dict:

In [32]:
del df['two']
In [33]:
three = df.pop('three')
In [34]:
df
Out[34]:
one four five flag
a 0.374851 bar True False
b -0.722451 bar False False
c -1.386522 bar False False
d 1.422895 bar True False

When inserting a scalar value, it will naturally be propagated to fill the column:

In [35]:
df['foo'] = 'bar'
In [36]:
df
Out[36]:
one four five flag foo
a 0.374851 bar True False bar
b -0.722451 bar False False bar
c -1.386522 bar False False bar
d 1.422895 bar True False bar

When inserting a Series that does not have the same index as the DataFrame, it will be conformed to the
DataFrame’s index:

In [37]:
df['one_trunc'] = df['one'][:2]
In [38]:
df
Out[38]:
one four five flag foo one_trunc
a 0.374851 bar True False bar 0.374851
b -0.722451 bar False False bar -0.722451
c -1.386522 bar False False bar NaN
d 1.422895 bar True False bar NaN

You can insert raw ndarrays but their length must match the length of the DataFrame’s index.

By default, columns get inserted at the end. The insert function is available to insert at a particular
location in the columns:

In [39]:
df.insert(1, 'bar', df['one'])
In [40]:
df
Out[40]:
one bar four five flag foo one_trunc
a 0.374851 0.374851 bar True False bar 0.374851
b -0.722451 -0.722451 bar False False bar -0.722451
c -1.386522 -1.386522 bar False False bar NaN
d 1.422895 1.422895 bar True False bar NaN

Assigning new columns in method chains

In [41]:
iris = pd.read_csv('https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/d546eaee765268bf2f487608c537c05e22e4b221/iris.csv')
In [42]:
iris.head()
Out[42]:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
In [43]:
(iris.assign(sepal_ratio=iris['sepal_width'] / iris['sepal_length'])
      .head())
Out[43]:
sepal_length sepal_width petal_length petal_width species sepal_ratio
0 5.1 3.5 1.4 0.2 setosa 0.686275
1 4.9 3.0 1.4 0.2 setosa 0.612245
2 4.7 3.2 1.3 0.2 setosa 0.680851
3 4.6 3.1 1.5 0.2 setosa 0.673913
4 5.0 3.6 1.4 0.2 setosa 0.720000

In the example above, we inserted a precomputed value. We can also pass in a function of one argument to be
evaluated on the DataFrame being assigned to.

In [44]:
iris.assign(sepal_ratio=lambda x: (x['sepal_width'] / x['sepal_length'])).head()
Out[44]:
sepal_length sepal_width petal_length petal_width species sepal_ratio
0 5.1 3.5 1.4 0.2 setosa 0.686275
1 4.9 3.0 1.4 0.2 setosa 0.612245
2 4.7 3.2 1.3 0.2 setosa 0.680851
3 4.6 3.1 1.5 0.2 setosa 0.673913
4 5.0 3.6 1.4 0.2 setosa 0.720000

assign always returns a copy of the data, leaving the original DataFrame untouched.

In [45]:
(iris.query('sepal_length > 4')
      .assign(sepal_ratio=lambda x: x.sepal_width / x.sepal_length,
            petal_ratio=lambda x: x.petal_width / x.petal_length)
      .plot(kind='scatter', x='sepal_ratio', y='petal_ratio'))
Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x92360f0>

Indexing / selection
The basics of indexing are as follows:

Operation | Syntax | Result
Select column | df[col] | Series
Select row by label | df.loc[label] | Series
Select row by integer location | df.iloc[loc] | Series
Slice rows | df[5:10] | DataFrame
Select rows by boolean vector | df[bool_vec] | DataFrame

Row selection, for example, returns a Series whose index is the columns of the DataFrame:

In [47]:
import numpy as np
import pandas as pd
In [48]:
d = {'one': pd.Series([2., 3., 4.], index=['p', 'q', 'r']),
     'two': pd.Series([2., 3., 4., 5.], index=['p', 'q', 'r', 's'])}
In [49]:
df = pd.DataFrame(d)
In [50]:
df.loc['q']
Out[50]:
one    3.0
two    3.0
Name: q, dtype: float64

For a more exhaustive treatment of sophisticated label-based indexing and slicing, see the section
on indexing. We will address the fundamentals of reindexing / conforming to new sets of labels in the
section on reindexing.

Data alignment and arithmetic
Data alignment between DataFrame objects automatically aligns on both the columns and the index
(row labels). Again, the resulting object will have the union of the column and row labels.

In [51]:
import numpy as np
import pandas as pd
In [52]:
df = pd.DataFrame(np.random.randn(8, 4), columns=['P', 'Q', 'R', 'S'])
In [53]:
df2 = pd.DataFrame(np.random.randn(9, 3), columns=['P', 'Q', 'R'])
In [54]:
df + df2
Out[54]:
P Q R S
0 -1.359783 -0.362051 2.138860 NaN
1 1.336226 -0.040574 -1.115697 NaN
2 0.607396 0.102404 -0.569467 NaN
3 0.447739 0.898215 -0.298585 NaN
4 -0.080552 0.106972 -1.896163 NaN
5 0.279951 3.039250 0.895204 NaN
6 2.216397 0.493784 1.881839 NaN
7 0.234132 -1.667978 1.391710 NaN
8 NaN NaN NaN NaN

When doing an operation between DataFrame and Series, the default behavior is to align the Series index
on the DataFrame columns, thus broadcasting row-wise. For example:

In [55]:
 df - df.iloc[0]
Out[55]:
P Q R S
0 0.000000 0.000000 0.000000 0.000000
1 2.341762 -0.437433 -1.774010 -1.113588
2 0.865723 0.373685 -1.407959 -3.147135
3 2.369243 0.895406 -1.702355 -1.851926
4 1.958280 1.686684 -2.913750 -3.017007
5 2.228300 1.197087 0.170023 -2.952011
6 2.629536 0.359690 -1.355995 -2.316860
7 -0.537236 0.538296 -0.258632 -2.835196

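Older pandas versions special-cased time series data: if the DataFrame index contained dates, a Series was
broadcast column-wise. That special case no longer applies. The Series index is aligned on the DataFrame
columns as usual, so subtracting a column from a date-indexed DataFrame produces the union of the labels
filled with NaN: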

In [56]:
index = pd.date_range('1/1/2019', periods=6)
In [57]:
df = pd.DataFrame(np.random.randn(6, 3), index=index, columns=list('XYZ'))
In [58]:
df
Out[58]:
X Y Z
2019-01-01 0.723344 -0.086666 0.345296
2019-01-02 0.447144 -0.016384 -0.256257
2019-01-03 0.100551 -1.133672 0.038232
2019-01-04 -1.461892 -0.217720 -0.919715
2019-01-05 -0.093798 -0.058988 -0.569805
2019-01-06 -0.594957 0.881995 0.161602
In [59]:
type(df['X'])
Out[59]:
pandas.core.series.Series
In [60]:
df - df['X']
Out[60]:
2019-01-01 00:00:00 2019-01-02 00:00:00 2019-01-03 00:00:00 2019-01-04 00:00:00 2019-01-05 00:00:00 2019-01-06 00:00:00 X Y Z
2019-01-01 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-02 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-03 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-05 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2019-01-06 NaN NaN NaN NaN NaN NaN NaN NaN NaN

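For explicit control over the matching and broadcasting behavior, use the flexible arithmetic methods
(add, sub, mul, div) with the axis argument, for example df.sub(df['X'], axis=0) to subtract column X
from every column row by row.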

Operations with scalars are just as you would expect:

In [61]:
df * 4 + 2
Out[61]:
X Y Z
2019-01-01 4.893376 1.653334 3.381182
2019-01-02 3.788575 1.934465 0.974971
2019-01-03 2.402204 -2.534689 2.152928
2019-01-04 -3.847566 1.129118 -1.678859
2019-01-05 1.624809 1.764047 -0.279221
2019-01-06 -0.379829 5.527982 2.646408
In [62]:
1 / df
Out[62]:
X Y Z
2019-01-01 1.382468 -11.538486 2.896070
2019-01-02 2.236417 -61.035978 -3.902330
2019-01-03 9.945196 -0.882089 26.156106
2019-01-04 -0.684045 -4.593046 -1.087294
2019-01-05 -10.661233 -16.952541 -1.754986
2019-01-06 -1.680793 1.133793 6.188043
In [63]:
df ** 6
Out[63]:
X Y Z
2019-01-01 1.432417e-01 4.237475e-07 1.694907e-03
2019-01-02 7.992510e-03 1.934125e-11 2.831756e-04
2019-01-03 1.033523e-06 2.122880e+00 3.122924e-09
2019-01-04 9.760924e+00 1.065112e-04 6.052274e-01
2019-01-05 6.810132e-07 4.212997e-08 3.422619e-02
2019-01-06 4.435215e-02 4.707584e-01 1.781066e-05

Boolean operators work as well:

In [64]:
df1 = pd.DataFrame({'x': [1, 0, 1], 'y': [0, 1, 1]}, dtype=bool)
In [65]:
df2 = pd.DataFrame({'x': [0, 1, 1], 'y': [1, 1, 0]}, dtype=bool)
In [66]:
df1 & df2
Out[66]:
x y
0 False False
1 False True
2 True False
In [67]:
df1 | df2
Out[67]:
x y
0 True True
1 True True
2 True True
In [68]:
df1 ^ df2
Out[68]:
x y
0 True True
1 True False
2 False True
In [69]:
-df1
Out[69]:
x y
0 False True
1 True False
2 False False

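To transpose, access the T attribute (also the transpose function), similar to an ndarray.
Here only the first 5 rows are transposed: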

In [70]:
df[:5].T
Out[70]:
2019-01-01 00:00:00 2019-01-02 00:00:00 2019-01-03 00:00:00 2019-01-04 00:00:00 2019-01-05 00:00:00
X 0.723344 0.447144 0.100551 -1.461892 -0.093798
Y -0.086666 -0.016384 -1.133672 -0.217720 -0.058988
Z 0.345296 -0.256257 0.038232 -0.919715 -0.569805

DataFrame interoperability with NumPy functions

In [71]:
np.exp(df)
Out[71]:
X Y Z
2019-01-01 2.061315 0.916983 1.412407
2019-01-02 1.563839 0.983750 0.773943
2019-01-03 1.105780 0.321849 1.038972
2019-01-04 0.231797 0.804350 0.398633
2019-01-05 0.910467 0.942718 0.565636
2019-01-06 0.551586 2.415715 1.175392
In [72]:
np.asarray(df)
Out[72]:
array([[ 0.72334402, -0.08666649,  0.34529553],
       [ 0.44714378, -0.01638378, -0.25625714],
       [ 0.10055106, -1.13367237,  0.03823199],
       [-1.46189155, -0.21772045, -0.91971464],
       [-0.09379779, -0.05898821, -0.56980521],
       [-0.59495728,  0.88199546,  0.16160197]])

pandas automatically aligns labeled inputs as part of a ufunc with multiple inputs.
For example, using numpy.remainder() on two Series with differently ordered labels will
align before the operation.

In [73]:
ser1 = pd.Series([2, 3, 4], index=['p', 'q', 'r'])
In [74]:
ser2 = pd.Series([3, 4, 5], index=['q', 'p', 'r'])
In [75]:
ser1
Out[75]:
p    2
q    3
r    4
dtype: int64
In [76]:
ser2
Out[76]:
q    3
p    4
r    5
dtype: int64
In [77]:
np.remainder(ser1, ser2)
Out[77]:
p    2
q    0
r    4
dtype: int64

As usual, the union of the two indices is taken, and non-overlapping values are filled with missing values.

In [78]:
ser3 = pd.Series([4, 6, 8], index=['q', 'r', 's'])
In [79]:
ser3
Out[79]:
q    4
r    6
s    8
dtype: int64
In [80]:
np.remainder(ser1, ser3)
Out[80]:
p    NaN
q    3.0
r    4.0
s    NaN
dtype: float64

When a binary ufunc is applied to a Series and Index, the Series implementation takes precedence and
a Series is returned.

In [81]:
ser = pd.Series([2, 3, 4])
In [82]:
idx = pd.Index([5, 6, 7])
In [83]:
np.maximum(ser, idx)
Out[83]:
0    5
1    6
2    7
dtype: int64

NumPy ufuncs are safe to apply to Series backed by non-ndarray arrays.
If possible, the ufunc is applied without converting the underlying data to an ndarray.

Console display

Very large DataFrames will be truncated to display them in the console.

In [84]:
baseball = pd.read_csv('https://raw.githubusercontent.com/pandas-dev/pandas/master/doc/data/baseball.csv')
In [85]:
print(baseball)
       id     player  year  stint team  lg    g   ab   r    h  ...    rbi  \
0   88641  womacto01  2006      2  CHN  NL   19   50   6   14  ...    2.0   
1   88643  schilcu01  2006      1  BOS  AL   31    2   0    1  ...    0.0   
2   88645  myersmi01  2006      1  NYA  AL   62    0   0    0  ...    0.0   
3   88649  helliri01  2006      1  MIL  NL   20    3   0    0  ...    0.0   
4   88650  johnsra05  2006      1  NYA  AL   33    6   0    1  ...    0.0   
5   88652  finlest01  2006      1  SFN  NL  139  426  66  105  ...   40.0   
6   88653  gonzalu01  2006      1  ARI  NL  153  586  93  159  ...   73.0   
7   88662   seleaa01  2006      1  LAN  NL   28   26   2    5  ...    0.0   
8   89177  francju01  2007      2  ATL  NL   15   40   1   10  ...    8.0   
9   89178  francju01  2007      1  NYN  NL   40   50   7   10  ...    8.0   
10  89330   zaungr01  2007      1  TOR  AL  110  331  43   80  ...   52.0   
11  89333  witasja01  2007      1  TBA  AL    3    0   0    0  ...    0.0   
12  89334  williwo02  2007      1  HOU  NL   33   59   3    6  ...    2.0   
13  89335  wickmbo01  2007      2  ARI  NL    8    0   0    0  ...    0.0   
14  89336  wickmbo01  2007      1  ATL  NL   47    0   0    0  ...    0.0   
15  89337  whitero02  2007      1  MIN  AL   38  109   8   19  ...   20.0   
16  89338  whiteri01  2007      1  HOU  NL   20    1   0    0  ...    0.0   
17  89339  wellsda01  2007      2  LAN  NL    7   15   2    4  ...    1.0   
18  89340  wellsda01  2007      1  SDN  NL   22   38   1    4  ...    0.0   
19  89341  weathda01  2007      1  CIN  NL   67    0   0    0  ...    0.0   
20  89343  walketo04  2007      1  OAK  AL   18   48   5   13  ...    4.0   
21  89345  wakefti01  2007      1  BOS  AL    1    2   0    0  ...    0.0   
22  89347  vizquom01  2007      1  SFN  NL  145  513  54  126  ...   51.0   
23  89348  villoro01  2007      1  NYA  AL    6    0   0    0  ...    0.0   
24  89352  valenjo03  2007      1  NYN  NL   51  166  18   40  ...   18.0   
25  89354  trachst01  2007      2  CHN  NL    4    7   0    1  ...    0.0   
26  89355  trachst01  2007      1  BAL  AL    3    5   0    0  ...    0.0   
27  89359  timlimi01  2007      1  BOS  AL    4    0   0    0  ...    0.0   
28  89360  thomeji01  2007      1  CHA  AL  130  432  79  119  ...   96.0   
29  89361  thomafr04  2007      1  TOR  AL  155  531  63  147  ...   95.0   
..    ...        ...   ...    ...  ...  ..  ...  ...  ..  ...  ...    ...   
70  89460  guarded01  2007      1  CIN  NL   15    0   0    0  ...    0.0   
71  89462  griffke02  2007      1  CIN  NL  144  528  78  146  ...   93.0   
72  89463  greensh01  2007      1  NYN  NL  130  446  62  130  ...   46.0   
73  89464  graffto01  2007      1  MIL  NL   86  231  34   55  ...   30.0   
74  89465  gordoto01  2007      1  PHI  NL   44    0   0    0  ...    0.0   
75  89466  gonzalu01  2007      1  LAN  NL  139  464  70  129  ...   68.0   
76  89467  gomezch02  2007      2  CLE  AL   19   53   4   15  ...    5.0   
77  89468  gomezch02  2007      1  BAL  AL   73  169  17   51  ...   16.0   
78  89469  glavito02  2007      1  NYN  NL   33   56   3   12  ...    4.0   
79  89473  floydcl01  2007      1  CHN  NL  108  282  40   80  ...   45.0   
80  89474  finlest01  2007      1  COL  NL   43   94   9   17  ...    2.0   
81  89480  embreal01  2007      1  OAK  AL    4    0   0    0  ...    0.0   
82  89481  edmonji01  2007      1  SLN  NL  117  365  39   92  ...   53.0   
83  89482  easleda01  2007      1  NYN  NL   76  193  24   54  ...   26.0   
84  89489  delgaca01  2007      1  NYN  NL  139  538  71  139  ...   87.0   
85  89493  cormirh01  2007      1  CIN  NL    6    0   0    0  ...    0.0   
86  89494  coninje01  2007      2  NYN  NL   21   41   2    8  ...    5.0   
87  89495  coninje01  2007      1  CIN  NL   80  215  23   57  ...   32.0   
88  89497  clemero02  2007      1  NYA  AL    2    2   0    1  ...    0.0   
89  89498  claytro01  2007      2  BOS  AL    8    6   1    0  ...    0.0   
90  89499  claytro01  2007      1  TOR  AL   69  189  23   48  ...   12.0   
91  89501  cirilje01  2007      2  ARI  NL   28   40   6    8  ...    6.0   
92  89502  cirilje01  2007      1  MIN  AL   50  153  18   40  ...   21.0   
93  89521  bondsba01  2007      1  SFN  NL  126  340  75   94  ...   66.0   
94  89523  biggicr01  2007      1  HOU  NL  141  517  68  130  ...   50.0   
95  89525  benitar01  2007      2  FLO  NL   34    0   0    0  ...    0.0   
96  89526  benitar01  2007      1  SFN  NL   19    0   0    0  ...    0.0   
97  89530  ausmubr01  2007      1  HOU  NL  117  349  38   82  ...   25.0   
98  89533   aloumo01  2007      1  NYN  NL   87  328  51  112  ...   49.0   
99  89534  alomasa02  2007      1  NYN  NL    8   22   1    3  ...    0.0   

      sb   cs   bb     so   ibb   hbp    sh   sf  gidp  
0    1.0  1.0    4    4.0   0.0   0.0   3.0  0.0   0.0  
1    0.0  0.0    0    1.0   0.0   0.0   0.0  0.0   0.0  
2    0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
3    0.0  0.0    0    2.0   0.0   0.0   0.0  0.0   0.0  
4    0.0  0.0    0    4.0   0.0   0.0   0.0  0.0   0.0  
5    7.0  0.0   46   55.0   2.0   2.0   3.0  4.0   6.0  
6    0.0  1.0   69   58.0  10.0   7.0   0.0  6.0  14.0  
7    0.0  0.0    1    7.0   0.0   0.0   6.0  0.0   1.0  
8    0.0  0.0    4   10.0   1.0   0.0   0.0  1.0   1.0  
9    2.0  1.0   10   13.0   0.0   0.0   0.0  1.0   1.0  
10   0.0  0.0   51   55.0   8.0   2.0   1.0  6.0   9.0  
11   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
12   0.0  0.0    0   25.0   0.0   0.0   5.0  0.0   1.0  
13   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
14   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
15   0.0  0.0    6   19.0   0.0   3.0   0.0  1.0   2.0  
16   0.0  0.0    0    1.0   0.0   0.0   0.0  0.0   0.0  
17   0.0  0.0    0    6.0   0.0   0.0   0.0  0.0   0.0  
18   0.0  0.0    0   12.0   0.0   0.0   4.0  0.0   0.0  
19   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
20   0.0  0.0    2    4.0   0.0   0.0   0.0  2.0   2.0  
21   0.0  0.0    0    2.0   0.0   0.0   0.0  0.0   0.0  
22  14.0  6.0   44   48.0   6.0   1.0  14.0  3.0  14.0  
23   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
24   2.0  1.0   15   28.0   4.0   0.0   1.0  1.0   5.0  
25   0.0  0.0    0    1.0   0.0   0.0   0.0  0.0   0.0  
26   0.0  0.0    0    3.0   0.0   0.0   0.0  0.0   0.0  
27   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
28   0.0  1.0   95  134.0  11.0   6.0   0.0  3.0  10.0  
29   0.0  0.0   81   94.0   3.0   7.0   0.0  5.0  14.0  
..   ...  ...  ...    ...   ...   ...   ...  ...   ...  
70   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
71   6.0  1.0   85   99.0  14.0   1.0   0.0  9.0  14.0  
72  11.0  1.0   37   62.0   4.0   5.0   1.0  1.0  14.0  
73   0.0  1.0   24   44.0   6.0   3.0   0.0  2.0   7.0  
74   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
75   6.0  2.0   56   56.0   4.0   4.0   0.0  2.0  11.0  
76   0.0  0.0    0    6.0   0.0   0.0   1.0  1.0   1.0  
77   1.0  2.0   10   20.0   1.0   0.0   5.0  1.0   5.0  
78   0.0  0.0    6    5.0   0.0   0.0  12.0  1.0   0.0  
79   0.0  0.0   35   47.0   5.0   5.0   0.0  0.0   6.0  
80   0.0  0.0    8    4.0   1.0   0.0   0.0  0.0   2.0  
81   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
82   0.0  2.0   41   75.0   2.0   0.0   2.0  3.0   9.0  
83   0.0  1.0   19   35.0   1.0   5.0   0.0  1.0   2.0  
84   4.0  0.0   52  118.0   8.0  11.0   0.0  6.0  12.0  
85   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
86   0.0  0.0    7    8.0   2.0   0.0   1.0  1.0   1.0  
87   4.0  0.0   20   28.0   0.0   0.0   1.0  6.0   4.0  
88   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
89   0.0  0.0    0    3.0   0.0   0.0   0.0  0.0   2.0  
90   2.0  1.0   14   50.0   0.0   1.0   3.0  3.0   8.0  
91   0.0  0.0    4    6.0   0.0   0.0   0.0  0.0   1.0  
92   2.0  0.0   15   13.0   0.0   1.0   3.0  2.0   9.0  
93   5.0  0.0  132   54.0  43.0   3.0   0.0  2.0  13.0  
94   4.0  3.0   23  112.0   0.0   3.0   7.0  5.0   5.0  
95   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
96   0.0  0.0    0    0.0   0.0   0.0   0.0  0.0   0.0  
97   6.0  1.0   37   74.0   3.0   6.0   4.0  1.0  11.0  
98   3.0  0.0   27   30.0   5.0   2.0   0.0  3.0  13.0  
99   0.0  0.0    0    3.0   0.0   0.0   0.0  0.0   0.0  

[100 rows x 23 columns]
In [86]:
baseball.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 23 columns):
id        100 non-null int64
player    100 non-null object
year      100 non-null int64
stint     100 non-null int64
team      100 non-null object
lg        100 non-null object
g         100 non-null int64
ab        100 non-null int64
r         100 non-null int64
h         100 non-null int64
X2b       100 non-null int64
X3b       100 non-null int64
hr        100 non-null int64
rbi       100 non-null float64
sb        100 non-null float64
cs        100 non-null float64
bb        100 non-null int64
so        100 non-null float64
ibb       100 non-null float64
hbp       100 non-null float64
sh        100 non-null float64
sf        100 non-null float64
gidp      100 non-null float64
dtypes: float64(9), int64(11), object(3)
memory usage: 18.0+ KB

However, using to_string will return a string representation of the DataFrame in tabular form, though
it won’t always fit the console width:

In [87]:
print(baseball.iloc[-20:, :10].to_string())
       id     player  year  stint team  lg    g   ab   r    h
80  89474  finlest01  2007      1  COL  NL   43   94   9   17
81  89480  embreal01  2007      1  OAK  AL    4    0   0    0
82  89481  edmonji01  2007      1  SLN  NL  117  365  39   92
83  89482  easleda01  2007      1  NYN  NL   76  193  24   54
84  89489  delgaca01  2007      1  NYN  NL  139  538  71  139
85  89493  cormirh01  2007      1  CIN  NL    6    0   0    0
86  89494  coninje01  2007      2  NYN  NL   21   41   2    8
87  89495  coninje01  2007      1  CIN  NL   80  215  23   57
88  89497  clemero02  2007      1  NYA  AL    2    2   0    1
89  89498  claytro01  2007      2  BOS  AL    8    6   1    0
90  89499  claytro01  2007      1  TOR  AL   69  189  23   48
91  89501  cirilje01  2007      2  ARI  NL   28   40   6    8
92  89502  cirilje01  2007      1  MIN  AL   50  153  18   40
93  89521  bondsba01  2007      1  SFN  NL  126  340  75   94
94  89523  biggicr01  2007      1  HOU  NL  141  517  68  130
95  89525  benitar01  2007      2  FLO  NL   34    0   0    0
96  89526  benitar01  2007      1  SFN  NL   19    0   0    0
97  89530  ausmubr01  2007      1  HOU  NL  117  349  38   82
98  89533   aloumo01  2007      1  NYN  NL   87  328  51  112
99  89534  alomasa02  2007      1  NYN  NL    8   22   1    3

Wide DataFrames will be printed across multiple rows by default:

In [88]:
pd.DataFrame(np.random.randn(4, 10))
Out[88]:
0 1 2 3 4 5 6 7 8 9
0 -1.162526 0.474868 0.157520 -0.226966 0.288694 -0.204342 -0.454281 -0.492041 0.626862 0.603523
1 0.741971 -0.570777 2.065719 0.844021 0.010747 1.188861 -1.831683 -0.340063 0.471729 0.140003
2 0.906849 0.335401 -0.159554 2.874012 1.539447 1.144927 -0.902080 -0.008464 0.475082 -0.746442
3 -0.377446 0.023604 -0.616251 1.465809 -0.255740 -1.556993 -1.046476 2.627271 1.470708 0.409682

You can change how much to print on a single row by setting the display.width option:

In [89]:
pd.set_option('display.width', 30)
In [90]:
pd.DataFrame(np.random.randn(4, 10))
Out[90]:
0 1 2 3 4 5 6 7 8 9
0 -2.318650 -0.313811 1.507447 -0.665551 0.694109 -0.522585 0.100735 -0.246376 1.584034 -0.390303
1 1.120756 -0.513528 -1.517324 0.908516 0.356448 0.036096 2.135186 -0.004657 1.790709 -0.302300
2 -0.677450 1.360683 -0.929337 -0.739711 0.434875 0.171721 -0.394395 1.956881 0.874484 -0.963741
3 1.195355 0.090944 -0.699150 -0.653663 0.075695 -0.140222 0.132759 -0.377024 -0.758761 -0.642230

You can adjust the max width of the individual columns by setting the display.max_colwidth option:

In [91]:
datafile = {'filename': ['filename_01', 'filename_02'],
             'path': ["media/user_name/storage/folder_01/filename_01",
                      "media/user_name/storage/folder_02/filename_02"]}
In [92]:
pd.set_option('display.max_colwidth', 40)
In [93]:
pd.DataFrame(datafile)
Out[93]:
filename path
0 filename_01 media/user_name/storage/folder_01/fi...
1 filename_02 media/user_name/storage/folder_02/fi...
In [95]:
pd.set_option('display.max_colwidth', 100)
In [96]:
pd.DataFrame(datafile)
Out[96]:
filename path
0 filename_01 media/user_name/storage/folder_01/filename_01
1 filename_02 media/user_name/storage/folder_02/filename_02

You can also disable this feature via the expand_frame_repr option. This will print the table in one block.

DataFrame column attribute access and IPython completion

If a DataFrame column label is a valid Python variable name, the column can be accessed like an attribute:

In [97]:
df = pd.DataFrame({'boo1': np.random.randn(4),
                   'boo2': np.random.randn(4)})
df
Out[97]:
boo1 boo2
0 1.073844 0.586491
1 1.050296 0.241025
2 0.726989 0.725950
3 1.253145 -0.064325
In [98]:
df.boo2
Out[98]:
0    0.586491
1    0.241025
2    0.725950
3   -0.064325
Name: boo2, dtype: float64