In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [3]:
# One-dimensional labelled array; the NaN entry forces a float64 dtype.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
In [4]:
# Display the Series: index labels on the left, values on the right.
s
Out[4]:
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64
In [6]:
# Six consecutive calendar days starting 2013-01-01 (a DatetimeIndex).
dates = pd.date_range(start='20130101', periods=6)
In [7]:
# Display the DatetimeIndex built above.
dates
Out[7]:
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
In [8]:
# Two-dimensional frame: 6 rows (one per entry of `dates`) by 4 columns of
# standard-normal draws. The global NumPy RNG is not seeded here, so the
# values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
In [9]:
# Display the frame: dates as row labels, A-D as column labels.
df
Out[9]:
A B C D
2013-01-01 0.642333 -0.772970 -0.823404 0.495468
2013-01-02 0.185380 -0.350632 0.220256 -0.082338
2013-01-03 -0.437222 -0.183861 -0.901640 0.518012
2013-01-04 0.108046 -1.118019 1.342771 -0.902833
2013-01-05 2.197607 1.352511 1.631912 0.098881
2013-01-06 0.421616 1.796068 -0.811890 1.328256
In [10]:
# Frame built from a mapping of column name -> value. The scalar entries
# (A, B, F) are repeated on every row; the array-like entries (C, D, E)
# each supply four values, so the frame has four rows.
df2 = pd.DataFrame(dict(
    A=1.0,
    B=pd.Timestamp('20130102'),
    C=pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),
    D=np.full(4, 3, dtype='int32'),
    E=pd.Categorical(['test', 'train', 'test', 'train']),
    F='foo',
))
In [11]:
# Display the frame; the scalar inputs appear repeated on every row.
df2
Out[11]:
A B C D E F
0 1.0 2013-01-02 1.0 3 test foo
1 1.0 2013-01-02 1.0 3 train foo
2 1.0 2013-01-02 1.0 3 test foo
3 1.0 2013-01-02 1.0 3 train foo
In [12]:
df2.dtypes # dtype of each column
Out[12]:
A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object
In [14]:
df2.head(2) # first two rows
Out[14]:
A B C D E F
0 1.0 2013-01-02 1.0 3 test foo
1 1.0 2013-01-02 1.0 3 train foo
In [15]:
df2.tail(3) # last three rows
Out[15]:
A B C D E F
1 1.0 2013-01-02 1.0 3 train foo
2 1.0 2013-01-02 1.0 3 test foo
3 1.0 2013-01-02 1.0 3 train foo
In [19]:
# Row labels of df: the DatetimeIndex passed in at construction.
df.index
Out[19]:
DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')
In [18]:
# Row labels of df2: the integer labels 0-3.
df2.index
Out[18]:
Int64Index([0, 1, 2, 3], dtype='int64')
In [20]:
# Underlying data as a 2-D NumPy array; row and column labels are dropped.
df.values
Out[20]:
array([[ 0.64233272, -0.77297006, -0.82340364,  0.49546812],
       [ 0.18537978, -0.35063155,  0.22025568, -0.08233841],
       [-0.4372219 , -0.18386053, -0.9016397 ,  0.51801185],
       [ 0.10804587, -1.11801911,  1.34277136, -0.90283271],
       [ 2.19760726,  1.35251076,  1.6319119 ,  0.09888119],
       [ 0.42161594,  1.79606758, -0.81189005,  1.32825639]])
In [21]:
df.T # transpose: column labels become row labels and vice versa
Out[21]:
2013-01-01 00:00:00 2013-01-02 00:00:00 2013-01-03 00:00:00 2013-01-04 00:00:00 2013-01-05 00:00:00 2013-01-06 00:00:00
A 0.642333 0.185380 -0.437222 0.108046 2.197607 0.421616
B -0.772970 -0.350632 -0.183861 -1.118019 1.352511 1.796068
C -0.823404 0.220256 -0.901640 1.342771 1.631912 -0.811890
D 0.495468 -0.082338 0.518012 -0.902833 0.098881 1.328256
In [22]:
df.sort_values(by='B') # sort rows by the values in column B (ascending); df itself is not modified
Out[22]:
A B C D
2013-01-04 0.108046 -1.118019 1.342771 -0.902833
2013-01-01 0.642333 -0.772970 -0.823404 0.495468
2013-01-02 0.185380 -0.350632 0.220256 -0.082338
2013-01-03 -0.437222 -0.183861 -0.901640 0.518012
2013-01-05 2.197607 1.352511 1.631912 0.098881
2013-01-06 0.421616 1.796068 -0.811890 1.328256
In [23]:
df['A'] # projection: select a single column, returned as a Series
Out[23]:
2013-01-01    0.642333
2013-01-02    0.185380
2013-01-03   -0.437222
2013-01-04    0.108046
2013-01-05    2.197607
2013-01-06    0.421616
Freq: D, Name: A, dtype: float64
In [26]:
df[0:2] # slice rows by position (end position excluded)
Out[26]:
A B C D
2013-01-01 0.642333 -0.772970 -0.823404 0.495468
2013-01-02 0.185380 -0.350632 0.220256 -0.082338
In [27]:
df['20130102':'20130104'] # slice rows by index label (both endpoints included)
Out[27]:
A B C D
2013-01-02 0.185380 -0.350632 0.220256 -0.082338
2013-01-03 -0.437222 -0.183861 -0.901640 0.518012
2013-01-04 0.108046 -1.118019 1.342771 -0.902833
In [28]:
df.loc[:,['A','B']] # select columns by label, keeping every row
Out[28]:
A B
2013-01-01 0.642333 -0.772970
2013-01-02 0.185380 -0.350632
2013-01-03 -0.437222 -0.183861
2013-01-04 0.108046 -1.118019
2013-01-05 2.197607 1.352511
2013-01-06 0.421616 1.796068
In [29]:
df.loc['20130102':'20130104',['A','B']] # select by row labels and column labels at once
Out[29]:
A B
2013-01-02 0.185380 -0.350632
2013-01-03 -0.437222 -0.183861
2013-01-04 0.108046 -1.118019
In [41]:
# Scalar lookup by row label and column label in a single .loc call
# (avoids building an intermediate row Series through chained indexing).
df.loc['20130102', 'A']
Out[41]:
0.18537978096442437
In [39]:
# Scalar lookup: first row (by position), column 'A'. The column label is
# translated to its integer position so that one .iloc call does the whole
# lookup instead of chaining .iloc[0] with a second ['A'] lookup.
df.iloc[0, df.columns.get_loc('A')]
Out[39]:
0.6423327151848488
In [42]:
# Positional slice: rows 3-4 and columns 0-1 (end positions are excluded).
df.iloc[3:5,0:2]
Out[42]:
A B
2013-01-04 0.108046 -1.118019
2013-01-05 2.197607 1.352511
In [43]:
# Positional selection with explicit lists: rows 1, 2, 4 and columns 0, 2.
df.iloc[[1,2,4],[0,2]]
Out[43]:
A C
2013-01-02 0.185380 0.220256
2013-01-03 -0.437222 -0.901640
2013-01-05 2.197607 1.631912
In [44]:
df[df['A'] > 0] # filter with a boolean mask: keep the rows where A > 0
Out[44]:
A B C D
2013-01-01 0.642333 -0.772970 -0.823404 0.495468
2013-01-02 0.185380 -0.350632 0.220256 -0.082338
2013-01-04 0.108046 -1.118019 1.342771 -0.902833
2013-01-05 2.197607 1.352511 1.631912 0.098881
2013-01-06 0.421616 1.796068 -0.811890 1.328256
In [45]:
 df.groupby('A').sum() # 聚合
Out[45]:
B C D
A
-0.437222 -0.183861 -0.901640 0.518012
0.108046 -1.118019 1.342771 -0.902833
0.185380 -0.350632 0.220256 -0.082338
0.421616 1.796068 -0.811890 1.328256
0.642333 -0.772970 -0.823404 0.495468
2.197607 1.352511 1.631912 0.098881
In [46]:
# Group by the (A, B) pair and sum the remaining columns; the result is
# indexed by both keys.
df.groupby(['A','B']).sum()
Out[46]:
C D
A B
-0.437222 -0.183861 -0.901640 0.518012
0.108046 -1.118019 1.342771 -0.902833
0.185380 -0.350632 0.220256 -0.082338
0.421616 1.796068 -0.811890 1.328256
0.642333 -0.772970 -0.823404 0.495468
2.197607 1.352511 1.631912 0.098881
In [47]:
# Load the news dataset from CSV; the relative path is resolved against the
# kernel's current working directory.
news = pd.read_csv("news.csv")
In [48]:
# Display the loaded frame (columns: ID, score, title, url).
news
Out[48]:
ID score title url
0 1 1 Reverse engineering YouTube demonetization al... https://docs.google.com/document/d/155yNpfR7dG...
1 2 9 Joplin A note-taking and to-do app with build... https://github.com/laurent22/joplin/
2 3 4 Coinbase Ordered to Turn Over Identities of 1... https://motherboard.vice.com/en_us/article/ywn...
3 4 8 Australian uses snack bags as Faraday cage to... https://arstechnica.com/information-technology...
4 5 3 A blog I started on Neural Networks and Proba... https://jontysinai.github.io
5 6 2 It Looks Like Nobel Economics Laureates Don't... https://www.bloomberg.com/news/articles/2017-1...
6 7 7 Seventh RISC-V Workshop: Day Two - LowRISC http://www.lowrisc.org/blog/2017/11/seventh-ri...
7 8 6 China's Art Factories: Van Gogh from the Swea... http://www.spiegel.de/international/0,1518,433...
8 9 5 Judge Tells Uber Lawyer: 'It Looks Like You C... https://www.nytimes.com/2017/11/29/business/wa...
9 10 0 As a solo developer, I decided to offer phone... http://plumshell.com/2017/11/30/as-a-solo-app-...
10 11 1 Why Did ProtonMail Vanish from Google Search ... http://techcrunch.com/2016/10/27/why-did-proto...
11 12 6 Weekly Machine Learning Toolset and Library R... https://blog.pocketcluster.io/2017/11/30/weekl...
12 13 8 50 Years Ago Jocelyn Bell Discovered Pulsars https://www.space.com/38912-pulsar-discovery-b...
13 14 6 Advent of Code 2017 https://adventofcode.com/2017
14 15 9 SafeButler (YC S17) is hiring employee #2 to ... item?id=15815913
15 16 4 BTC addresses whose private keys are from Sha... https://twitter.com/4Dgifts/status/93622348798...
16 17 5 Review and Teardown of a Cheap GPS Jammer http://phasenoise.livejournal.com/2017/11/3185...
17 18 4 Coinbase Obtains Partial Victory Over IRS https://blog.coinbase.com/coinbase-obtains-par...
18 19 2 How to Profit from Bitcoin Bubble http://www.danielwilczynski.com/2017/11/29/bit...
19 20 7 Linux Vendor Firmware Service https://fwupd.org/
20 21 8 Building mindshare in a company http://www.writethedocs.org/guide/writing/mind...
21 22 2 How Cubism Protected Warships in WorldWar I https://www.wired.com/story/dazzle-camouflage-...
22 23 3 The Model Book of Calligraphy (1561-1596) http://publicdomainreview.org/collections/the-...
23 24 4 How do you move out of a smarthome? https://shkspr.mobi/blog/2017/11/how-do-you-mo...
24 25 7 Software Giant Autodesk to Axe 13% of Global ... http://www.animationmagazine.net/people/softwa...
25 26 8 AWS EC2 Virtualization 2017: Including Nitro http://www.brendangregg.com/blog/2017-11-29/aw...
26 27 6 Google faces UK legal action for bypassing iP... http://www.bbc.co.uk/news/technology-42166089
27 28 3 OptionPlan, an app for founders looking to de... https://www.indexventures.com/optionplan
28 29 9 The Peculiarly Quiet Decline and Fall of the KVM http://www.loper-os.org/?p=1927
29 30 1 Nvidia has confirmed a driver bug resulting i... https://www.gamingonlinux.com/articles/nvidia-...
In [ ]: