Sie sind auf Seite 1von 11

bitcoin

April 21, 2024

[ ]: import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

[ ]: df = pd.read_csv('/content/BitcoinDataset3.csv')

[ ]: df.head()

[ ]: Date Price Open High Low Vol. Change %


0 Aug 02, 2020 11,105.80 11,802.60 12,061.10 10,730.70 698.62K -5.91%
1 Aug 01, 2020 11,803.10 11,333.20 11,847.70 11,226.10 611.47K 4.14%
2 Jul 31, 2020 11,333.40 11,096.50 11,434.80 10,964.60 530.95K 2.14%
3 Jul 30, 2020 11,096.20 11,105.80 11,164.40 10,861.60 501.14K -0.09%
4 Jul 29, 2020 11,105.90 10,908.40 11,336.50 10,771.80 576.83K 1.81%

[ ]: df['Price']=df['Price'].replace(",", "", regex=True)

[ ]: df.head()

[ ]: Date Price Open High Low Vol. Change %


0 Aug 02, 2020 11105.80 11,802.60 12,061.10 10,730.70 698.62K -5.91%
1 Aug 01, 2020 11803.10 11,333.20 11,847.70 11,226.10 611.47K 4.14%
2 Jul 31, 2020 11333.40 11,096.50 11,434.80 10,964.60 530.95K 2.14%
3 Jul 30, 2020 11096.20 11,105.80 11,164.40 10,861.60 501.14K -0.09%
4 Jul 29, 2020 11105.90 10,908.40 11,336.50 10,771.80 576.83K 1.81%

[ ]: df['Open']=df['Open'].replace(",","", regex=True)
df['High']=df['High'].replace(",", "", regex=True)
df['Vol.']=df['Vol.'].replace("K", "", regex=True)
df['Vol.']=df['Vol.'].replace("M", "", regex=True)

[ ]: df['Change %']=df['Change %'].replace("%","", regex= True)

[ ]: df['Low']=df['Low'].replace(",", "", regex=True)

[ ]: df.head()

1
[ ]: Date Price Open High Low Vol. Change %
0 Aug 02, 2020 11105.80 11802.60 12061.10 10,730.70 698.62 -5.91
1 Aug 01, 2020 11803.10 11333.20 11847.70 11,226.10 611.47 4.14
2 Jul 31, 2020 11333.40 11096.50 11434.80 10,964.60 530.95 2.14
3 Jul 30, 2020 11096.20 11105.80 11164.40 10,861.60 501.14 -0.09
4 Jul 29, 2020 11105.90 10908.40 11336.50 10,771.80 576.83 1.81

[ ]: df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190 entries, 0 to 1189
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 1190 non-null object
1 Price 1190 non-null object
2 Open 1190 non-null object
3 High 1190 non-null object
4 Low 1190 non-null object
5 Vol. 1187 non-null object
6 Change % 1190 non-null object
dtypes: object(7)
memory usage: 65.2+ KB

[ ]: df['High']=df['High'].astype(float)
df['Low']=df['Low'].astype(float)
df['Vol.']=df['Vol.'].astype(float)
df['Open']=df['Open'].astype(float)
df['Price']=df['Price'].astype(float)
df['Change %']= pd.to_numeric(df['Change %'],errors ='coerce')

[ ]: df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190 entries, 0 to 1189
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 1190 non-null object
1 Price 1190 non-null float64
2 Open 1190 non-null float64
3 High 1190 non-null float64
4 Low 1190 non-null float64
5 Vol. 1187 non-null float64
6 Change % 1190 non-null float64
dtypes: float64(6), object(1)
memory usage: 65.2+ KB

2
[ ]: df['Date']=pd.to_datetime(df['Date'])

[ ]: df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1190 entries, 0 to 1189
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Date 1190 non-null datetime64[ns]
1 Price 1190 non-null float64
2 Open 1190 non-null float64
3 High 1190 non-null float64
4 Low 1190 non-null float64
5 Vol. 1187 non-null float64
6 Change % 1190 non-null float64
dtypes: datetime64[ns](1), float64(6)
memory usage: 65.2 KB

[ ]: df.set_index(df['Date'], inplace=True)

[ ]: df.head()

[ ]: Date Price Open High Low Vol. Change %


Date
2020-08-02 2020-08-02 11105.8 11802.6 12061.1 10730.7 698.62 -5.91
2020-08-01 2020-08-01 11803.1 11333.2 11847.7 11226.1 611.47 4.14
2020-07-31 2020-07-31 11333.4 11096.5 11434.8 10964.6 530.95 2.14
2020-07-30 2020-07-30 11096.2 11105.8 11164.4 10861.6 501.14 -0.09
2020-07-29 2020-07-29 11105.9 10908.4 11336.5 10771.8 576.83 1.81

[ ]: target = df['Price']
target.plot(grid=True)

[ ]: <Axes: xlabel='Date'>

3
[ ]: df_2018 = df.loc['2018']
target_2018=df_2018['Price']
target_2018.plot(grid=True)

[ ]: <Axes: xlabel='Date'>

4
[ ]: df_2019 = df.loc['2019']
target_2019=df_2019['Price']
target_2019.plot(grid=True)

[ ]: <Axes: xlabel='Date'>

5
[ ]: df.plot(subplots=True, figsize=(10, 10))
plt.show()

6
[ ]: sns.pairplot(df)
plt.show()

7
[ ]: corr = df[['Price','Open','High','Low','Vol.','Change %']].corr()

[ ]: corr

[ ]: Price Open High Low Vol. Change %


Price 1.000000 0.992000 0.996439 0.995404 0.178875 0.026430
Open 0.992000 1.000000 0.996026 0.992945 0.182586 -0.087664
High 0.996439 0.996026 1.000000 0.991235 0.170831 -0.026556
Low 0.995404 0.992945 0.991235 1.000000 0.193933 -0.018682
Vol. 0.178875 0.182586 0.170831 0.193933 1.000000 -0.050921
Change % 0.026430 -0.087664 -0.026556 -0.018682 -0.050921 1.000000

8
[ ]: sns.heatmap(corr, annot=True, cmap="YlGnBu", linewidths=0.5, linecolor='black')
plt.show()

[ ]: df.isna()

[ ]: Date 0
Price 0
Open 0
High 0
Low 0
Vol. 3
Change % 0
dtype: int64

[ ]: df.fillna(method='ffill', inplace = True)

[ ]: df.isna().sum()

[ ]: Date 0
Price 0
Open 0
High 0

9
Low 0
Vol. 0
Change % 0
dtype: int64

[ ]: from statsmodels.tsa.stattools import kpss

kpss_test = kpss(df['Price'], regression='c', nlags='auto')

print('KPSS Test Statistic:', kpss_test[0])


print('KPSS p-value:', kpss_test[1])
print('KPSS Critical Values:')
for key, value in kpss_test[3].items():
print(f'{key}: {value}')

KPSS Test Statistic: 0.7718239563002028


KPSS p-value: 0.01
KPSS Critical Values:
10%: 0.347
5%: 0.463
2.5%: 0.574
1%: 0.739
<ipython-input-28-6dcbd234726b>:3: InterpolationWarning: The test statistic is
outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.

kpss_test = kpss(df['Price'], regression='c', nlags='auto')

[ ]: from statsmodels.tsa.stattools import adfuller

adf_test = adfuller(df['Price'], autolag='AIC')

print('ADF Statistic:', adf_test[0])


print('ADF p-value:', adf_test[1])
print('ADF Critical Values:')
for key, value in adf_test[4].items():
print(f'{key}: {value}')

ADF Statistic: -2.2166514685468264


ADF p-value: 0.2002575364116962
ADF Critical Values:
1%: -3.435956259544601
5%: -2.8640155784421606
10%: -2.568088052103053

[ ]: # Based on the KPSS test results:

10
kpss_p_value = kpss_test[1]
if kpss_p_value > 0.05:
print("The time series is non-stationary.")
else:
print("The time series is stationary.")

# Based on the ADF test results:

adf_p_value = adf_test[1]
if adf_p_value > 0.05:
print("The time series is non-stationary.")
else:
print("The time series is stationary.")

The time series is stationary.


The time series is non-stationary.

[ ]:

11

Das könnte Ihnen auch gefallen