Skip to content

Commit

Permalink
V0.9.59 更新一批代码 (#213)
Browse files Browse the repository at this point in the history
* 0.9.59 start coding

* 0.9.59 fix dt type error

* 0.9.59 fix dt type error

* 0.9.59 update

* 0.9.59 新增 min_max_limit

* 0.9.59 新增 rolling layers

* 0.9.59 新增 rolling layers

* 0.9.59 update

* 0.9.59 update

* 0.9.59 daily_performance 优化

* 0.9.59 daily_performance 优化
  • Loading branch information
zengbin93 authored Sep 19, 2024
1 parent daea50e commit 348ca55
Show file tree
Hide file tree
Showing 15 changed files with 558 additions and 183 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.58 ]
branches: [ master, V0.9.59 ]
pull_request:
branches: [ master ]

Expand Down
9 changes: 7 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from czsc.utils import (
mac_address,
overlap,
to_arrow,

format_standard_kline,

Expand All @@ -65,6 +66,7 @@
save_json,
get_sub_elements,
get_py_namespace,
code_namespace,
freqs_sorted,
x_round,
import_by_name,
Expand Down Expand Up @@ -155,6 +157,7 @@
show_strategies_recent,
show_factor_value,
show_code_editor,
show_classify,
)

from czsc.utils.bi_info import (
Expand Down Expand Up @@ -204,13 +207,15 @@
cross_sectional_strategy,
judge_factor_direction,
monotonicity,
min_max_limit,
rolling_layers,
)


__version__ = "0.9.58"
__version__ = "0.9.59"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20240808"
__date__ = "20240901"


def welcome():
Expand Down
63 changes: 63 additions & 0 deletions czsc/eda.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,66 @@ def monotonicity(sequence):
"""
from scipy.stats import spearmanr
return spearmanr(sequence, range(len(sequence)))[0]


def min_max_limit(x, min_val, max_val, digits=4):
    """Clamp ``x`` into the range [min_val, max_val] and round the result.

    :param x: float, input value
    :param min_val: float, lower bound
    :param max_val: float, upper bound
    :param digits: int, number of decimal places kept by ``round``
    :return: float, the clamped and rounded value
    """
    # Apply the upper bound first, then the lower bound, so that min_val wins
    # if the two bounds are ever passed in the wrong order.
    upper_capped = min(max_val, x)
    clamped = max(min_val, upper_capped)
    return round(clamped, digits)


def rolling_layers(df, factor, n=5, **kwargs):
    """Assign every row of a time series to a layer (bucket) using a rolling window.

    The result is written to a new column named ``f"{factor}分层"``. Labels have the
    form ``第01层`` ... ``第NN层``; ``第00层`` marks rows for which no layer could be
    computed (the rolling window holds fewer than ``min_periods`` observations).

    Intermediate results are kept in local Series instead of scratch columns, so
    existing columns of ``df`` (e.g. one already called ``pct_rank``) are never
    overwritten or dropped.

    :param df: pd.DataFrame, factor data; must contain a ``dt`` column and the
        ``factor`` column. It is modified in place (the layer column is added).
    :param factor: str, name of the factor column
    :param n: int, number of layers, default 5
    :param kwargs:

        - window: rolling window size, default 600
        - min_periods: minimum number of observations in a window, default 300
        - mode: str, {'loose', 'strict'}, layering mode, default 'loose'.
          ``loose`` uses rolling + percentile rank, then cuts the percentile into
          ``n`` equal-width bins; the original author notes it may carry a small
          amount of look-ahead information.
          ``strict`` runs ``pd.qcut`` on every window and takes the bucket of the
          last observation; per the original author it has no look-ahead
          information but is slower.

    :return: pd.DataFrame, the same ``df`` object with the ``f"{factor}分层"`` column added
    :raises AssertionError: if the factor has too few distinct values (must be more
        than ``2 * n``), contains missing values, ``dt`` has duplicates, or ``mode``
        is neither 'loose' nor 'strict'
    :raises ValueError: if the factor contains inf / -inf
    """
    assert df[factor].nunique() > n * 2, "因子值的取值数量必须大于分层数量"
    assert df[factor].isna().sum() == 0, "因子有缺失值,缺失数量为:{}".format(df[factor].isna().sum())
    assert df['dt'].duplicated().sum() == 0, f"dt 列不能有重复值,存在重复值数量:{df['dt'].duplicated().sum()}"

    window = kwargs.get("window", 600)
    min_periods = kwargs.get("min_periods", 300)
    mode = kwargs.get("mode", "loose")

    # inf / -inf are not allowed in the factor values
    if df[factor].isin([float("inf"), float("-inf")]).any():
        raise ValueError(f"存在 {factor} 为 inf / -inf 的数据")

    roller = df[factor].rolling(window=window, min_periods=min_periods)

    if mode == "loose":
        # Percentile rank of the latest value inside its window, cut into n equal-width bins.
        pct_rank = roller.rank(pct=True, ascending=True)
        bins = [i / n for i in range(n + 1)]
        layer_index = pd.cut(pct_rank, bins=bins, labels=False)
    else:
        assert mode == "strict"
        # Quantile-cut each window and keep the bucket index of its last observation.
        layer_index = roller.apply(
            lambda x: pd.qcut(x, q=n, labels=False, duplicates="drop", retbins=False).values[-1],
            raw=False,
        )

    # Rows without a computable layer get index -1, which renders as layer "00".
    layer_index = layer_index.fillna(-1)
    df[f"{factor}分层"] = layer_index.apply(lambda x: f"第{str(int(x + 1)).zfill(2)}层")
    return df
3 changes: 2 additions & 1 deletion czsc/traders/rwc.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def publish_dataframe(self, df, overwrite=False, batch_size=10000):
:param df: pandas.DataFrame, 必需包含['symbol', 'dt', 'weight']列,
可选['price', 'ref']列, 如没有price则写0, dtype同publish方法
:param overwrite: boolean, 是否覆盖已有记录
:param batch_size: int, 每次发布的最大数量
:return: 成功发布信号的条数
"""
df = df.copy()
Expand Down Expand Up @@ -392,7 +393,7 @@ def get_hist_weights(self, symbol, sdt, edt) -> pd.DataFrame:
price = price if price is None else float(price)
try:
ref = json.loads(ref)
except Exception:
except Exception as e:
ref = ref
weights.append((self.strategy_name, symbol, dt, weight, price, ref))

Expand Down
4 changes: 3 additions & 1 deletion czsc/traders/weight_backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def __init__(self, dfw, digits=2, **kwargs) -> None:
"""
self.kwargs = kwargs
self.dfw = dfw.copy()
self.dfw["dt"] = pd.to_datetime(self.dfw["dt"])
if self.dfw.isnull().sum().sum() > 0:
raise ValueError("dfw 中存在空值, 请先处理")
self.digits = digits
Expand Down Expand Up @@ -553,9 +554,10 @@ def backtest(self, n_jobs=1):
dret = pd.concat([v["daily"] for k, v in res.items() if k in symbols], ignore_index=True)
dret = pd.pivot_table(dret, index="date", columns="symbol", values="return").fillna(0)
dret["total"] = dret[list(res.keys())].mean(axis=1)
dret = dret.round(4).reset_index()
res["品种等权日收益"] = dret

stats = {"开始日期": dret.index.min().strftime("%Y%m%d"), "结束日期": dret.index.max().strftime("%Y%m%d")}
stats = {"开始日期": dret["date"].min().strftime("%Y%m%d"), "结束日期": dret["date"].max().strftime("%Y%m%d")}
stats.update(daily_performance(dret["total"]))
dfp = pd.concat([v["pairs"] for k, v in res.items() if k in symbols], ignore_index=True)
pairs_stats = evaluate_pairs(dfp)
Expand Down
27 changes: 27 additions & 0 deletions czsc/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# coding: utf-8
import os
import pandas as pd
from typing import List, Union

from . import qywx
Expand Down Expand Up @@ -95,6 +96,20 @@ def get_py_namespace(file_py: str, keys: list = []) -> dict:
return namespace


def code_namespace(code: str, keys: list = []) -> dict:
    """Execute a piece of Python source and return the namespace it produces.

    WARNING: ``code`` is run with ``exec``; only pass source text that is trusted.

    :param code: str, Python source code to execute
    :param keys: list, names of the objects to keep; when empty, the whole
        namespace is returned (including the ``code`` entry and whatever ``exec``
        itself adds to the globals dict)
    :return: dict, mapping of name -> object defined by the executed code
    """
    # The source text itself is exposed to the executed code under the name "code".
    scope = {"code": code}
    exec(code, scope)

    if not keys:
        return scope
    return {name: obj for name, obj in scope.items() if name in keys}


def import_by_name(name):
"""通过字符串导入模块、类、函数
Expand Down Expand Up @@ -199,3 +214,15 @@ def mac_address():
x = uuid.UUID(int=uuid.getnode()).hex[-12:].upper()
x = "-".join([x[i : i + 2] for i in range(0, 11, 2)])
return x


def to_arrow(df: pd.DataFrame):
    """Serialize a pandas.DataFrame to bytes through pyarrow.

    The frame is converted to a ``pyarrow.Table`` and written into an in-memory
    buffer with the writer returned by ``pa.ipc.new_file``; the buffer content is
    returned.

    :param df: pd.DataFrame, the data to serialize
    :return: bytes, the content written by the pyarrow IPC file writer
    """
    import io
    import pyarrow as pa

    arrow_table = pa.Table.from_pandas(df)
    buffer = io.BytesIO()
    with buffer:
        ipc_writer = pa.ipc.new_file(buffer, arrow_table.schema)
        with ipc_writer:
            ipc_writer.write_table(arrow_table)
        # Read the buffer only after the writer has been closed, and before the
        # buffer itself is closed.
        payload = buffer.getvalue()
    return payload
1 change: 1 addition & 0 deletions czsc/utils/bar_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def is_trading_time(dt: datetime = datetime.now(), market="A股"):
def get_intraday_times(freq="1分钟", market="A股"):
"""获取指定市场的交易时间段
:param freq: K线周期,如 1分钟、5分钟、15分钟、30分钟、60分钟
:param market: 市场名称,可选值:A股、期货、默认
:return: 交易时间段列表
"""
Expand Down
Loading

0 comments on commit 348ca55

Please sign in to comment.