CTR Model Train & Predict
import "github.com/auxten/go-ctr/recommend"
Constants
// Package-level constants. Their call sites are not visible in this listing,
// so the meanings noted below are assumptions to be confirmed.
const (
// SampleAssembler is 16.
// NOTE(review): presumably the number of concurrent sample-assembling workers — confirm.
SampleAssembler = 16
// StageKey is the string "stage".
// NOTE(review): presumably the context key under which the current Stage is stored — confirm.
StageKey = "stage"
// ItemEmbDim is 16.
// NOTE(review): presumably the dimension of each item embedding vector — confirm.
ItemEmbDim = 16
// ItemEmbWindow is 5.
// NOTE(review): presumably the context window size used when training item embeddings — confirm.
ItemEmbWindow = 5
// UserBehaviorLen is 10.
// NOTE(review): presumably the number of behavior items kept per user as a feature — confirm.
UserBehaviorLen = 10
)
Variables
// Package-level mutable state: three caches, two optional fallback feature
// vectors and two debug ids.
var (
// TODO: maybe add a switch to control whether the training cache is reused
// when predicting.
// NOTE(review): what each cache is keyed by is not visible here; presumably
// user id / item id — confirm.
UserFeatureCache *ccache.Cache
ItemFeatureCache *ccache.Cache
UserBehaviorCache *ccache.Cache
// DefaultUserFeature and DefaultItemFeature are fallbacks: when non-nil, they
// are used to fill in the feature of a user or item that is missing from the
// database.
DefaultUserFeature []float32
DefaultItemFeature []float32
// NOTE(review): DebugUserId and DebugItemId presumably select a single user
// and item for debug output — confirm against their uses.
DebugUserId int
DebugItemId int
)
func BatchPredict
func BatchPredict(ctx context.Context, recSys Predictor, sampleKeys []Sample) (y tensor.Tensor, err error)
func GetItemEmbeddingModelFromUb
func GetItemEmbeddingModelFromUb(ctx context.Context, iSeq ItemEmbedding) (mod model.Model, err error)
func GetSampleVector
func GetSampleVector(ctx context.Context, userFeatureCache *ccache.Cache, itemFeatureCache *ccache.Cache, featureProvider BasicFeatureProvider, sampleKey *Sample) (vec []float32, userFeatureWidth int, itemFeatureWidth int, err error)
func StartHttpApi
func StartHttpApi(predict Predictor, path string, addr string, efs *embed.FS) (err error)
StartHttpApi starts the HTTP API for recommendation. Query it with:
curl --header "Content-Type: application/json" \
--request POST \
--data '{"userId":107,"itemIdList":[1,2,39]}' \
http://localhost:8080/api/v1/recommend
type BasicFeatureProvider
// BasicFeatureProvider is the union of UserFeaturer and ItemFeaturer: an
// implementation must provide both GetUserFeature and GetItemFeature.
type BasicFeatureProvider interface {
UserFeaturer
ItemFeaturer
}
type DashboardOverviewResult
// DashboardOverviewResult is the value returned by
// FeatureOverview.GetDashboardOverview. All fields are plain ints and are
// serialized under the snake_case JSON keys given in their tags.
// NOTE(review): the fields look like counts (users, items, and positive /
// negative samples); what makes a sample "valid" is not visible here — confirm.
type DashboardOverviewResult struct {
Users int `json:"users"`
Items int `json:"items"`
TotalPositive int `json:"total_positive"`
ValidPositive int `json:"valid_positive"`
ValidNegative int `json:"valid_negative"`
}
type FeatureOverview
// FeatureOverview exposes read-only overview queries over users, items and
// the dashboard summary.
type FeatureOverview interface {
// GetUsersFeatureOverview returns an overview of users' features.
// offset and size are used for the paging query.
// NOTE(review): the meaning of opts is not visible here — confirm with implementations.
GetUsersFeatureOverview(ctx context.Context, offset, size int, opts map[string][]string) (UserItemOverviewResult, error)
// GetItemsFeatureOverview returns an overview of items' features.
// offset and size are used for the paging query.
// NOTE(review): the meaning of opts is not visible here — confirm with implementations.
GetItemsFeatureOverview(ctx context.Context, offset, size int, opts map[string][]string) (ItemOverviewResult, error)
// GetDashboardOverview returns the dashboard overview; see DashboardOverviewResult.
GetDashboardOverview(ctx context.Context) (DashboardOverviewResult, error)
}
type Fitter
// Fitter is implemented by anything that can be fitted on a TrainSample.
type Fitter interface {
// Fit takes a training sample and returns a PredictAbstract (a value with
// a Predict method), or an error.
Fit(sample *TrainSample) (PredictAbstract, error)
}
type ItemEmbedding
ItemEmbedding is an interface used to generate item embeddings with an item2vec model from a behavior-based item sequence. Examples: the sequence of items a user liked, bought, or viewed.
type ItemEmbedding interface {
// ItemSeqGenerator returns a receive-only channel of strings, or an error.
// NOTE(review): presumably each string encodes one item sequence; the
// exact encoding is not visible here — confirm with implementations.
ItemSeqGenerator(context.Context) (<-chan string, error)
}
type ItemFeaturer
// ItemFeaturer provides item features as a Tensor.
type ItemFeaturer interface {
// GetItemFeature returns a feature Tensor for the given int, or an error.
// NOTE(review): the int parameter is unnamed; presumably the item id — confirm.
GetItemFeature(context.Context, int) (Tensor, error)
}
type ItemOverView
// ItemOverView describes one item: its id and a map of named feature values.
// It is the element type of ItemOverviewResult.Items.
type ItemOverView struct {
ItemId int `json:"item_id"`
// NOTE(review): ItemFeatures has no json tag, so encoding/json uses the Go
// field name "ItemFeatures" as the key, unlike the snake_case "item_id"
// above — confirm this is intended before changing, since clients may
// depend on the current key.
ItemFeatures map[string]interface{}
}
type ItemOverviewResult
// ItemOverviewResult is the value returned by
// FeatureOverview.GetItemsFeatureOverview: a list of ItemOverView serialized
// under the JSON key "items".
type ItemOverviewResult struct {
Items []ItemOverView `json:"items"`
}
type ItemScore
// ItemScore pairs an item id with a float32 score. It is the element type of
// Rank's result and of RecApiResponse.ItemScoreList, and is serialized with
// the camelCase JSON keys "itemId" and "score".
type ItemScore struct {
ItemId int `json:"itemId"`
Score float32 `json:"score"`
}
func Rank
func Rank(ctx context.Context, recSys Predictor, userId int, itemIds []int) (itemScores []ItemScore, err error)
type PreRanker
// PreRanker declares a PreRank hook.
// NOTE(review): presumably called before ranking when the recommender
// implements it — the call site is not visible here; confirm.
type PreRanker interface {
// PreRank takes a context and returns an error.
PreRank(context.Context) error
}
type PreTrainer
// PreTrainer declares a PreTrain hook.
// NOTE(review): presumably called before training when the recommender
// implements it — the call site is not visible here; confirm.
type PreTrainer interface {
// PreTrain takes a context and returns an error.
PreTrain(context.Context) error
}
type PredictAbstract
// PredictAbstract is the prediction half of a model. It is what Fitter.Fit
// returns and is embedded in Predictor.
type PredictAbstract interface {
// Predict maps an input tensor X to an output tensor. It has no error
// return. Note the type is tensor.Tensor, not this package's Tensor.
// NOTE(review): expected shapes of X and of the result are not visible
// here — confirm with implementations.
Predict(X tensor.Tensor) tensor.Tensor
}
type Predictor
// Predictor combines BasicFeatureProvider (user and item features) with
// PredictAbstract (Predict). It is the type accepted by BatchPredict, Rank
// and StartHttpApi, and the type returned by Train.
type Predictor interface {
BasicFeatureProvider
PredictAbstract
}
func Train
func Train(ctx context.Context, recSys RecSys, mlp Fitter) (model Predictor, err error)
type RecApiRequest
// RecApiRequest is the JSON request shape of the recommend API, e.g.
// {"userId":107,"itemIdList":[1,2,39]} as in the StartHttpApi curl example.
type RecApiRequest struct {
UserId int `json:"userId"`
ItemIdList []int `json:"itemIdList"`
}
type RecApiResponse
// RecApiResponse carries a list of ItemScore under the JSON key
// "itemScoreList".
// NOTE(review): presumably the response body of the recommend API served by
// StartHttpApi — confirm against the handler.
type RecApiResponse struct {
ItemScoreList []ItemScore `json:"itemScoreList"`
}
type RecSys
// RecSys combines BasicFeatureProvider (user and item features) with Trainer
// (SampleGenerator). It is the type accepted by Train and GetSample.
type RecSys interface {
BasicFeatureProvider
Trainer
}
type Sample
// Sample is one (user, item) record with a float32 label and an int64
// timestamp. It is the element type produced by Trainer.SampleGenerator and
// accepted by BatchPredict and GetSampleVector.
// NOTE(review): the unit of Timestamp (seconds vs. milliseconds) and the
// value range of Label are not visible here — confirm.
type Sample struct {
UserId int `json:"userId"`
ItemId int `json:"itemId"`
Label float32 `json:"label"`
Timestamp int64 `json:"timestamp"`
}
type SampleInfo
// SampleInfo holds four half-open [start, end) index ranges, one per feature
// group. It is stored in TrainSample.Info.
// NOTE(review): presumably these are column offsets into one row of
// TrainSample.X — confirm.
type SampleInfo struct {
UserProfileRange [2]int // [start, end)
UserBehaviorRange [2]int // [start, end)
ItemFeatureRange [2]int // [start, end)
CtxFeatureRange [2]int // [start, end)
}
type Stage
// Stage is an int enumeration with two values, declared below.
type Stage int
// Stage values, numbered by iota.
const (
// TrainStage is 0.
TrainStage Stage = iota
// PredictStage is 1.
PredictStage
)
type Tensor
// Tensor is a flat slice of float32 values, returned by GetUserFeature and
// GetItemFeature. It is a different type from tensor.Tensor, which the
// prediction functions use.
type Tensor []float32
type TrainSample
// TrainSample is the training set passed to Fitter.Fit and returned by
// GetSample: float32 inputs X, float32 targets Y, two int dimensions and a
// SampleInfo describing feature ranges.
// NOTE(review): presumably X is a row-major Rows × XCols matrix flattened into
// one slice and Y holds one label per row — confirm against GetSample.
type TrainSample struct {
X []float32
Y []float32
Rows int
XCols int
Info SampleInfo
}
func GetSample
func GetSample(recSys RecSys, ctx context.Context) (sample *TrainSample, err error)
type Trainer
// Trainer supplies samples as a stream. It is embedded in RecSys.
type Trainer interface {
// SampleGenerator returns a receive-only channel of Sample, or an error.
// NOTE(review): who closes the channel and when is not visible here —
// confirm with implementations.
SampleGenerator(context.Context) (<-chan Sample, error)
}
type UserBehavior
UserBehavior interface is used to get the user behavior feature. Typically, it is the user's clicked/bought/liked item id list, ordered by time descending. During training, you should limit the sequence to avoid time travel (using behavior that happened after the sample);
maxPk or maxTs can be used for this:
- maxPk is the max primary key of the user behavior table.
- maxTs is the max timestamp of the user behavior table.
- maxLen is the max length of the user behavior sequence. If the total length is
greater than maxLen, the sequence is truncated from the tail,
keeping only the latest maxLen items.
As a special case, -1 means no limit. During prediction, you should use the latest user behavior sequence.
type UserBehavior interface {
// GetUserBehavior returns the item id sequence for userId, or an error.
// maxLen, maxPk and maxTs bound the returned sequence as described in the
// UserBehavior documentation.
GetUserBehavior(ctx context.Context, userId int,
maxLen int64, maxPk int64, maxTs int64) (itemSeq []int, err error)
}
type UserFeaturer
// UserFeaturer provides user features as a Tensor.
type UserFeaturer interface {
// GetUserFeature returns a feature Tensor for the given int, or an error.
// NOTE(review): the int parameter is unnamed; presumably the user id — confirm.
GetUserFeature(context.Context, int) (Tensor, error)
}
type UserItemOverview
// UserItemOverview describes one user: its id and a map of named feature
// values. It is the element type of UserItemOverviewResult.Users.
type UserItemOverview struct {
UserId int `json:"user_id"`
// NOTE(review): UserFeatures has no json tag, so encoding/json uses the Go
// field name "UserFeatures" as the key, unlike the snake_case "user_id"
// above — confirm this is intended before changing, since clients may
// depend on the current key.
UserFeatures map[string]interface{}
}
type UserItemOverviewResult
// UserItemOverviewResult is the value returned by
// FeatureOverview.GetUsersFeatureOverview: a list of UserItemOverview
// serialized under the JSON key "users".
type UserItemOverviewResult struct {
Users []UserItemOverview `json:"users"`
}