migration tool
This commit is contained in:
parent
d4d0dc01ea
commit
c51ff4a88d
13
go.mod
13
go.mod
|
|
@ -6,7 +6,8 @@ require (
|
|||
github.com/blevesearch/bleve/v2 v2.5.3
|
||||
github.com/gin-gonic/gin v1.11.0
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/jackc/pgx/v5 v5.6.0
|
||||
github.com/jackc/pgx/v5 v5.7.5
|
||||
github.com/pressly/goose/v3 v3.26.0
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
)
|
||||
|
|
@ -41,27 +42,29 @@ require (
|
|||
github.com/go-playground/validator/v10 v10.27.0 // indirect
|
||||
github.com/goccy/go-json v0.10.2 // indirect
|
||||
github.com/goccy/go-yaml v1.18.0 // indirect
|
||||
github.com/golang/protobuf v1.5.0 // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/golang/snappy v0.0.4 // indirect
|
||||
github.com/jackc/pgpassfile v1.0.0 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.1 // indirect
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
|
||||
github.com/jackc/puddle/v2 v2.2.2 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/leodido/go-urn v1.4.0 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mfridman/interpolate v0.0.2 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/mschoch/smat v0.2.0 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.2.4 // indirect
|
||||
github.com/quic-go/qpack v0.5.1 // indirect
|
||||
github.com/quic-go/quic-go v0.54.0 // indirect
|
||||
github.com/rogpeppe/go-internal v1.14.1 // indirect
|
||||
github.com/sethvargo/go-retry v0.3.0 // indirect
|
||||
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
|
||||
github.com/ugorji/go/codec v1.3.0 // indirect
|
||||
go.etcd.io/bbolt v1.4.0 // indirect
|
||||
go.uber.org/mock v0.5.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
golang.org/x/arch v0.20.0 // indirect
|
||||
golang.org/x/crypto v0.40.0 // indirect
|
||||
golang.org/x/mod v0.25.0 // indirect
|
||||
|
|
|
|||
43
go.sum
43
go.sum
|
|
@ -49,6 +49,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3
|
|||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/gabriel-vasile/mimetype v1.4.8 h1:FfZ3gj38NjllZIeJAmMhr+qKL8Wu+nOoI3GqacKw1NM=
|
||||
github.com/gabriel-vasile/mimetype v1.4.8/go.mod h1:ByKUIKGjh1ODkGM1asKUbQZOLGrPjydw3hYPU2YU9t8=
|
||||
github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w=
|
||||
|
|
@ -67,11 +69,10 @@ github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
|
|||
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
|
||||
github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
|
||||
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
|
||||
github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
|
||||
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
|
|
@ -79,12 +80,12 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
|||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
|
||||
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
|
||||
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
|
||||
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
|
||||
github.com/jackc/pgx/v5 v5.6.0 h1:SWJzexBzPL5jb0GEsrPMLIsi/3jOo7RHlzTjcAeDrPY=
|
||||
github.com/jackc/pgx/v5 v5.6.0/go.mod h1:DNZ/vlrUnhWCoFGxHAG8U2ljioxukquj7utPDgtQdTw=
|
||||
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
|
||||
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo=
|
||||
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
|
||||
github.com/jackc/pgx/v5 v5.7.5 h1:JHGfMnQY+IEtGM63d+NGMjoRpysB2JBwDr5fsngwmJs=
|
||||
github.com/jackc/pgx/v5 v5.7.5/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M=
|
||||
github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo=
|
||||
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
|
||||
|
|
@ -97,22 +98,32 @@ github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
|||
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
|
||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||
github.com/mfridman/interpolate v0.0.2 h1:pnuTK7MQIxxFz1Gr+rjSIx9u7qVjf5VOoM/u6BbAxPY=
|
||||
github.com/mfridman/interpolate v0.0.2/go.mod h1:p+7uk6oE07mpE/Ik1b8EckO0O4ZXiGAfshKBWLUM9Xg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
|
||||
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
|
||||
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
|
||||
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
|
||||
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
|
||||
github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/pressly/goose/v3 v3.26.0 h1:KJakav68jdH0WDvoAcj8+n61WqOIaPGgH0bJWS6jpmM=
|
||||
github.com/pressly/goose/v3 v3.26.0/go.mod h1:4hC1KrritdCxtuFsqgs1R4AU5bWtTAf+cnWvfhf2DNY=
|
||||
github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI=
|
||||
github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg=
|
||||
github.com/quic-go/quic-go v0.54.0 h1:6s1YB9QotYI6Ospeiguknbp2Znb/jZYjZLRXn9kMQBg=
|
||||
github.com/quic-go/quic-go v0.54.0/go.mod h1:e68ZEaCdyviluZmy44P6Iey98v/Wfz6HCjQEm+l8zTY=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE=
|
||||
github.com/sethvargo/go-retry v0.3.0/go.mod h1:mNX17F0C/HguQMyMyJxcnU471gOZGxCLyYaFyAZraas=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
|
|
@ -133,10 +144,14 @@ go.etcd.io/bbolt v1.4.0 h1:TU77id3TnN/zKr7CO/uk+fBCwF2jGcMuw2B/FMAzYIk=
|
|||
go.etcd.io/bbolt v1.4.0/go.mod h1:AsD+OCi/qPN1giOX1aiLAha3o1U8rAz65bvN4j0sRuk=
|
||||
go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU=
|
||||
go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM=
|
||||
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
|
||||
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
|
||||
golang.org/x/arch v0.20.0 h1:dx1zTU0MAE98U+TQ8BLl7XsJbgze2WnNKF/8tGp/Q6c=
|
||||
golang.org/x/arch v0.20.0/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
|
||||
golang.org/x/crypto v0.40.0 h1:r4x+VvoG5Fm+eJcxMaY8CQM7Lb0l1lsmjGBQ6s8BfKM=
|
||||
golang.org/x/crypto v0.40.0/go.mod h1:Qr1vMER5WyS2dfPHAlsOj01wgLbsyWtFn/aY+5+ZdxY=
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
|
||||
golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w=
|
||||
golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
|
||||
golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
|
||||
|
|
@ -152,8 +167,6 @@ golang.org/x/text v0.27.0 h1:4fGWRpyh641NLlecmyl4LOe6yDdfaYNrGb2zdfo4JV4=
|
|||
golang.org/x/text v0.27.0/go.mod h1:1D28KMCvyooCX9hBiosv5Tz/+YLxj0j7XhWjpSUF7CU=
|
||||
golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
|
||||
golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||
google.golang.org/protobuf v1.36.9 h1:w2gp2mA27hUeUzj9Ex9FBjsBm40zfaDtEWow293U7Iw=
|
||||
google.golang.org/protobuf v1.36.9/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
|
|
@ -163,3 +176,11 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C
|
|||
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
modernc.org/libc v1.66.3 h1:cfCbjTUcdsKyyZZfEUKfoHcP3S0Wkvz3jgSzByEWVCQ=
|
||||
modernc.org/libc v1.66.3/go.mod h1:XD9zO8kt59cANKvHPXpx7yS2ELPheAey0vjIuZOhOU8=
|
||||
modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU=
|
||||
modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg=
|
||||
modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI=
|
||||
modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw=
|
||||
modernc.org/sqlite v1.38.2 h1:Aclu7+tgjgcQVShZqim41Bbw9Cho0y/7WzYptXqkEek=
|
||||
modernc.org/sqlite v1.38.2/go.mod h1:cPTJYSlgg3Sfg046yBShXENNtPrWrDX8bsbAQBzgQ5E=
|
||||
|
|
|
|||
18
main.go
18
main.go
|
|
@ -2,11 +2,14 @@ package main
|
|||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"html/template"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
_ "github.com/jackc/pgx/v5/stdlib"
|
||||
"github.com/pressly/goose/v3"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
|
|
@ -43,6 +46,21 @@ func main() {
|
|||
// Initialize PostgreSQL repository first
|
||||
dsn := buildDefaultDSN()
|
||||
logrus.Info("Connecting to PostgreSQL with DSN: ", dsn)
|
||||
|
||||
// Run goose migrations using database/sql with pgx driver before initializing repository
|
||||
if dsn != "" {
|
||||
sqlDB, err := sql.Open("pgx", dsn)
|
||||
if err != nil {
|
||||
logrus.Fatalf("Failed to open database for migration: %v", err)
|
||||
}
|
||||
defer sqlDB.Close()
|
||||
goose.SetDialect("postgres")
|
||||
if err := goose.Up(sqlDB, "migrations"); err != nil {
|
||||
logrus.Fatalf("Goose migration failed: %v", err)
|
||||
}
|
||||
logrus.Info("Database migrations applied successfully (goose)")
|
||||
}
|
||||
|
||||
repo, err := NewPGChatRepository(context.Background(), dsn)
|
||||
if err != nil {
|
||||
logrus.WithError(err).Warn("PostgreSQL repository disabled (connection failed)")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
-- +goose Up
|
||||
-- Create chat_interactions table
|
||||
CREATE TABLE IF NOT EXISTS chat_interactions (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
correlation_id TEXT NOT NULL,
|
||||
user_message TEXT NOT NULL,
|
||||
translate TEXT,
|
||||
animal TEXT,
|
||||
keywords TEXT[] NOT NULL,
|
||||
best_visit_id TEXT,
|
||||
total_price INT,
|
||||
total_duration INT
|
||||
);
|
||||
|
||||
-- Create chat_llm_raw table
|
||||
CREATE TABLE IF NOT EXISTS chat_llm_raw (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
correlation_id TEXT NOT NULL,
|
||||
phase TEXT NOT NULL,
|
||||
raw_json TEXT
|
||||
);
|
||||
|
||||
-- Create knowledgeModel table
|
||||
CREATE TABLE IF NOT EXISTS knowledgeModel (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
knowledge_text TEXT NOT NULL
|
||||
);
|
||||
|
||||
-- Drop legacy columns if they exist
|
||||
ALTER TABLE chat_interactions DROP COLUMN IF EXISTS raw_keywords_json;
|
||||
ALTER TABLE chat_interactions DROP COLUMN IF EXISTS raw_disambig_json;
|
||||
|
||||
-- +goose Down
|
||||
DROP TABLE IF EXISTS chat_llm_raw;
|
||||
DROP TABLE IF EXISTS chat_interactions;
|
||||
DROP TABLE IF EXISTS knowledgeModel;
|
||||
|
|
@ -70,61 +70,10 @@ func NewPGChatRepository(ctx context.Context, dsn string) (*PGChatRepository, er
|
|||
return nil, err
|
||||
}
|
||||
r := &PGChatRepository{pool: p}
|
||||
if err := r.ensureSchema(ctx); err != nil {
|
||||
p.Close()
|
||||
return nil, err
|
||||
}
|
||||
// Schema migration will be handled by a migration tool
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// ensureSchema creates/adjusts tables. Drops legacy raw columns.
|
||||
func (r *PGChatRepository) ensureSchema(ctx context.Context) error {
|
||||
ddlInteractions := `CREATE TABLE IF NOT EXISTS chat_interactions (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
correlation_id TEXT NOT NULL,
|
||||
user_message TEXT NOT NULL,
|
||||
translate TEXT,
|
||||
animal TEXT,
|
||||
keywords TEXT[] NOT NULL,
|
||||
best_visit_id TEXT,
|
||||
total_price INT,
|
||||
total_duration INT
|
||||
);`
|
||||
if _, err := r.pool.Exec(ctx, ddlInteractions); err != nil {
|
||||
return err
|
||||
}
|
||||
ddlRaw := `CREATE TABLE IF NOT EXISTS chat_llm_raw (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
correlation_id TEXT NOT NULL,
|
||||
phase TEXT NOT NULL,
|
||||
raw_json TEXT
|
||||
);`
|
||||
if _, err := r.pool.Exec(ctx, ddlRaw); err != nil {
|
||||
return err
|
||||
}
|
||||
// Add knowledgeModel table
|
||||
ddlKnowledgeModel := `CREATE TABLE IF NOT EXISTS knowledgeModel (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
knowledge_text TEXT NOT NULL
|
||||
);`
|
||||
if _, err := r.pool.Exec(ctx, ddlKnowledgeModel); err != nil {
|
||||
return err
|
||||
}
|
||||
// Legacy column cleanup (ignore errors)
|
||||
for _, drop := range []string{
|
||||
"ALTER TABLE chat_interactions DROP COLUMN IF EXISTS raw_keywords_json",
|
||||
"ALTER TABLE chat_interactions DROP COLUMN IF EXISTS raw_disambig_json",
|
||||
} {
|
||||
if _, err := r.pool.Exec(ctx, drop); err != nil {
|
||||
logrus.WithError(err).Debug("drop legacy column failed (ignored)")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// SaveChatInteraction inserts a record
|
||||
func (r *PGChatRepository) SaveChatInteraction(ctx context.Context, rec ChatInteraction) error {
|
||||
if r == nil || r.pool == nil {
|
||||
|
|
@ -164,10 +113,8 @@ func (r *PGChatRepository) ListChatInteractions(ctx context.Context, limit, offs
|
|||
rows, err := r.pool.Query(ctx, qry, limit, offset)
|
||||
if err != nil {
|
||||
if pgErr, ok := err.(*pgconn.PgError); ok && (pgErr.Code == "42P01" || pgErr.Code == "42703") {
|
||||
logrus.WithError(err).Warn("listing: attempting schema repair")
|
||||
if r.ensureSchema(context.Background()) == nil {
|
||||
rows, err = r.pool.Query(ctx, qry, limit, offset)
|
||||
}
|
||||
logrus.WithError(err).Warn("listing: schema missing or column not found; please run migrations")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
kind: pipeline
|
||||
name: default
|
||||
|
||||
workspace:
|
||||
base: /go
|
||||
path: src/github.com/RoaringBitmap/roaring
|
||||
|
||||
steps:
|
||||
- name: test
|
||||
image: golang
|
||||
commands:
|
||||
- go get -t
|
||||
- go test
|
||||
- go build -tags appengine
|
||||
- go test -tags appengine
|
||||
- GOARCH=386 go build
|
||||
- GOARCH=386 go test
|
||||
- GOARCH=arm go build
|
||||
- GOARCH=arm64 go build
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
*~
|
||||
roaring-fuzz.zip
|
||||
workdir
|
||||
coverage.out
|
||||
testdata/all3.classic
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
# This is the official list of roaring authors for copyright purposes.
|
||||
|
||||
Todd Gruben (@tgruben),
|
||||
Daniel Lemire (@lemire),
|
||||
Elliot Murphy (@statik),
|
||||
Bob Potter (@bpot),
|
||||
Tyson Maly (@tvmaly),
|
||||
Will Glynn (@willglynn),
|
||||
Brent Pedersen (@brentp)
|
||||
Maciej Biłas (@maciej),
|
||||
Joe Nall (@joenall)
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
# This is the official list of roaring contributors
|
||||
|
||||
Todd Gruben (@tgruben),
|
||||
Daniel Lemire (@lemire),
|
||||
Elliot Murphy (@statik),
|
||||
Bob Potter (@bpot),
|
||||
Tyson Maly (@tvmaly),
|
||||
Will Glynn (@willglynn),
|
||||
Brent Pedersen (@brentp),
|
||||
Jason E. Aten (@glycerine),
|
||||
Vali Malinoiu (@0x4139),
|
||||
Forud Ghafouri (@fzerorubigd),
|
||||
Joe Nall (@joenall),
|
||||
(@fredim),
|
||||
Edd Robinson (@e-dard),
|
||||
Alexander Petrov (@alldroll),
|
||||
Guy Molinari (@guymolinari),
|
||||
Ling Jin (@JinLingChristopher)
|
||||
|
|
@ -0,0 +1,235 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2016 by the authors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
================================================================================
|
||||
|
||||
Portions of runcontainer.go are from the Go standard library, which is licensed
|
||||
under:
|
||||
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright 2016 by the authors
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -0,0 +1,417 @@
|
|||
# roaring
|
||||
|
||||
[](https://godoc.org/github.com/RoaringBitmap/roaring) [](https://goreportcard.com/report/github.com/RoaringBitmap/roaring)
|
||||
|
||||

|
||||

|
||||

|
||||
=============
|
||||
|
||||
This is a go version of the Roaring bitmap data structure.
|
||||
|
||||
Roaring bitmaps are used by several major systems such as [Apache Lucene][lucene] and derivative systems such as [Solr][solr] and
|
||||
[Elasticsearch][elasticsearch], [Apache Druid (Incubating)][druid], [LinkedIn Pinot][pinot], [Netflix Atlas][atlas], [Apache Spark][spark], [OpenSearchServer][opensearchserver], [anacrolix/torrent][anacrolix/torrent], [Whoosh][whoosh], [Redpanda](https://github.com/redpanda-data/redpanda), [Pilosa][pilosa], [Microsoft Visual Studio Team Services (VSTS)][vsts], and eBay's [Apache Kylin][kylin]. The YouTube SQL Engine, [Google Procella](https://research.google/pubs/pub48388/), uses Roaring bitmaps for indexing.
|
||||
|
||||
[lucene]: https://lucene.apache.org/
|
||||
[solr]: https://lucene.apache.org/solr/
|
||||
[elasticsearch]: https://www.elastic.co/products/elasticsearch
|
||||
[druid]: https://druid.apache.org/
|
||||
[spark]: https://spark.apache.org/
|
||||
[opensearchserver]: http://www.opensearchserver.com
|
||||
[anacrolix/torrent]: https://github.com/anacrolix/torrent
|
||||
[whoosh]: https://bitbucket.org/mchaput/whoosh/wiki/Home
|
||||
[pilosa]: https://www.pilosa.com/
|
||||
[kylin]: http://kylin.apache.org/
|
||||
[pinot]: http://github.com/linkedin/pinot/wiki
|
||||
[vsts]: https://www.visualstudio.com/team-services/
|
||||
[atlas]: https://github.com/Netflix/atlas
|
||||
|
||||
Roaring bitmaps are found to work well in many important applications:
|
||||
|
||||
> Use Roaring for bitmap compression whenever possible. Do not use other bitmap compression methods ([Wang et al., SIGMOD 2017](http://db.ucsd.edu/wp-content/uploads/2017/03/sidm338-wangA.pdf))
|
||||
|
||||
|
||||
The ``roaring`` Go library is used by
|
||||
* [anacrolix/torrent]
|
||||
* [InfluxDB](https://www.influxdata.com)
|
||||
* [Pilosa](https://www.pilosa.com/)
|
||||
* [Bleve](http://www.blevesearch.com)
|
||||
* [Weaviate](https://github.com/weaviate/weaviate)
|
||||
* [lindb](https://github.com/lindb/lindb)
|
||||
* [Elasticell](https://github.com/deepfabric/elasticell)
|
||||
* [SourceGraph](https://github.com/sourcegraph/sourcegraph)
|
||||
* [M3](https://github.com/m3db/m3)
|
||||
* [trident](https://github.com/NetApp/trident)
|
||||
* [Husky](https://www.datadoghq.com/blog/engineering/introducing-husky/)
|
||||
* [FrostDB](https://github.com/polarsignals/frostdb)
|
||||
|
||||
This library is used in production in several systems, it is part of the [Awesome Go collection](https://awesome-go.com).
|
||||
|
||||
|
||||
There are also [Java](https://github.com/RoaringBitmap/RoaringBitmap) and [C/C++](https://github.com/RoaringBitmap/CRoaring) versions. The Java, C, C++ and Go version are binary compatible: e.g, you can save bitmaps
|
||||
from a Java program and load them back in Go, and vice versa. We have a [format specification](https://github.com/RoaringBitmap/RoaringFormatSpec).
|
||||
|
||||
|
||||
This code is licensed under Apache License, Version 2.0 (ASL2.0).
|
||||
|
||||
Copyright 2016-... by the authors.
|
||||
|
||||
When should you use a bitmap?
|
||||
===================================
|
||||
|
||||
|
||||
Sets are a fundamental abstraction in
|
||||
software. They can be implemented in various
|
||||
ways, as hash sets, as trees, and so forth.
|
||||
In databases and search engines, sets are often an integral
|
||||
part of indexes. For example, we may need to maintain a set
|
||||
of all documents or rows (represented by numerical identifier)
|
||||
that satisfy some property. Besides adding or removing
|
||||
elements from the set, we need fast functions
|
||||
to compute the intersection, the union, the difference between sets, and so on.
|
||||
|
||||
|
||||
To implement a set
|
||||
of integers, a particularly appealing strategy is the
|
||||
bitmap (also called bitset or bit vector). Using n bits,
|
||||
we can represent any set made of the integers from the range
|
||||
[0,n): the ith bit is set to one if integer i is present in the set.
|
||||
Commodity processors use words of W=32 or W=64 bits. By combining many such words, we can
|
||||
support large values of n. Intersections, unions and differences can then be implemented
|
||||
as bitwise AND, OR and ANDNOT operations.
|
||||
More complicated set functions can also be implemented as bitwise operations.
|
||||
|
||||
When the bitset approach is applicable, it can be orders of
|
||||
magnitude faster than other possible implementation of a set (e.g., as a hash set)
|
||||
while using several times less memory.
|
||||
|
||||
However, a bitset, even a compressed one is not always applicable. For example, if
|
||||
you have 1000 random-looking integers, then a simple array might be the best representation.
|
||||
We refer to this case as the "sparse" scenario.
|
||||
|
||||
When should you use compressed bitmaps?
|
||||
===================================
|
||||
|
||||
An uncompressed BitSet can use a lot of memory. For example, if you take a BitSet
|
||||
and set the bit at position 1,000,000 to true and you have just over 100kB. That is over 100kB
|
||||
to store the position of one bit. This is wasteful even if you do not care about memory:
|
||||
suppose that you need to compute the intersection between this BitSet and another one
|
||||
that has a bit at position 1,000,001 to true, then you need to go through all these zeroes,
|
||||
whether you like it or not. That can become very wasteful.
|
||||
|
||||
This being said, there are definitively cases where attempting to use compressed bitmaps is wasteful.
|
||||
For example, if you have a small universe size. E.g., your bitmaps represent sets of integers
|
||||
from [0,n) where n is small (e.g., n=64 or n=128). If you can use uncompressed BitSet and
|
||||
it does not blow up your memory usage, then compressed bitmaps are probably not useful
|
||||
to you. In fact, if you do not need compression, then a BitSet offers remarkable speed.
|
||||
|
||||
The sparse scenario is another use case where compressed bitmaps should not be used.
|
||||
Keep in mind that random-looking data is usually not compressible. E.g., if you have a small set of
|
||||
32-bit random integers, it is not mathematically possible to use far less than 32 bits per integer,
|
||||
and attempts at compression can be counterproductive.
|
||||
|
||||
How does Roaring compares with the alternatives?
|
||||
==================================================
|
||||
|
||||
|
||||
Most alternatives to Roaring are part of a larger family of compressed bitmaps that are run-length-encoded
|
||||
bitmaps. They identify long runs of 1s or 0s and they represent them with a marker word.
|
||||
If you have a local mix of 1s and 0, you use an uncompressed word.
|
||||
|
||||
There are many formats in this family:
|
||||
|
||||
* Oracle's BBC is an obsolete format at this point: though it may provide good compression,
|
||||
it is likely much slower than more recent alternatives due to excessive branching.
|
||||
* WAH is a patented variation on BBC that provides better performance.
|
||||
* Concise is a variation on the patented WAH. It some specific instances, it can compress
|
||||
much better than WAH (up to 2x better), but it is generally slower.
|
||||
* EWAH is both free of patent, and it is faster than all the above. On the downside, it
|
||||
does not compress quite as well. It is faster because it allows some form of "skipping"
|
||||
over uncompressed words. So though none of these formats are great at random access, EWAH
|
||||
is better than the alternatives.
|
||||
|
||||
|
||||
|
||||
There is a big problem with these formats however that can hurt you badly in some cases: there is no random access. If you want to check whether a given value is present in the set, you have to start from the beginning and "uncompress" the whole thing. This means that if you want to intersect a big set with a large set, you still have to uncompress the whole big set in the worst case...
|
||||
|
||||
Roaring solves this problem. It works in the following manner. It divides the data into chunks of 2<sup>16</sup> integers
|
||||
(e.g., [0, 2<sup>16</sup>), [2<sup>16</sup>, 2 x 2<sup>16</sup>), ...). Within a chunk, it can use an uncompressed bitmap, a simple list of integers,
|
||||
or a list of runs. Whatever format it uses, they all allow you to check for the presence of any one value quickly
|
||||
(e.g., with a binary search). The net result is that Roaring can compute many operations much faster than run-length-encoded
|
||||
formats like WAH, EWAH, Concise... Maybe surprisingly, Roaring also generally offers better compression ratios.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### References
|
||||
|
||||
- Daniel Lemire, Owen Kaser, Nathan Kurz, Luca Deri, Chris O'Hara, François Saint-Jacques, Gregory Ssi-Yan-Kai, Roaring Bitmaps: Implementation of an Optimized Software Library, Software: Practice and Experience 48 (4), 2018 [arXiv:1709.07821](https://arxiv.org/abs/1709.07821)
|
||||
- Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin,
|
||||
Better bitmap performance with Roaring bitmaps,
|
||||
Software: Practice and Experience 46 (5), 2016.[arXiv:1402.6407](http://arxiv.org/abs/1402.6407) This paper used data from http://lemire.me/data/realroaring2014.html
|
||||
- Daniel Lemire, Gregory Ssi-Yan-Kai, Owen Kaser, Consistently faster and smaller compressed bitmaps with Roaring, Software: Practice and Experience 46 (11), 2016. [arXiv:1603.06549](http://arxiv.org/abs/1603.06549)
|
||||
|
||||
### Dependencies
|
||||
|
||||
Dependencies are fetched automatically by giving the `-t` flag to `go get`.
|
||||
|
||||
they include
|
||||
- github.com/bits-and-blooms/bitset
|
||||
- github.com/mschoch/smat
|
||||
- github.com/glycerine/go-unsnap-stream
|
||||
- github.com/philhofer/fwd
|
||||
- github.com/jtolds/gls
|
||||
|
||||
Note that the smat library requires Go 1.15 or better.
|
||||
|
||||
#### Installation
|
||||
|
||||
- go get -t github.com/RoaringBitmap/roaring
|
||||
|
||||
### Instructions for contributors
|
||||
|
||||
Using bash or other common shells:
|
||||
```
|
||||
$ git clone git@github.com:RoaringBitmap/roaring.git
|
||||
$ export GO111MODULE=on
|
||||
$ go mod tidy
|
||||
$ go test -v
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
Here is a simplified but complete example:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
"bytes"
|
||||
)
|
||||
|
||||
|
||||
func main() {
|
||||
// example inspired by https://github.com/fzandona/goroar
|
||||
fmt.Println("==roaring==")
|
||||
rb1 := roaring.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
|
||||
fmt.Println(rb1.String())
|
||||
|
||||
rb2 := roaring.BitmapOf(3, 4, 1000)
|
||||
fmt.Println(rb2.String())
|
||||
|
||||
rb3 := roaring.New()
|
||||
fmt.Println(rb3.String())
|
||||
|
||||
fmt.Println("Cardinality: ", rb1.GetCardinality())
|
||||
|
||||
fmt.Println("Contains 3? ", rb1.Contains(3))
|
||||
|
||||
rb1.And(rb2)
|
||||
|
||||
rb3.Add(1)
|
||||
rb3.Add(5)
|
||||
|
||||
rb3.Or(rb1)
|
||||
|
||||
// computes union of the three bitmaps in parallel using 4 workers
|
||||
roaring.ParOr(4, rb1, rb2, rb3)
|
||||
// computes intersection of the three bitmaps in parallel using 4 workers
|
||||
roaring.ParAnd(4, rb1, rb2, rb3)
|
||||
|
||||
|
||||
// prints 1, 3, 4, 5, 1000
|
||||
i := rb3.Iterator()
|
||||
for i.HasNext() {
|
||||
fmt.Println(i.Next())
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// next we include an example of serialization
|
||||
buf := new(bytes.Buffer)
|
||||
rb1.WriteTo(buf) // we omit error handling
|
||||
newrb:= roaring.New()
|
||||
newrb.ReadFrom(buf)
|
||||
if rb1.Equals(newrb) {
|
||||
fmt.Println("I wrote the content to a byte stream and read it back.")
|
||||
}
|
||||
// you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator()
|
||||
}
|
||||
```
|
||||
|
||||
If you wish to use serialization and handle errors, you might want to
|
||||
consider the following sample of code:
|
||||
|
||||
```go
|
||||
rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000)
|
||||
buf := new(bytes.Buffer)
|
||||
size,err:=rb.WriteTo(buf)
|
||||
if err != nil {
|
||||
fmt.Println("Failed writing") // return or panic
|
||||
}
|
||||
newrb:= New()
|
||||
size,err=newrb.ReadFrom(buf)
|
||||
if err != nil {
|
||||
fmt.Println("Failed reading") // return or panic
|
||||
}
|
||||
// if buf is an untrusted source, you should validate the result
|
||||
// (this adds a bit of complexity but it is necessary for security)
|
||||
if newrb.Validate() != nil {
|
||||
fmt.Println("Failed validation") // return or panic
|
||||
}
|
||||
if ! rb.Equals(newrb) {
|
||||
fmt.Println("Cannot retrieve serialized version")
|
||||
}
|
||||
```
|
||||
|
||||
Given N integers in [0,x), then the serialized size in bytes of
|
||||
a Roaring bitmap should never exceed this bound:
|
||||
|
||||
`` 8 + 9 * ((long)x+65535)/65536 + 2 * N ``
|
||||
|
||||
That is, given a fixed overhead for the universe size (x), Roaring
|
||||
bitmaps never use more than 2 bytes per integer. You can call
|
||||
``BoundSerializedSizeInBytes`` for a more precise estimate.
|
||||
|
||||
### 64-bit Roaring
|
||||
|
||||
By default, roaring is used to stored unsigned 32-bit integers. However, we also offer
|
||||
an extension dedicated to 64-bit integers. It supports roughly the same functions:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/RoaringBitmap/roaring/v2/roaring64"
|
||||
"bytes"
|
||||
)
|
||||
|
||||
|
||||
func main() {
|
||||
// example inspired by https://github.com/fzandona/goroar
|
||||
fmt.Println("==roaring64==")
|
||||
rb1 := roaring64.BitmapOf(1, 2, 3, 4, 5, 100, 1000)
|
||||
fmt.Println(rb1.String())
|
||||
|
||||
rb2 := roaring64.BitmapOf(3, 4, 1000)
|
||||
fmt.Println(rb2.String())
|
||||
|
||||
rb3 := roaring64.New()
|
||||
fmt.Println(rb3.String())
|
||||
|
||||
fmt.Println("Cardinality: ", rb1.GetCardinality())
|
||||
|
||||
fmt.Println("Contains 3? ", rb1.Contains(3))
|
||||
|
||||
rb1.And(rb2)
|
||||
|
||||
rb3.Add(1)
|
||||
rb3.Add(5)
|
||||
|
||||
rb3.Or(rb1)
|
||||
|
||||
|
||||
|
||||
// prints 1, 3, 4, 5, 1000
|
||||
i := rb3.Iterator()
|
||||
for i.HasNext() {
|
||||
fmt.Println(i.Next())
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// next we include an example of serialization
|
||||
buf := new(bytes.Buffer)
|
||||
rb1.WriteTo(buf) // we omit error handling
|
||||
newrb:= roaring64.New()
|
||||
newrb.ReadFrom(buf)
|
||||
if rb1.Equals(newrb) {
|
||||
fmt.Println("I wrote the content to a byte stream and read it back.")
|
||||
}
|
||||
// you can iterate over bitmaps using ReverseIterator(), Iterator, ManyIterator()
|
||||
}
|
||||
```
|
||||
|
||||
Only the 32-bit roaring format is standard and cross-operable between Java, C++, C and Go. There is no guarantee that the 64-bit versions are compatible.
|
||||
|
||||
### Documentation
|
||||
|
||||
Current documentation is available at https://pkg.go.dev/github.com/RoaringBitmap/roaring and https://pkg.go.dev/github.com/RoaringBitmap/roaring/roaring64
|
||||
|
||||
### Goroutine safety
|
||||
|
||||
In general, it should not generally be considered safe to access
|
||||
the same bitmaps using different goroutines--they are left
|
||||
unsynchronized for performance. Should you want to access
|
||||
a Bitmap from more than one goroutine, you should
|
||||
provide synchronization. Typically this is done by using channels to pass
|
||||
the *Bitmap around (in Go style; so there is only ever one owner),
|
||||
or by using `sync.Mutex` to serialize operations on Bitmaps.
|
||||
|
||||
### Coverage
|
||||
|
||||
We test our software. For a report on our test coverage, see
|
||||
|
||||
https://coveralls.io/github/RoaringBitmap/roaring?branch=master
|
||||
|
||||
### Benchmark
|
||||
|
||||
Type
|
||||
|
||||
go test -bench Benchmark -run -
|
||||
|
||||
To run benchmarks on [Real Roaring Datasets](https://github.com/RoaringBitmap/real-roaring-datasets)
|
||||
run the following:
|
||||
|
||||
```sh
|
||||
go get github.com/RoaringBitmap/real-roaring-datasets
|
||||
BENCH_REAL_DATA=1 go test -bench BenchmarkRealData -run -
|
||||
```
|
||||
|
||||
### Iterative use
|
||||
|
||||
You can use roaring with gore:
|
||||
|
||||
- go install github.com/x-motemen/gore/cmd/gore@latest
|
||||
- Make sure that ``$GOPATH/bin`` is in your ``$PATH``.
|
||||
|
||||
```go
|
||||
$ gore
|
||||
gore version 0.2.6 :help for help
|
||||
gore> :import github.com/RoaringBitmap/roaring
|
||||
gore> x:=roaring.New()
|
||||
gore> x.Add(1)
|
||||
gore> x.String()
|
||||
"{1}"
|
||||
```
|
||||
|
||||
|
||||
### Fuzzy testing
|
||||
|
||||
You can help us test further the library with fuzzy testing:
|
||||
|
||||
go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||
go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||
go-fuzz-build github.com/RoaringBitmap/roaring
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200 -func FuzzSmat
|
||||
|
||||
Let it run, and if the # of crashers is > 0, check out the reports in
|
||||
the workdir where you should be able to find the panic goroutine stack
|
||||
traces.
|
||||
|
||||
You may also replace `-func FuzzSmat` by `-func FuzzSerializationBuffer` or `-func FuzzSerializationStream`.
|
||||
|
||||
### Alternative in Go
|
||||
|
||||
There is a Go version wrapping the C/C++ implementation https://github.com/RoaringBitmap/gocroaring
|
||||
|
||||
For an alternative implementation in Go, see https://github.com/fzandona/goroar
|
||||
The two versions were written independently.
|
||||
|
||||
|
||||
### Mailing list/discussion group
|
||||
|
||||
https://groups.google.com/forum/#!forum/roaring-bitmaps
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,19 @@
|
|||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// countLeadingOnes returns the number of leading zeros bits in x; the result is 64 for x == 0.
|
||||
func countLeadingZeros(x uint64) int {
|
||||
return bits.LeadingZeros64(x)
|
||||
}
|
||||
|
||||
// countLeadingOnes returns the number of leading ones bits in x; the result is 0 for x == 0.
|
||||
func countLeadingOnes(x uint64) int {
|
||||
return bits.LeadingZeros64(^x)
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
//go:build !go1.9
|
||||
// +build !go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// LeadingZeroBits returns the number of consecutive most significant zero
|
||||
// bits of x.
|
||||
func countLeadingZeros(i uint64) int {
|
||||
if i == 0 {
|
||||
return 64
|
||||
}
|
||||
n := 1
|
||||
x := uint32(i >> 32)
|
||||
if x == 0 {
|
||||
n += 32
|
||||
x = uint32(i)
|
||||
}
|
||||
if (x >> 16) == 0 {
|
||||
n += 16
|
||||
x <<= 16
|
||||
}
|
||||
if (x >> 24) == 0 {
|
||||
n += 8
|
||||
x <<= 8
|
||||
}
|
||||
if x>>28 == 0 {
|
||||
n += 4
|
||||
x <<= 4
|
||||
}
|
||||
if x>>30 == 0 {
|
||||
n += 2
|
||||
x <<= 2
|
||||
|
||||
}
|
||||
n -= int(x >> 31)
|
||||
return n
|
||||
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
// countTrailingZeros returns the number of trailing zero bits in x; the result is 64 for x == 0.
|
||||
func countTrailingZeros(x uint64) int {
|
||||
return bits.TrailingZeros64(x)
|
||||
}
|
||||
|
||||
// countTrailingOnes returns the number of trailing one bits in x
|
||||
// The result is 64 for x == 9,223,372,036,854,775,807.
|
||||
// The result is 0 for x == 0.
|
||||
func countTrailingOnes(x uint64) int {
|
||||
return bits.TrailingZeros64(^x)
|
||||
}
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
//go:build !go1.9
|
||||
// +build !go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// Reuse of portions of go/src/math/big standard lib code
|
||||
// under this license:
|
||||
/*
|
||||
Copyright (c) 2009 The Go Authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
const deBruijn32 = 0x077CB531
|
||||
|
||||
var deBruijn32Lookup = []byte{
|
||||
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
|
||||
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9,
|
||||
}
|
||||
|
||||
const deBruijn64 = 0x03f79d71b4ca8b09
|
||||
|
||||
var deBruijn64Lookup = []byte{
|
||||
0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4,
|
||||
62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5,
|
||||
63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11,
|
||||
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
|
||||
}
|
||||
|
||||
// trailingZeroBits returns the number of consecutive least significant zero
|
||||
// bits of x.
|
||||
func countTrailingZeros(x uint64) int {
|
||||
// x & -x leaves only the right-most bit set in the word. Let k be the
|
||||
// index of that bit. Since only a single bit is set, the value is two
|
||||
// to the power of k. Multiplying by a power of two is equivalent to
|
||||
// left shifting, in this case by k bits. The de Bruijn constant is
|
||||
// such that all six bit, consecutive substrings are distinct.
|
||||
// Therefore, if we have a left shifted version of this constant we can
|
||||
// find by how many bits it was shifted by looking at which six bit
|
||||
// substring ended up at the top of the word.
|
||||
// (Knuth, volume 4, section 7.3.1)
|
||||
if x == 0 {
|
||||
// We have to special case 0; the fomula
|
||||
// below doesn't work for 0.
|
||||
return 64
|
||||
}
|
||||
return int(deBruijn64Lookup[((x&-x)*(deBruijn64))>>58])
|
||||
}
|
||||
|
|
@ -0,0 +1,313 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
)
|
||||
|
||||
// Or function that requires repairAfterLazy
|
||||
func lazyOR(x1, x2 *Bitmap) *Bitmap {
|
||||
answer := NewBitmap()
|
||||
pos1 := 0
|
||||
pos2 := 0
|
||||
length1 := x1.highlowcontainer.size()
|
||||
length2 := x2.highlowcontainer.size()
|
||||
main:
|
||||
for (pos1 < length1) && (pos2 < length2) {
|
||||
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
|
||||
for {
|
||||
if s1 < s2 {
|
||||
answer.highlowcontainer.appendCopy(x1.highlowcontainer, pos1)
|
||||
pos1++
|
||||
if pos1 == length1 {
|
||||
break main
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
} else if s1 > s2 {
|
||||
answer.highlowcontainer.appendCopy(x2.highlowcontainer, pos2)
|
||||
pos2++
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
}
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
} else {
|
||||
c1 := x1.highlowcontainer.getContainerAtIndex(pos1)
|
||||
answer.highlowcontainer.appendContainer(s1, c1.lazyOR(x2.highlowcontainer.getContainerAtIndex(pos2)), false)
|
||||
pos1++
|
||||
pos2++
|
||||
if (pos1 == length1) || (pos2 == length2) {
|
||||
break main
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
}
|
||||
}
|
||||
}
|
||||
if pos1 == length1 {
|
||||
answer.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
|
||||
} else if pos2 == length2 {
|
||||
answer.highlowcontainer.appendCopyMany(x1.highlowcontainer, pos1, length1)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
// In-place Or function that requires repairAfterLazy
|
||||
func (x1 *Bitmap) lazyOR(x2 *Bitmap) *Bitmap {
|
||||
pos1 := 0
|
||||
pos2 := 0
|
||||
length1 := x1.highlowcontainer.size()
|
||||
length2 := x2.highlowcontainer.size()
|
||||
main:
|
||||
for (pos1 < length1) && (pos2 < length2) {
|
||||
s1 := x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 := x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
|
||||
for {
|
||||
if s1 < s2 {
|
||||
pos1++
|
||||
if pos1 == length1 {
|
||||
break main
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
} else if s1 > s2 {
|
||||
x1.highlowcontainer.insertNewKeyValueAt(pos1, s2, x2.highlowcontainer.getContainerAtIndex(pos2).clone())
|
||||
pos2++
|
||||
pos1++
|
||||
length1++
|
||||
if pos2 == length2 {
|
||||
break main
|
||||
}
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
} else {
|
||||
c1 := x1.highlowcontainer.getWritableContainerAtIndex(pos1)
|
||||
x1.highlowcontainer.containers[pos1] = c1.lazyIOR(x2.highlowcontainer.getContainerAtIndex(pos2))
|
||||
x1.highlowcontainer.needCopyOnWrite[pos1] = false
|
||||
pos1++
|
||||
pos2++
|
||||
if (pos1 == length1) || (pos2 == length2) {
|
||||
break main
|
||||
}
|
||||
s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
|
||||
s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
|
||||
}
|
||||
}
|
||||
}
|
||||
if pos1 == length1 {
|
||||
x1.highlowcontainer.appendCopyMany(x2.highlowcontainer, pos2, length2)
|
||||
}
|
||||
return x1
|
||||
}
|
||||
|
||||
// to be called after lazy aggregates
|
||||
func (x1 *Bitmap) repairAfterLazy() {
|
||||
for pos := 0; pos < x1.highlowcontainer.size(); pos++ {
|
||||
c := x1.highlowcontainer.getContainerAtIndex(pos)
|
||||
switch c.(type) {
|
||||
case *bitmapContainer:
|
||||
if c.(*bitmapContainer).cardinality == invalidCardinality {
|
||||
c = x1.highlowcontainer.getWritableContainerAtIndex(pos)
|
||||
c.(*bitmapContainer).computeCardinality()
|
||||
if c.(*bitmapContainer).getCardinality() <= arrayDefaultMaxSize {
|
||||
x1.highlowcontainer.setContainerAtIndex(pos, c.(*bitmapContainer).toArrayContainer())
|
||||
} else if c.(*bitmapContainer).isFull() {
|
||||
x1.highlowcontainer.setContainerAtIndex(pos, newRunContainer16Range(0, MaxUint16))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FastAnd computes the intersection between many bitmaps quickly
|
||||
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
|
||||
// of manually calling "And" many times.
|
||||
//
|
||||
// Performance hints: if you have very large and tiny bitmaps,
|
||||
// it may be beneficial performance-wise to put a tiny bitmap
|
||||
// in first position.
|
||||
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
answer := And(bitmaps[0], bitmaps[1])
|
||||
for _, bm := range bitmaps[2:] {
|
||||
answer.And(bm)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
// FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly.
|
||||
// It might also be faster than calling Or repeatedly.
|
||||
func FastOr(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
answer := lazyOR(bitmaps[0], bitmaps[1])
|
||||
for _, bm := range bitmaps[2:] {
|
||||
answer = answer.lazyOR(bm)
|
||||
}
|
||||
// here is where repairAfterLazy is called.
|
||||
answer.repairAfterLazy()
|
||||
return answer
|
||||
}
|
||||
|
||||
// HeapOr computes the union between many bitmaps quickly using a heap.
|
||||
// It might be faster than calling Or repeatedly.
|
||||
func HeapOr(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
}
|
||||
// TODO: for better speed, we could do the operation lazily, see Java implementation
|
||||
pq := make(priorityQueue, len(bitmaps))
|
||||
for i, bm := range bitmaps {
|
||||
pq[i] = &item{bm, i}
|
||||
}
|
||||
heap.Init(&pq)
|
||||
|
||||
for pq.Len() > 1 {
|
||||
x1 := heap.Pop(&pq).(*item)
|
||||
x2 := heap.Pop(&pq).(*item)
|
||||
heap.Push(&pq, &item{Or(x1.value, x2.value), 0})
|
||||
}
|
||||
return heap.Pop(&pq).(*item).value
|
||||
}
|
||||
|
||||
// HeapXor computes the symmetric difference between many bitmaps quickly (as opposed to calling Xor repeated).
|
||||
// Internally, this function uses a heap.
|
||||
// It might be faster than calling Xor repeatedly.
|
||||
func HeapXor(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
}
|
||||
|
||||
pq := make(priorityQueue, len(bitmaps))
|
||||
for i, bm := range bitmaps {
|
||||
pq[i] = &item{bm, i}
|
||||
}
|
||||
heap.Init(&pq)
|
||||
|
||||
for pq.Len() > 1 {
|
||||
x1 := heap.Pop(&pq).(*item)
|
||||
x2 := heap.Pop(&pq).(*item)
|
||||
heap.Push(&pq, &item{Xor(x1.value, x2.value), 0})
|
||||
}
|
||||
return heap.Pop(&pq).(*item).value
|
||||
}
|
||||
|
||||
// AndAny provides a result equivalent to x1.And(FastOr(bitmaps)).
|
||||
// It's optimized to minimize allocations. It also might be faster than separate calls.
|
||||
func (x1 *Bitmap) AndAny(bitmaps ...*Bitmap) {
|
||||
if len(bitmaps) == 0 {
|
||||
return
|
||||
} else if len(bitmaps) == 1 {
|
||||
x1.And(bitmaps[0])
|
||||
return
|
||||
}
|
||||
|
||||
type withPos struct {
|
||||
bitmap *roaringArray
|
||||
pos int
|
||||
key uint16
|
||||
}
|
||||
filters := make([]withPos, 0, len(bitmaps))
|
||||
|
||||
for _, b := range bitmaps {
|
||||
if b.highlowcontainer.size() > 0 {
|
||||
filters = append(filters, withPos{
|
||||
bitmap: &b.highlowcontainer,
|
||||
pos: 0,
|
||||
key: b.highlowcontainer.getKeyAtIndex(0),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
basePos := 0
|
||||
intersections := 0
|
||||
keyContainers := make([]container, 0, len(filters))
|
||||
var (
|
||||
tmpArray *arrayContainer
|
||||
tmpBitmap *bitmapContainer
|
||||
minNextKey uint16
|
||||
)
|
||||
|
||||
for basePos < x1.highlowcontainer.size() && len(filters) > 0 {
|
||||
baseKey := x1.highlowcontainer.getKeyAtIndex(basePos)
|
||||
|
||||
// accumulate containers for current key, find next minimal key in filters
|
||||
// and exclude filters that do not have related values anymore
|
||||
i := 0
|
||||
maxPossibleOr := 0
|
||||
minNextKey = MaxUint16
|
||||
for _, f := range filters {
|
||||
if f.key < baseKey {
|
||||
f.pos = f.bitmap.advanceUntil(baseKey, f.pos)
|
||||
if f.pos == f.bitmap.size() {
|
||||
continue
|
||||
}
|
||||
f.key = f.bitmap.getKeyAtIndex(f.pos)
|
||||
}
|
||||
|
||||
if f.key == baseKey {
|
||||
cont := f.bitmap.getContainerAtIndex(f.pos)
|
||||
keyContainers = append(keyContainers, cont)
|
||||
maxPossibleOr += cont.getCardinality()
|
||||
|
||||
f.pos++
|
||||
if f.pos == f.bitmap.size() {
|
||||
continue
|
||||
}
|
||||
f.key = f.bitmap.getKeyAtIndex(f.pos)
|
||||
}
|
||||
|
||||
minNextKey = minOfUint16(minNextKey, f.key)
|
||||
filters[i] = f
|
||||
i++
|
||||
}
|
||||
filters = filters[:i]
|
||||
|
||||
if len(keyContainers) == 0 {
|
||||
basePos = x1.highlowcontainer.advanceUntil(minNextKey, basePos)
|
||||
continue
|
||||
}
|
||||
|
||||
var ored container
|
||||
|
||||
if len(keyContainers) == 1 {
|
||||
ored = keyContainers[0]
|
||||
} else {
|
||||
//TODO: special case for run containers?
|
||||
if maxPossibleOr > arrayDefaultMaxSize {
|
||||
if tmpBitmap == nil {
|
||||
tmpBitmap = newBitmapContainer()
|
||||
}
|
||||
tmpBitmap.resetTo(keyContainers[0])
|
||||
ored = tmpBitmap
|
||||
} else {
|
||||
if tmpArray == nil {
|
||||
tmpArray = newArrayContainerCapacity(maxPossibleOr)
|
||||
}
|
||||
tmpArray.realloc(maxPossibleOr)
|
||||
tmpArray.resetTo(keyContainers[0])
|
||||
ored = tmpArray
|
||||
}
|
||||
for _, c := range keyContainers[1:] {
|
||||
ored = ored.ior(c)
|
||||
}
|
||||
}
|
||||
|
||||
result := x1.highlowcontainer.getWritableContainerAtIndex(basePos).iand(ored)
|
||||
if !result.isEmpty() {
|
||||
x1.highlowcontainer.replaceKeyAndContainerAtIndex(intersections, baseKey, result, false)
|
||||
intersections++
|
||||
}
|
||||
|
||||
keyContainers = keyContainers[:0]
|
||||
basePos = x1.highlowcontainer.advanceUntil(minNextKey, basePos)
|
||||
}
|
||||
|
||||
x1.highlowcontainer.resize(intersections)
|
||||
}
|
||||
215
vendor/github.com/RoaringBitmap/roaring/v2/internal/byte_input.go
generated
vendored
Normal file
215
vendor/github.com/RoaringBitmap/roaring/v2/internal/byte_input.go
generated
vendored
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"io"
|
||||
)
|
||||
|
||||
// ByteInput typed interface around io.Reader or raw bytes
|
||||
type ByteInput interface {
|
||||
// Next returns a slice containing the next n bytes from the buffer,
|
||||
// advancing the buffer as if the bytes had been returned by Read.
|
||||
Next(n int) ([]byte, error)
|
||||
// NextReturnsSafeSlice returns true if Next() returns a safe slice as opposed
|
||||
// to a slice that points to an underlying buffer possibly owned by another system.
|
||||
// When NextReturnsSafeSlice returns false, the result from Next() should be copied
|
||||
// before it is modified (i.e., it is immutable).
|
||||
NextReturnsSafeSlice() bool
|
||||
// ReadUInt32 reads uint32 with LittleEndian order
|
||||
ReadUInt32() (uint32, error)
|
||||
// ReadUInt16 reads uint16 with LittleEndian order
|
||||
ReadUInt16() (uint16, error)
|
||||
// GetReadBytes returns read bytes
|
||||
GetReadBytes() int64
|
||||
// SkipBytes skips exactly n bytes
|
||||
SkipBytes(n int) error
|
||||
}
|
||||
|
||||
// NewByteInputFromReader creates reader wrapper
|
||||
func NewByteInputFromReader(reader io.Reader) ByteInput {
|
||||
return &ByteInputAdapter{
|
||||
r: reader,
|
||||
readBytes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// NewByteInput creates raw bytes wrapper
|
||||
func NewByteInput(buf []byte) ByteInput {
|
||||
return &ByteBuffer{
|
||||
buf: buf,
|
||||
off: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// ByteBuffer raw bytes wrapper
|
||||
type ByteBuffer struct {
|
||||
buf []byte
|
||||
off int
|
||||
}
|
||||
|
||||
// NewByteBuffer creates a new ByteBuffer.
|
||||
func NewByteBuffer(buf []byte) *ByteBuffer {
|
||||
return &ByteBuffer{
|
||||
buf: buf,
|
||||
}
|
||||
}
|
||||
|
||||
var _ io.Reader = (*ByteBuffer)(nil)
|
||||
|
||||
// Read implements io.Reader.
|
||||
func (b *ByteBuffer) Read(p []byte) (int, error) {
|
||||
data, err := b.Next(len(p))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
copy(p, data)
|
||||
return len(data), nil
|
||||
}
|
||||
|
||||
// Next returns a slice containing the next n bytes from the reader
|
||||
// If there are fewer bytes than the given n, io.ErrUnexpectedEOF will be returned
|
||||
func (b *ByteBuffer) Next(n int) ([]byte, error) {
|
||||
m := len(b.buf) - b.off
|
||||
|
||||
if n > m {
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
data := b.buf[b.off : b.off+n]
|
||||
b.off += n
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// NextReturnsSafeSlice returns false since ByteBuffer might hold
|
||||
// an array owned by some other systems.
|
||||
func (b *ByteBuffer) NextReturnsSafeSlice() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// ReadUInt32 reads uint32 with LittleEndian order
|
||||
func (b *ByteBuffer) ReadUInt32() (uint32, error) {
|
||||
if len(b.buf)-b.off < 4 {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
v := binary.LittleEndian.Uint32(b.buf[b.off:])
|
||||
b.off += 4
|
||||
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// ReadUInt16 reads uint16 with LittleEndian order
|
||||
func (b *ByteBuffer) ReadUInt16() (uint16, error) {
|
||||
if len(b.buf)-b.off < 2 {
|
||||
return 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
v := binary.LittleEndian.Uint16(b.buf[b.off:])
|
||||
b.off += 2
|
||||
|
||||
return v, nil
|
||||
}
|
||||
|
||||
// GetReadBytes returns read bytes
|
||||
func (b *ByteBuffer) GetReadBytes() int64 {
|
||||
return int64(b.off)
|
||||
}
|
||||
|
||||
// SkipBytes skips exactly n bytes
|
||||
func (b *ByteBuffer) SkipBytes(n int) error {
|
||||
m := len(b.buf) - b.off
|
||||
|
||||
if n > m {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
|
||||
b.off += n
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reset resets the given buffer with a new byte slice
|
||||
func (b *ByteBuffer) Reset(buf []byte) {
|
||||
b.buf = buf
|
||||
b.off = 0
|
||||
}
|
||||
|
||||
// ByteInputAdapter reader wrapper
|
||||
type ByteInputAdapter struct {
|
||||
r io.Reader
|
||||
readBytes int
|
||||
buf [4]byte
|
||||
}
|
||||
|
||||
var _ io.Reader = (*ByteInputAdapter)(nil)
|
||||
|
||||
// Read implements io.Reader.
|
||||
func (b *ByteInputAdapter) Read(buf []byte) (int, error) {
|
||||
m, err := io.ReadAtLeast(b.r, buf, len(buf))
|
||||
b.readBytes += m
|
||||
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Next returns a slice containing the next n bytes from the buffer,
|
||||
// advancing the buffer as if the bytes had been returned by Read.
|
||||
func (b *ByteInputAdapter) Next(n int) ([]byte, error) {
|
||||
buf := make([]byte, n)
|
||||
_, err := b.Read(buf)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// NextReturnsSafeSlice returns true since ByteInputAdapter always returns a slice
|
||||
// allocated with make([]byte, ...)
|
||||
func (b *ByteInputAdapter) NextReturnsSafeSlice() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// ReadUInt32 reads uint32 with LittleEndian order
|
||||
func (b *ByteInputAdapter) ReadUInt32() (uint32, error) {
|
||||
buf := b.buf[:4]
|
||||
_, err := b.Read(buf)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return binary.LittleEndian.Uint32(buf), nil
|
||||
}
|
||||
|
||||
// ReadUInt16 reads uint16 with LittleEndian order
|
||||
func (b *ByteInputAdapter) ReadUInt16() (uint16, error) {
|
||||
buf := b.buf[:2]
|
||||
_, err := b.Read(buf)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return binary.LittleEndian.Uint16(buf), nil
|
||||
}
|
||||
|
||||
// GetReadBytes returns read bytes
|
||||
func (b *ByteInputAdapter) GetReadBytes() int64 {
|
||||
return int64(b.readBytes)
|
||||
}
|
||||
|
||||
// SkipBytes skips exactly n bytes
|
||||
func (b *ByteInputAdapter) SkipBytes(n int) error {
|
||||
_, err := b.Next(n)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Reset resets the given buffer with a new stream
|
||||
func (b *ByteInputAdapter) Reset(stream io.Reader) {
|
||||
b.r = stream
|
||||
b.readBytes = 0
|
||||
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
package internal
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
var (
|
||||
// ByteInputAdapterPool shared pool
|
||||
ByteInputAdapterPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &ByteInputAdapter{}
|
||||
},
|
||||
}
|
||||
|
||||
// ByteBufferPool shared pool
|
||||
ByteBufferPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &ByteBuffer{}
|
||||
},
|
||||
}
|
||||
)
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
package roaring
|
||||
|
||||
type manyIterable interface {
|
||||
nextMany(hs uint32, buf []uint32) int
|
||||
nextMany64(hs uint64, buf []uint64) int
|
||||
}
|
||||
|
||||
func (si *shortIterator) nextMany(hs uint32, buf []uint32) int {
|
||||
n := 0
|
||||
l := si.loc
|
||||
s := si.slice
|
||||
for n < len(buf) && l < len(s) {
|
||||
buf[n] = uint32(s[l]) | hs
|
||||
l++
|
||||
n++
|
||||
}
|
||||
si.loc = l
|
||||
return n
|
||||
}
|
||||
|
||||
func (si *shortIterator) nextMany64(hs uint64, buf []uint64) int {
|
||||
n := 0
|
||||
l := si.loc
|
||||
s := si.slice
|
||||
for n < len(buf) && l < len(s) {
|
||||
buf[n] = uint64(s[l]) | hs
|
||||
l++
|
||||
n++
|
||||
}
|
||||
si.loc = l
|
||||
return n
|
||||
}
|
||||
|
|
@ -0,0 +1,612 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"sync"
|
||||
)
|
||||
|
||||
var defaultWorkerCount = runtime.NumCPU()
|
||||
|
||||
type bitmapContainerKey struct {
|
||||
key uint16
|
||||
idx int
|
||||
bitmap *Bitmap
|
||||
}
|
||||
|
||||
type multipleContainers struct {
|
||||
key uint16
|
||||
containers []container
|
||||
idx int
|
||||
}
|
||||
|
||||
type keyedContainer struct {
|
||||
key uint16
|
||||
container container
|
||||
idx int
|
||||
}
|
||||
|
||||
type bitmapContainerHeap []bitmapContainerKey
|
||||
|
||||
func (h bitmapContainerHeap) Len() int { return len(h) }
|
||||
func (h bitmapContainerHeap) Less(i, j int) bool { return h[i].key < h[j].key }
|
||||
func (h bitmapContainerHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
|
||||
|
||||
func (h *bitmapContainerHeap) Push(x interface{}) {
|
||||
// Push and Pop use pointer receivers because they modify the slice's length,
|
||||
// not just its contents.
|
||||
*h = append(*h, x.(bitmapContainerKey))
|
||||
}
|
||||
|
||||
func (h *bitmapContainerHeap) Pop() interface{} {
|
||||
old := *h
|
||||
n := len(old)
|
||||
x := old[n-1]
|
||||
*h = old[0 : n-1]
|
||||
return x
|
||||
}
|
||||
|
||||
func (h bitmapContainerHeap) Peek() bitmapContainerKey {
|
||||
return h[0]
|
||||
}
|
||||
|
||||
func (h *bitmapContainerHeap) popIncrementing() (key uint16, container container) {
|
||||
k := h.Peek()
|
||||
key = k.key
|
||||
container = k.bitmap.highlowcontainer.containers[k.idx]
|
||||
|
||||
newIdx := k.idx + 1
|
||||
if newIdx < k.bitmap.highlowcontainer.size() {
|
||||
k = bitmapContainerKey{
|
||||
k.bitmap.highlowcontainer.keys[newIdx],
|
||||
newIdx,
|
||||
k.bitmap,
|
||||
}
|
||||
(*h)[0] = k
|
||||
heap.Fix(h, 0)
|
||||
} else {
|
||||
heap.Pop(h)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (h *bitmapContainerHeap) Next(containers []container) multipleContainers {
|
||||
if h.Len() == 0 {
|
||||
return multipleContainers{}
|
||||
}
|
||||
|
||||
key, container := h.popIncrementing()
|
||||
containers = append(containers, container)
|
||||
|
||||
for h.Len() > 0 && key == h.Peek().key {
|
||||
_, container = h.popIncrementing()
|
||||
containers = append(containers, container)
|
||||
}
|
||||
|
||||
return multipleContainers{
|
||||
key,
|
||||
containers,
|
||||
-1,
|
||||
}
|
||||
}
|
||||
|
||||
func newBitmapContainerHeap(bitmaps ...*Bitmap) bitmapContainerHeap {
|
||||
// Initialize heap
|
||||
var h bitmapContainerHeap = make([]bitmapContainerKey, 0, len(bitmaps))
|
||||
for _, bitmap := range bitmaps {
|
||||
if !bitmap.IsEmpty() {
|
||||
key := bitmapContainerKey{
|
||||
bitmap.highlowcontainer.keys[0],
|
||||
0,
|
||||
bitmap,
|
||||
}
|
||||
h = append(h, key)
|
||||
}
|
||||
}
|
||||
|
||||
heap.Init(&h)
|
||||
|
||||
return h
|
||||
}
|
||||
|
||||
func repairAfterLazy(c container) container {
|
||||
switch t := c.(type) {
|
||||
case *bitmapContainer:
|
||||
if t.cardinality == invalidCardinality {
|
||||
t.computeCardinality()
|
||||
}
|
||||
|
||||
if t.getCardinality() <= arrayDefaultMaxSize {
|
||||
return t.toArrayContainer()
|
||||
} else if c.(*bitmapContainer).isFull() {
|
||||
return newRunContainer16Range(0, MaxUint16)
|
||||
}
|
||||
}
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
func toBitmapContainer(c container) container {
|
||||
switch t := c.(type) {
|
||||
case *arrayContainer:
|
||||
return t.toBitmapContainer()
|
||||
case *runContainer16:
|
||||
if !t.isFull() {
|
||||
return t.toBitmapContainer()
|
||||
}
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
func appenderRoutine(bitmapChan chan<- *Bitmap, resultChan <-chan keyedContainer, expectedKeysChan <-chan int) {
|
||||
expectedKeys := -1
|
||||
appendedKeys := 0
|
||||
var keys []uint16
|
||||
var containers []container
|
||||
for appendedKeys != expectedKeys {
|
||||
select {
|
||||
case item := <-resultChan:
|
||||
if len(keys) <= item.idx {
|
||||
keys = append(keys, make([]uint16, item.idx-len(keys)+1)...)
|
||||
containers = append(containers, make([]container, item.idx-len(containers)+1)...)
|
||||
}
|
||||
keys[item.idx] = item.key
|
||||
containers[item.idx] = item.container
|
||||
|
||||
appendedKeys++
|
||||
case msg := <-expectedKeysChan:
|
||||
expectedKeys = msg
|
||||
}
|
||||
}
|
||||
answer := &Bitmap{
|
||||
roaringArray{
|
||||
make([]uint16, 0, expectedKeys),
|
||||
make([]container, 0, expectedKeys),
|
||||
make([]bool, 0, expectedKeys),
|
||||
false,
|
||||
},
|
||||
}
|
||||
for i := range keys {
|
||||
if containers[i] != nil { // in case a resulting container was empty, see ParAnd function
|
||||
answer.highlowcontainer.appendContainer(keys[i], containers[i], false)
|
||||
}
|
||||
}
|
||||
|
||||
bitmapChan <- answer
|
||||
}
|
||||
|
||||
// ParHeapOr computes the union (OR) of all provided bitmaps in parallel,
|
||||
// where the parameter "parallelism" determines how many workers are to be used
|
||||
// (if it is set to 0, a default number of workers is chosen)
|
||||
// ParHeapOr uses a heap to compute the union. For rare cases it might be faster than ParOr
|
||||
func ParHeapOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
|
||||
bitmapCount := len(bitmaps)
|
||||
if bitmapCount == 0 {
|
||||
return NewBitmap()
|
||||
} else if bitmapCount == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
|
||||
if parallelism == 0 {
|
||||
parallelism = defaultWorkerCount
|
||||
}
|
||||
|
||||
h := newBitmapContainerHeap(bitmaps...)
|
||||
|
||||
bitmapChan := make(chan *Bitmap)
|
||||
inputChan := make(chan multipleContainers, 128)
|
||||
resultChan := make(chan keyedContainer, 32)
|
||||
expectedKeysChan := make(chan int)
|
||||
|
||||
pool := sync.Pool{
|
||||
New: func() interface{} {
|
||||
return make([]container, 0, len(bitmaps))
|
||||
},
|
||||
}
|
||||
|
||||
orFunc := func() {
|
||||
// Assumes only structs with >=2 containers are passed
|
||||
for input := range inputChan {
|
||||
c := toBitmapContainer(input.containers[0]).lazyOR(input.containers[1])
|
||||
for _, next := range input.containers[2:] {
|
||||
c = c.lazyIOR(next)
|
||||
}
|
||||
c = repairAfterLazy(c)
|
||||
kx := keyedContainer{
|
||||
input.key,
|
||||
c,
|
||||
input.idx,
|
||||
}
|
||||
resultChan <- kx
|
||||
pool.Put(input.containers[:0])
|
||||
}
|
||||
}
|
||||
|
||||
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
|
||||
|
||||
for i := 0; i < parallelism; i++ {
|
||||
go orFunc()
|
||||
}
|
||||
|
||||
idx := 0
|
||||
for h.Len() > 0 {
|
||||
ck := h.Next(pool.Get().([]container))
|
||||
if len(ck.containers) == 1 {
|
||||
resultChan <- keyedContainer{
|
||||
ck.key,
|
||||
ck.containers[0],
|
||||
idx,
|
||||
}
|
||||
pool.Put(ck.containers[:0])
|
||||
} else {
|
||||
ck.idx = idx
|
||||
inputChan <- ck
|
||||
}
|
||||
idx++
|
||||
}
|
||||
expectedKeysChan <- idx
|
||||
|
||||
bitmap := <-bitmapChan
|
||||
|
||||
close(inputChan)
|
||||
close(resultChan)
|
||||
close(expectedKeysChan)
|
||||
|
||||
return bitmap
|
||||
}
|
||||
|
||||
// ParAnd computes the intersection (AND) of all provided bitmaps in parallel,
|
||||
// where the parameter "parallelism" determines how many workers are to be used
|
||||
// (if it is set to 0, a default number of workers is chosen)
|
||||
func ParAnd(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
bitmapCount := len(bitmaps)
|
||||
if bitmapCount == 0 {
|
||||
return NewBitmap()
|
||||
} else if bitmapCount == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
|
||||
if parallelism == 0 {
|
||||
parallelism = defaultWorkerCount
|
||||
}
|
||||
|
||||
h := newBitmapContainerHeap(bitmaps...)
|
||||
|
||||
bitmapChan := make(chan *Bitmap)
|
||||
inputChan := make(chan multipleContainers, 128)
|
||||
resultChan := make(chan keyedContainer, 32)
|
||||
expectedKeysChan := make(chan int)
|
||||
|
||||
andFunc := func() {
|
||||
// Assumes only structs with >=2 containers are passed
|
||||
for input := range inputChan {
|
||||
c := input.containers[0].and(input.containers[1])
|
||||
for _, next := range input.containers[2:] {
|
||||
if c.isEmpty() {
|
||||
break
|
||||
}
|
||||
c = c.iand(next)
|
||||
}
|
||||
|
||||
// Send a nil explicitly if the result of the intersection is an empty container
|
||||
if c.isEmpty() {
|
||||
c = nil
|
||||
}
|
||||
|
||||
kx := keyedContainer{
|
||||
input.key,
|
||||
c,
|
||||
input.idx,
|
||||
}
|
||||
resultChan <- kx
|
||||
}
|
||||
}
|
||||
|
||||
go appenderRoutine(bitmapChan, resultChan, expectedKeysChan)
|
||||
|
||||
for i := 0; i < parallelism; i++ {
|
||||
go andFunc()
|
||||
}
|
||||
|
||||
idx := 0
|
||||
for h.Len() > 0 {
|
||||
ck := h.Next(make([]container, 0, 4))
|
||||
if len(ck.containers) == bitmapCount {
|
||||
ck.idx = idx
|
||||
inputChan <- ck
|
||||
idx++
|
||||
}
|
||||
}
|
||||
expectedKeysChan <- idx
|
||||
|
||||
bitmap := <-bitmapChan
|
||||
|
||||
close(inputChan)
|
||||
close(resultChan)
|
||||
close(expectedKeysChan)
|
||||
|
||||
return bitmap
|
||||
}
|
||||
|
||||
// ParOr computes the union (OR) of all provided bitmaps in parallel,
|
||||
// where the parameter "parallelism" determines how many workers are to be used
|
||||
// (if it is set to 0, a default number of workers is chosen)
|
||||
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
var lKey uint16 = MaxUint16
|
||||
var hKey uint16
|
||||
|
||||
bitmapsFiltered := bitmaps[:0]
|
||||
for _, b := range bitmaps {
|
||||
if !b.IsEmpty() {
|
||||
bitmapsFiltered = append(bitmapsFiltered, b)
|
||||
}
|
||||
}
|
||||
bitmaps = bitmapsFiltered
|
||||
|
||||
for _, b := range bitmaps {
|
||||
lKey = minOfUint16(lKey, b.highlowcontainer.keys[0])
|
||||
hKey = maxOfUint16(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
|
||||
}
|
||||
|
||||
if lKey == MaxUint16 && hKey == 0 {
|
||||
return New()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
|
||||
keyRange := int(hKey) - int(lKey) + 1
|
||||
if keyRange == 1 {
|
||||
// revert to FastOr. Since the key range is 0
|
||||
// no container-level aggregation parallelism is achievable
|
||||
return FastOr(bitmaps...)
|
||||
}
|
||||
|
||||
if parallelism == 0 {
|
||||
parallelism = defaultWorkerCount
|
||||
}
|
||||
|
||||
var chunkSize int
|
||||
var chunkCount int
|
||||
if parallelism*4 > int(keyRange) {
|
||||
chunkSize = 1
|
||||
chunkCount = int(keyRange)
|
||||
} else {
|
||||
chunkCount = parallelism * 4
|
||||
chunkSize = (int(keyRange) + chunkCount - 1) / chunkCount
|
||||
}
|
||||
|
||||
if chunkCount*chunkSize < int(keyRange) {
|
||||
// it's fine to panic to indicate an implementation error
|
||||
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
|
||||
}
|
||||
|
||||
chunks := make([]*roaringArray, chunkCount)
|
||||
|
||||
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
|
||||
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
|
||||
|
||||
orFunc := func() {
|
||||
for spec := range chunkSpecChan {
|
||||
ra := lazyOrOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
|
||||
for _, b := range bitmaps[2:] {
|
||||
ra = lazyIOrOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
|
||||
}
|
||||
|
||||
for i, c := range ra.containers {
|
||||
ra.containers[i] = repairAfterLazy(c)
|
||||
}
|
||||
|
||||
chunkChan <- parChunk{ra, spec.idx}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < parallelism; i++ {
|
||||
go orFunc()
|
||||
}
|
||||
|
||||
go func() {
|
||||
for i := 0; i < chunkCount; i++ {
|
||||
spec := parChunkSpec{
|
||||
start: uint16(int(lKey) + i*chunkSize),
|
||||
end: uint16(minOfInt(int(lKey)+(i+1)*chunkSize-1, int(hKey))),
|
||||
idx: int(i),
|
||||
}
|
||||
chunkSpecChan <- spec
|
||||
}
|
||||
}()
|
||||
|
||||
chunksRemaining := chunkCount
|
||||
for chunk := range chunkChan {
|
||||
chunks[chunk.idx] = chunk.ra
|
||||
chunksRemaining--
|
||||
if chunksRemaining == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
close(chunkChan)
|
||||
close(chunkSpecChan)
|
||||
|
||||
containerCount := 0
|
||||
for _, chunk := range chunks {
|
||||
containerCount += chunk.size()
|
||||
}
|
||||
|
||||
result := Bitmap{
|
||||
roaringArray{
|
||||
containers: make([]container, containerCount),
|
||||
keys: make([]uint16, containerCount),
|
||||
needCopyOnWrite: make([]bool, containerCount),
|
||||
},
|
||||
}
|
||||
|
||||
resultOffset := 0
|
||||
for _, chunk := range chunks {
|
||||
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
|
||||
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
|
||||
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
|
||||
resultOffset += chunk.size()
|
||||
}
|
||||
|
||||
return &result
|
||||
}
|
||||
|
||||
type parChunkSpec struct {
|
||||
start uint16
|
||||
end uint16
|
||||
idx int
|
||||
}
|
||||
|
||||
type parChunk struct {
|
||||
ra *roaringArray
|
||||
idx int
|
||||
}
|
||||
|
||||
func (c parChunk) size() int {
|
||||
return c.ra.size()
|
||||
}
|
||||
|
||||
func parNaiveStartAt(ra *roaringArray, start uint16, last uint16) int {
|
||||
for idx, key := range ra.keys {
|
||||
if key >= start && key <= last {
|
||||
return idx
|
||||
} else if key > last {
|
||||
break
|
||||
}
|
||||
}
|
||||
return ra.size()
|
||||
}
|
||||
|
||||
func lazyOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
|
||||
answer := newRoaringArray()
|
||||
length1 := ra1.size()
|
||||
length2 := ra2.size()
|
||||
|
||||
idx1 := parNaiveStartAt(ra1, start, last)
|
||||
idx2 := parNaiveStartAt(ra2, start, last)
|
||||
|
||||
var key1 uint16
|
||||
var key2 uint16
|
||||
if idx1 < length1 && idx2 < length2 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
|
||||
for key1 <= last && key2 <= last {
|
||||
|
||||
if key1 < key2 {
|
||||
answer.appendCopy(*ra1, idx1)
|
||||
idx1++
|
||||
if idx1 == length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
} else if key1 > key2 {
|
||||
answer.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 == length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
} else {
|
||||
c1 := ra1.getFastContainerAtIndex(idx1, false)
|
||||
|
||||
answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
|
||||
idx1++
|
||||
idx2++
|
||||
if idx1 == length1 || idx2 == length2 {
|
||||
break
|
||||
}
|
||||
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if idx2 < length2 {
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
for key2 <= last {
|
||||
answer.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 == length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
|
||||
if idx1 < length1 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
for key1 <= last {
|
||||
answer.appendCopy(*ra1, idx1)
|
||||
idx1++
|
||||
if idx1 == length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
}
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func lazyIOrOnRange(ra1, ra2 *roaringArray, start, last uint16) *roaringArray {
|
||||
length1 := ra1.size()
|
||||
length2 := ra2.size()
|
||||
|
||||
idx1 := 0
|
||||
idx2 := parNaiveStartAt(ra2, start, last)
|
||||
|
||||
var key1 uint16
|
||||
var key2 uint16
|
||||
if idx1 < length1 && idx2 < length2 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
|
||||
for key1 <= last && key2 <= last {
|
||||
if key1 < key2 {
|
||||
idx1++
|
||||
if idx1 >= length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
} else if key1 > key2 {
|
||||
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
|
||||
ra1.needCopyOnWrite[idx1] = true
|
||||
idx2++
|
||||
idx1++
|
||||
length1++
|
||||
if idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
} else {
|
||||
c1 := ra1.getFastContainerAtIndex(idx1, true)
|
||||
|
||||
ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
|
||||
ra1.needCopyOnWrite[idx1] = false
|
||||
idx1++
|
||||
idx2++
|
||||
if idx1 >= length1 || idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
}
|
||||
if idx2 < length2 {
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
for key2 <= last {
|
||||
ra1.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
return ra1
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
//go:build go1.9
|
||||
// +build go1.9
|
||||
|
||||
// "go1.9", from Go version 1.9 onward
|
||||
// See https://golang.org/pkg/go/build/#hdr-Build_Constraints
|
||||
|
||||
package roaring
|
||||
|
||||
import "math/bits"
|
||||
|
||||
func popcount(x uint64) uint64 {
|
||||
return uint64(bits.OnesCount64(x))
|
||||
}
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
// +build amd64,!appengine,!go1.9
|
||||
|
||||
TEXT ·hasAsm(SB),4,$0-1
|
||||
MOVQ $1, AX
|
||||
CPUID
|
||||
SHRQ $23, CX
|
||||
ANDQ $1, CX
|
||||
MOVB CX, ret+0(FP)
|
||||
RET
|
||||
|
||||
#define POPCNTQ_DX_DX BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0xd2
|
||||
|
||||
TEXT ·popcntSliceAsm(SB),4,$0-32
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntSliceEnd
|
||||
popcntSliceLoop:
|
||||
BYTE $0xf3; BYTE $0x48; BYTE $0x0f; BYTE $0xb8; BYTE $0x16 // POPCNTQ (SI), DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
LOOP popcntSliceLoop
|
||||
popcntSliceEnd:
|
||||
MOVQ AX, ret+24(FP)
|
||||
RET
|
||||
|
||||
TEXT ·popcntMaskSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntMaskSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntMaskSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
NOTQ DX
|
||||
ANDQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntMaskSliceLoop
|
||||
popcntMaskSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
TEXT ·popcntAndSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntAndSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntAndSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
ANDQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntAndSliceLoop
|
||||
popcntAndSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
TEXT ·popcntOrSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntOrSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntOrSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
ORQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntOrSliceLoop
|
||||
popcntOrSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
||||
TEXT ·popcntXorSliceAsm(SB),4,$0-56
|
||||
XORQ AX, AX
|
||||
MOVQ s+0(FP), SI
|
||||
MOVQ s_len+8(FP), CX
|
||||
TESTQ CX, CX
|
||||
JZ popcntXorSliceEnd
|
||||
MOVQ m+24(FP), DI
|
||||
popcntXorSliceLoop:
|
||||
MOVQ (DI), DX
|
||||
XORQ (SI), DX
|
||||
POPCNTQ_DX_DX
|
||||
ADDQ DX, AX
|
||||
ADDQ $8, SI
|
||||
ADDQ $8, DI
|
||||
LOOP popcntXorSliceLoop
|
||||
popcntXorSliceEnd:
|
||||
MOVQ AX, ret+48(FP)
|
||||
RET
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
//go:build amd64 && !appengine && !go1.9
|
||||
// +build amd64,!appengine,!go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// *** the following functions are defined in popcnt_amd64.s
|
||||
|
||||
//go:noescape
|
||||
|
||||
func hasAsm() bool
|
||||
|
||||
// useAsm is a flag used to select the GO or ASM implementation of the popcnt function
|
||||
var useAsm = hasAsm()
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntSliceAsm(s []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntMaskSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntAndSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntOrSliceAsm(s, m []uint64) uint64
|
||||
|
||||
//go:noescape
|
||||
|
||||
func popcntXorSliceAsm(s, m []uint64) uint64
|
||||
|
||||
func popcntSlice(s []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntSliceAsm(s)
|
||||
}
|
||||
return popcntSliceGo(s)
|
||||
}
|
||||
|
||||
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntMaskSliceAsm(s, m)
|
||||
}
|
||||
return popcntMaskSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntAndSliceAsm(s, m)
|
||||
}
|
||||
return popcntAndSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntOrSliceAsm(s, m)
|
||||
}
|
||||
return popcntOrSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
if useAsm {
|
||||
return popcntXorSliceAsm(s, m)
|
||||
}
|
||||
return popcntXorSliceGo(s, m)
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
//go:build !go1.9
|
||||
// +build !go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
// bit population count, take from
|
||||
// https://code.google.com/p/go/issues/detail?id=4988#c11
|
||||
// credit: https://code.google.com/u/arnehormann/
|
||||
// credit: https://play.golang.org/p/U7SogJ7psJ
|
||||
// credit: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
func popcount(x uint64) uint64 {
|
||||
x -= (x >> 1) & 0x5555555555555555
|
||||
x = (x>>2)&0x3333333333333333 + x&0x3333333333333333
|
||||
x += x >> 4
|
||||
x &= 0x0f0f0f0f0f0f0f0f
|
||||
x *= 0x0101010101010101
|
||||
return x >> 56
|
||||
}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
//go:build !amd64 || appengine || go1.9
|
||||
// +build !amd64 appengine go1.9
|
||||
|
||||
package roaring
|
||||
|
||||
func popcntSlice(s []uint64) uint64 {
|
||||
return popcntSliceGo(s)
|
||||
}
|
||||
|
||||
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
return popcntMaskSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
return popcntAndSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
return popcntOrSliceGo(s, m)
|
||||
}
|
||||
|
||||
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
return popcntXorSliceGo(s, m)
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
package roaring
|
||||
|
||||
func popcntSliceGo(s []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for _, x := range s {
|
||||
cnt += popcount(x)
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
||||
func popcntMaskSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] &^ m[i])
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
||||
func popcntAndSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] & m[i])
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
||||
func popcntOrSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] | m[i])
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
||||
func popcntXorSliceGo(s, m []uint64) uint64 {
|
||||
cnt := uint64(0)
|
||||
for i := range s {
|
||||
cnt += popcount(s[i] ^ m[i])
|
||||
}
|
||||
return cnt
|
||||
}
|
||||
|
|
@ -0,0 +1,101 @@
|
|||
package roaring
|
||||
|
||||
import "container/heap"
|
||||
|
||||
/////////////
|
||||
// The priorityQueue is used to keep Bitmaps sorted.
|
||||
////////////
|
||||
|
||||
type item struct {
|
||||
value *Bitmap
|
||||
index int
|
||||
}
|
||||
|
||||
type priorityQueue []*item
|
||||
|
||||
func (pq priorityQueue) Len() int { return len(pq) }
|
||||
|
||||
func (pq priorityQueue) Less(i, j int) bool {
|
||||
return pq[i].value.GetSizeInBytes() < pq[j].value.GetSizeInBytes()
|
||||
}
|
||||
|
||||
func (pq priorityQueue) Swap(i, j int) {
|
||||
pq[i], pq[j] = pq[j], pq[i]
|
||||
pq[i].index = i
|
||||
pq[j].index = j
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) Push(x interface{}) {
|
||||
n := len(*pq)
|
||||
item := x.(*item)
|
||||
item.index = n
|
||||
*pq = append(*pq, item)
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) Pop() interface{} {
|
||||
old := *pq
|
||||
n := len(old)
|
||||
item := old[n-1]
|
||||
item.index = -1 // for safety
|
||||
*pq = old[0 : n-1]
|
||||
return item
|
||||
}
|
||||
|
||||
func (pq *priorityQueue) update(item *item, value *Bitmap) {
|
||||
item.value = value
|
||||
heap.Fix(pq, item.index)
|
||||
}
|
||||
|
||||
/////////////
|
||||
// The containerPriorityQueue is used to keep the containers of various Bitmaps sorted.
|
||||
////////////
|
||||
|
||||
type containeritem struct {
|
||||
value *Bitmap
|
||||
keyindex int
|
||||
index int
|
||||
}
|
||||
|
||||
type containerPriorityQueue []*containeritem
|
||||
|
||||
func (pq containerPriorityQueue) Len() int { return len(pq) }
|
||||
|
||||
func (pq containerPriorityQueue) Less(i, j int) bool {
|
||||
k1 := pq[i].value.highlowcontainer.getKeyAtIndex(pq[i].keyindex)
|
||||
k2 := pq[j].value.highlowcontainer.getKeyAtIndex(pq[j].keyindex)
|
||||
if k1 != k2 {
|
||||
return k1 < k2
|
||||
}
|
||||
c1 := pq[i].value.highlowcontainer.getContainerAtIndex(pq[i].keyindex)
|
||||
c2 := pq[j].value.highlowcontainer.getContainerAtIndex(pq[j].keyindex)
|
||||
|
||||
return c1.getCardinality() > c2.getCardinality()
|
||||
}
|
||||
|
||||
func (pq containerPriorityQueue) Swap(i, j int) {
|
||||
pq[i], pq[j] = pq[j], pq[i]
|
||||
pq[i].index = i
|
||||
pq[j].index = j
|
||||
}
|
||||
|
||||
func (pq *containerPriorityQueue) Push(x interface{}) {
|
||||
n := len(*pq)
|
||||
item := x.(*containeritem)
|
||||
item.index = n
|
||||
*pq = append(*pq, item)
|
||||
}
|
||||
|
||||
func (pq *containerPriorityQueue) Pop() interface{} {
|
||||
old := *pq
|
||||
n := len(old)
|
||||
item := old[n-1]
|
||||
item.index = -1 // for safety
|
||||
*pq = old[0 : n-1]
|
||||
return item
|
||||
}
|
||||
|
||||
//func (pq *containerPriorityQueue) update(item *containeritem, value *Bitmap, keyindex int) {
|
||||
// item.value = value
|
||||
// item.keyindex = keyindex
|
||||
// heap.Fix(pq, item.index)
|
||||
//}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,106 @@
|
|||
.PHONY: help all test format fmtcheck vet lint qa deps clean nuke ser fetch-real-roaring-datasets
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Display general help about this command
|
||||
help:
|
||||
@echo ""
|
||||
@echo "The following commands are available:"
|
||||
@echo ""
|
||||
@echo " make qa : Run all the tests"
|
||||
@echo " make test : Run the unit tests"
|
||||
@echo ""
|
||||
@echo " make format : Format the source code"
|
||||
@echo " make fmtcheck : Check if the source code has been formatted"
|
||||
@echo " make vet : Check for suspicious constructs"
|
||||
@echo " make lint : Check for style errors"
|
||||
@echo ""
|
||||
@echo " make deps : Get the dependencies"
|
||||
@echo " make clean : Remove any build artifact"
|
||||
@echo " make nuke : Deletes any intermediate file"
|
||||
@echo ""
|
||||
@echo " make fuzz-smat : Fuzzy testing with smat"
|
||||
@echo " make fuzz-stream : Fuzzy testing with stream deserialization"
|
||||
@echo " make fuzz-buffer : Fuzzy testing with buffer deserialization"
|
||||
@echo ""
|
||||
|
||||
# Alias for help target
|
||||
all: help
|
||||
test:
|
||||
go test
|
||||
# Format the source code
|
||||
format:
|
||||
@find ./ -type f -name "*.go" -exec gofmt -w {} \;
|
||||
|
||||
# Check if the source code has been formatted
|
||||
fmtcheck:
|
||||
@mkdir -p target
|
||||
@find ./ -type f -name "*.go" -exec gofmt -d {} \; | tee target/format.diff
|
||||
@test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
|
||||
|
||||
# Check for syntax errors
|
||||
vet:
|
||||
GOPATH=$(GOPATH) go vet ./...
|
||||
|
||||
# Check for style errors
|
||||
lint:
|
||||
GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint ./...
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Alias to run all quality-assurance checks
|
||||
qa: fmtcheck test vet lint
|
||||
|
||||
# --- INSTALL ---
|
||||
|
||||
# Get the dependencies
|
||||
deps:
|
||||
GOPATH=$(GOPATH) go get github.com/stretchr/testify
|
||||
GOPATH=$(GOPATH) go get github.com/bits-and-blooms/bitset
|
||||
GOPATH=$(GOPATH) go get github.com/golang/lint/golint
|
||||
GOPATH=$(GOPATH) go get github.com/mschoch/smat
|
||||
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||
GOPATH=$(GOPATH) go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||
GOPATH=$(GOPATH) go get github.com/glycerine/go-unsnap-stream
|
||||
GOPATH=$(GOPATH) go get github.com/philhofer/fwd
|
||||
GOPATH=$(GOPATH) go get github.com/jtolds/gls
|
||||
|
||||
fuzz-smat:
|
||||
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
|
||||
|
||||
fuzz-stream:
|
||||
go-fuzz-build -func FuzzSerializationStream github.com/RoaringBitmap/roaring
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
|
||||
fuzz-buffer:
|
||||
go-fuzz-build -func FuzzSerializationBuffer github.com/RoaringBitmap/roaring
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
|
||||
# Remove any build artifact
|
||||
clean:
|
||||
GOPATH=$(GOPATH) go clean ./...
|
||||
|
||||
# Deletes any intermediate file
|
||||
nuke:
|
||||
rm -rf ./target
|
||||
GOPATH=$(GOPATH) go clean -i ./...
|
||||
|
||||
|
||||
cover:
|
||||
go test -coverprofile=coverage.out
|
||||
go tool cover -html=coverage.out
|
||||
|
||||
fetch-real-roaring-datasets:
|
||||
# pull github.com/RoaringBitmap/real-roaring-datasets -> testdata/real-roaring-datasets
|
||||
git submodule init
|
||||
git submodule update
|
||||
File diff suppressed because it is too large
Load Diff
31
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/fastaggregation64.go
generated
vendored
Normal file
31
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/fastaggregation64.go
generated
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
package roaring64
|
||||
|
||||
// FastAnd computes the intersection between many bitmaps quickly
|
||||
// Compared to the And function, it can take many bitmaps as input, thus saving the trouble
|
||||
// of manually calling "And" many times.
|
||||
func FastAnd(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
answer := And(bitmaps[0], bitmaps[1])
|
||||
for _, bm := range bitmaps[2:] {
|
||||
answer.And(bm)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
// FastOr computes the union between many bitmaps quickly, as opposed to having to call Or repeatedly.
|
||||
func FastOr(bitmaps ...*Bitmap) *Bitmap {
|
||||
if len(bitmaps) == 0 {
|
||||
return NewBitmap()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0].Clone()
|
||||
}
|
||||
answer := Or(bitmaps[0], bitmaps[1])
|
||||
for _, bm := range bitmaps[2:] {
|
||||
answer.Or(bm)
|
||||
}
|
||||
return answer
|
||||
}
|
||||
169
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/iterables64.go
generated
vendored
Normal file
169
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/iterables64.go
generated
vendored
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
package roaring64
|
||||
|
||||
import (
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
)
|
||||
|
||||
// IntIterable64 allows you to iterate over the values in a Bitmap
|
||||
type IntIterable64 interface {
|
||||
HasNext() bool
|
||||
Next() uint64
|
||||
}
|
||||
|
||||
// IntPeekable64 allows you to look at the next value without advancing and
|
||||
// advance as long as the next value is smaller than minval
|
||||
type IntPeekable64 interface {
|
||||
IntIterable64
|
||||
// PeekNext peeks the next value without advancing the iterator
|
||||
PeekNext() uint64
|
||||
// AdvanceIfNeeded advances as long as the next value is smaller than minval
|
||||
AdvanceIfNeeded(minval uint64)
|
||||
}
|
||||
|
||||
type intIterator struct {
|
||||
pos int
|
||||
hs uint64
|
||||
iter roaring.IntPeekable
|
||||
highlowcontainer *roaringArray64
|
||||
}
|
||||
|
||||
// HasNext returns true if there are more integers to iterate over
|
||||
func (ii *intIterator) HasNext() bool {
|
||||
return ii.pos < ii.highlowcontainer.size()
|
||||
}
|
||||
|
||||
func (ii *intIterator) init() {
|
||||
if ii.highlowcontainer.size() > ii.pos {
|
||||
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).Iterator()
|
||||
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
|
||||
}
|
||||
}
|
||||
|
||||
// Next returns the next integer
|
||||
func (ii *intIterator) Next() uint64 {
|
||||
lowbits := ii.iter.Next()
|
||||
x := uint64(lowbits) | ii.hs
|
||||
if !ii.iter.HasNext() {
|
||||
ii.pos = ii.pos + 1
|
||||
ii.init()
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
// PeekNext peeks the next value without advancing the iterator
|
||||
func (ii *intIterator) PeekNext() uint64 {
|
||||
return uint64(ii.iter.PeekNext()&maxLowBit) | ii.hs
|
||||
}
|
||||
|
||||
// AdvanceIfNeeded advances as long as the next value is smaller than minval
|
||||
func (ii *intIterator) AdvanceIfNeeded(minval uint64) {
|
||||
to := minval >> 32
|
||||
|
||||
for ii.HasNext() && (ii.hs>>32) < to {
|
||||
ii.pos++
|
||||
ii.init()
|
||||
}
|
||||
|
||||
if ii.HasNext() && (ii.hs>>32) == to {
|
||||
ii.iter.AdvanceIfNeeded(lowbits(minval))
|
||||
|
||||
if !ii.iter.HasNext() {
|
||||
ii.pos++
|
||||
ii.init()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newIntIterator(a *Bitmap) *intIterator {
|
||||
p := new(intIterator)
|
||||
p.pos = 0
|
||||
p.highlowcontainer = &a.highlowcontainer
|
||||
p.init()
|
||||
return p
|
||||
}
|
||||
|
||||
type intReverseIterator struct {
|
||||
pos int
|
||||
hs uint64
|
||||
iter roaring.IntIterable
|
||||
highlowcontainer *roaringArray64
|
||||
}
|
||||
|
||||
// HasNext returns true if there are more integers to iterate over
|
||||
func (ii *intReverseIterator) HasNext() bool {
|
||||
return ii.pos >= 0
|
||||
}
|
||||
|
||||
func (ii *intReverseIterator) init() {
|
||||
if ii.pos >= 0 {
|
||||
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).ReverseIterator()
|
||||
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
|
||||
} else {
|
||||
ii.iter = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Next returns the next integer
|
||||
func (ii *intReverseIterator) Next() uint64 {
|
||||
x := uint64(ii.iter.Next()) | ii.hs
|
||||
if !ii.iter.HasNext() {
|
||||
ii.pos = ii.pos - 1
|
||||
ii.init()
|
||||
}
|
||||
return x
|
||||
}
|
||||
|
||||
func newIntReverseIterator(a *Bitmap) *intReverseIterator {
|
||||
p := new(intReverseIterator)
|
||||
p.highlowcontainer = &a.highlowcontainer
|
||||
p.pos = a.highlowcontainer.size() - 1
|
||||
p.init()
|
||||
return p
|
||||
}
|
||||
|
||||
// ManyIntIterable64 allows you to iterate over the values in a Bitmap
|
||||
type ManyIntIterable64 interface {
|
||||
// pass in a buffer to fill up with values, returns how many values were returned
|
||||
NextMany([]uint64) int
|
||||
}
|
||||
|
||||
type manyIntIterator struct {
|
||||
pos int
|
||||
hs uint64
|
||||
iter roaring.ManyIntIterable
|
||||
highlowcontainer *roaringArray64
|
||||
}
|
||||
|
||||
func (ii *manyIntIterator) init() {
|
||||
if ii.highlowcontainer.size() > ii.pos {
|
||||
ii.iter = ii.highlowcontainer.getContainerAtIndex(ii.pos).ManyIterator()
|
||||
ii.hs = uint64(ii.highlowcontainer.getKeyAtIndex(ii.pos)) << 32
|
||||
} else {
|
||||
ii.iter = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (ii *manyIntIterator) NextMany(buf []uint64) int {
|
||||
n := 0
|
||||
for n < len(buf) {
|
||||
if ii.iter == nil {
|
||||
break
|
||||
}
|
||||
moreN := ii.iter.NextMany64(ii.hs, buf[n:])
|
||||
n += moreN
|
||||
if moreN == 0 {
|
||||
ii.pos = ii.pos + 1
|
||||
ii.init()
|
||||
}
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func newManyIntIterator(a *Bitmap) *manyIntIterator {
|
||||
p := new(manyIntIterator)
|
||||
p.pos = 0
|
||||
p.highlowcontainer = &a.highlowcontainer
|
||||
p.init()
|
||||
return p
|
||||
}
|
||||
293
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/parallel64.go
generated
vendored
Normal file
293
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/parallel64.go
generated
vendored
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
package roaring64
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
)
|
||||
|
||||
var defaultWorkerCount = runtime.NumCPU()
|
||||
|
||||
// ParOr computes the union (OR) of all provided bitmaps in parallel,
|
||||
// where the parameter "parallelism" determines how many workers are to be used
|
||||
// (if it is set to 0, a default number of workers is chosen)
|
||||
func ParOr(parallelism int, bitmaps ...*Bitmap) *Bitmap {
|
||||
var lKey uint32 = maxUint32
|
||||
var hKey uint32
|
||||
|
||||
bitmapsFiltered := bitmaps[:0]
|
||||
for _, b := range bitmaps {
|
||||
if !b.IsEmpty() {
|
||||
bitmapsFiltered = append(bitmapsFiltered, b)
|
||||
}
|
||||
}
|
||||
bitmaps = bitmapsFiltered
|
||||
|
||||
for _, b := range bitmaps {
|
||||
lKey = minOfUint32(lKey, b.highlowcontainer.keys[0])
|
||||
hKey = maxOfUint32(hKey, b.highlowcontainer.keys[b.highlowcontainer.size()-1])
|
||||
}
|
||||
|
||||
if lKey == maxUint32 && hKey == 0 {
|
||||
return New()
|
||||
} else if len(bitmaps) == 1 {
|
||||
return bitmaps[0]
|
||||
}
|
||||
// The following might overflow and we do not want that!
|
||||
// as it might lead to a channel of size 0 later which,
|
||||
// on some systems, would block indefinitely.
|
||||
keyRange := uint64(hKey) - uint64(lKey) + 1
|
||||
if keyRange == 1 {
|
||||
// revert to FastOr. Since the key range is 0
|
||||
// no container-level aggregation parallelism is achievable
|
||||
return FastOr(bitmaps...)
|
||||
}
|
||||
|
||||
if parallelism == 0 {
|
||||
parallelism = defaultWorkerCount
|
||||
}
|
||||
// We cannot use int since int is 32-bit on 32-bit systems.
|
||||
var chunkSize int64
|
||||
var chunkCount int64
|
||||
if int64(parallelism)*4 > int64(keyRange) {
|
||||
chunkSize = 1
|
||||
chunkCount = int64(keyRange)
|
||||
} else {
|
||||
chunkCount = int64(parallelism) * 4
|
||||
chunkSize = (int64(keyRange) + chunkCount - 1) / chunkCount
|
||||
}
|
||||
|
||||
if chunkCount*chunkSize < int64(keyRange) {
|
||||
// it's fine to panic to indicate an implementation error
|
||||
panic(fmt.Sprintf("invariant check failed: chunkCount * chunkSize < keyRange, %d * %d < %d", chunkCount, chunkSize, keyRange))
|
||||
}
|
||||
|
||||
chunks := make([]*roaringArray64, chunkCount)
|
||||
|
||||
chunkSpecChan := make(chan parChunkSpec, minOfInt(maxOfInt(64, 2*parallelism), int(chunkCount)))
|
||||
chunkChan := make(chan parChunk, minOfInt(32, int(chunkCount)))
|
||||
|
||||
orFunc := func() {
|
||||
for spec := range chunkSpecChan {
|
||||
ra := orOnRange(&bitmaps[0].highlowcontainer, &bitmaps[1].highlowcontainer, spec.start, spec.end)
|
||||
for _, b := range bitmaps[2:] {
|
||||
ra = iorOnRange(ra, &b.highlowcontainer, spec.start, spec.end)
|
||||
}
|
||||
|
||||
chunkChan <- parChunk{ra, spec.idx}
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < parallelism; i++ {
|
||||
go orFunc()
|
||||
}
|
||||
|
||||
go func() {
|
||||
for i := int64(0); i < chunkCount; i++ {
|
||||
spec := parChunkSpec{
|
||||
start: uint32(int64(lKey) + i*chunkSize),
|
||||
end: uint32(minOfInt64(int64(lKey)+(i+1)*chunkSize-1, int64(hKey))),
|
||||
idx: int(i),
|
||||
}
|
||||
chunkSpecChan <- spec
|
||||
}
|
||||
}()
|
||||
|
||||
chunksRemaining := chunkCount
|
||||
for chunk := range chunkChan {
|
||||
chunks[chunk.idx] = chunk.ra
|
||||
chunksRemaining--
|
||||
if chunksRemaining == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
close(chunkChan)
|
||||
close(chunkSpecChan)
|
||||
|
||||
containerCount := 0
|
||||
for _, chunk := range chunks {
|
||||
containerCount += chunk.size()
|
||||
}
|
||||
|
||||
result := Bitmap{
|
||||
roaringArray64{
|
||||
containers: make([]*roaring.Bitmap, containerCount),
|
||||
keys: make([]uint32, containerCount),
|
||||
needCopyOnWrite: make([]bool, containerCount),
|
||||
},
|
||||
}
|
||||
|
||||
resultOffset := 0
|
||||
for _, chunk := range chunks {
|
||||
copy(result.highlowcontainer.containers[resultOffset:], chunk.containers)
|
||||
copy(result.highlowcontainer.keys[resultOffset:], chunk.keys)
|
||||
copy(result.highlowcontainer.needCopyOnWrite[resultOffset:], chunk.needCopyOnWrite)
|
||||
resultOffset += chunk.size()
|
||||
}
|
||||
|
||||
return &result
|
||||
}
|
||||
|
||||
type parChunkSpec struct {
|
||||
start uint32
|
||||
end uint32
|
||||
idx int
|
||||
}
|
||||
|
||||
type parChunk struct {
|
||||
ra *roaringArray64
|
||||
idx int
|
||||
}
|
||||
|
||||
func (c parChunk) size() int {
|
||||
return c.ra.size()
|
||||
}
|
||||
|
||||
// parNaiveStartAt returns the index of the first key that is inclusive between start and last
|
||||
// Returns the size if there is no such key
|
||||
func parNaiveStartAt(ra *roaringArray64, start uint32, last uint32) int {
|
||||
for idx, key := range ra.keys {
|
||||
if key >= start && key <= last {
|
||||
return idx
|
||||
} else if key > last {
|
||||
break
|
||||
}
|
||||
}
|
||||
return ra.size()
|
||||
}
|
||||
|
||||
func orOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
|
||||
answer := &roaringArray64{}
|
||||
length1 := ra1.size()
|
||||
length2 := ra2.size()
|
||||
|
||||
idx1 := parNaiveStartAt(ra1, start, last)
|
||||
idx2 := parNaiveStartAt(ra2, start, last)
|
||||
|
||||
var key1 uint32
|
||||
var key2 uint32
|
||||
if idx1 < length1 && idx2 < length2 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
|
||||
for key1 <= last && key2 <= last {
|
||||
if key1 < key2 {
|
||||
answer.appendCopy(*ra1, idx1)
|
||||
idx1++
|
||||
if idx1 == length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
} else if key1 > key2 {
|
||||
answer.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 == length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
} else {
|
||||
c1 := ra1.getContainerAtIndex(idx1)
|
||||
|
||||
// answer.appendContainer(key1, c1.lazyOR(ra2.getContainerAtIndex(idx2)), false)
|
||||
answer.appendContainer(key1, roaring.Or(c1, ra2.getContainerAtIndex(idx2)), false)
|
||||
idx1++
|
||||
idx2++
|
||||
if idx1 == length1 || idx2 == length2 {
|
||||
break
|
||||
}
|
||||
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if idx2 < length2 {
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
for key2 <= last {
|
||||
answer.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 == length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
|
||||
if idx1 < length1 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
for key1 <= last {
|
||||
answer.appendCopy(*ra1, idx1)
|
||||
idx1++
|
||||
if idx1 == length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
}
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func iorOnRange(ra1, ra2 *roaringArray64, start, last uint32) *roaringArray64 {
|
||||
length1 := ra1.size()
|
||||
length2 := ra2.size()
|
||||
|
||||
idx1 := 0
|
||||
idx2 := parNaiveStartAt(ra2, start, last)
|
||||
|
||||
var key1 uint32
|
||||
var key2 uint32
|
||||
if idx1 < length1 && idx2 < length2 {
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
|
||||
for key1 <= last && key2 <= last {
|
||||
if key1 < key2 {
|
||||
idx1++
|
||||
if idx1 >= length1 {
|
||||
break
|
||||
}
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
} else if key1 > key2 {
|
||||
ra1.insertNewKeyValueAt(idx1, key2, ra2.getContainerAtIndex(idx2))
|
||||
ra1.needCopyOnWrite[idx1] = true
|
||||
idx2++
|
||||
idx1++
|
||||
length1++
|
||||
if idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
} else {
|
||||
c1 := ra1.getWritableContainerAtIndex(idx1)
|
||||
|
||||
// ra1.containers[idx1] = c1.lazyIOR(ra2.getContainerAtIndex(idx2))
|
||||
c1.Or(ra2.getContainerAtIndex(idx2))
|
||||
ra1.setContainerAtIndex(idx1, c1)
|
||||
|
||||
ra1.needCopyOnWrite[idx1] = false
|
||||
idx1++
|
||||
idx2++
|
||||
if idx1 >= length1 || idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
|
||||
key1 = ra1.getKeyAtIndex(idx1)
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
}
|
||||
if idx2 < length2 {
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
for key2 <= last {
|
||||
ra1.appendCopy(*ra2, idx2)
|
||||
idx2++
|
||||
if idx2 >= length2 {
|
||||
break
|
||||
}
|
||||
key2 = ra2.getKeyAtIndex(idx2)
|
||||
}
|
||||
}
|
||||
return ra1
|
||||
}
|
||||
1257
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/roaring64.go
generated
vendored
Normal file
1257
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/roaring64.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
462
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/roaringarray64.go
generated
vendored
Normal file
462
vendor/github.com/RoaringBitmap/roaring/v2/roaring64/roaringarray64.go
generated
vendored
Normal file
|
|
@ -0,0 +1,462 @@
|
|||
package roaring64
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2"
|
||||
)
|
||||
|
||||
type roaringArray64 struct {
|
||||
keys []uint32
|
||||
containers []*roaring.Bitmap
|
||||
needCopyOnWrite []bool
|
||||
copyOnWrite bool
|
||||
}
|
||||
|
||||
var (
|
||||
ErrKeySortOrder = errors.New("keys were out of order")
|
||||
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
|
||||
)
|
||||
|
||||
// runOptimize compresses the element containers to minimize space consumed.
|
||||
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
|
||||
// A: since we aren't changing the logical content, just the representation,
|
||||
//
|
||||
// we don't bother to check the needCopyOnWrite bits. We replace
|
||||
// (possibly all) elements of ra.containers in-place with space
|
||||
// optimized versions.
|
||||
func (ra *roaringArray64) runOptimize() {
|
||||
for i := range ra.containers {
|
||||
ra.containers[i].RunOptimize()
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) appendContainer(key uint32, value *roaring.Bitmap, mustCopyOnWrite bool) {
|
||||
ra.keys = append(ra.keys, key)
|
||||
ra.containers = append(ra.containers, value)
|
||||
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) appendWithoutCopy(sa roaringArray64, startingindex int) {
|
||||
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) appendCopy(sa roaringArray64, startingindex int) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
|
||||
if !copyonwrite {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].Clone(), copyonwrite)
|
||||
} else {
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].Clone(), copyonwrite)
|
||||
if !sa.needsCopyOnWrite(startingindex) {
|
||||
sa.setNeedsCopyOnWrite(startingindex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) appendWithoutCopyMany(sa roaringArray64, startingindex, end int) {
|
||||
for i := startingindex; i < end; i++ {
|
||||
ra.appendWithoutCopy(sa, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) appendCopyMany(sa roaringArray64, startingindex, end int) {
|
||||
for i := startingindex; i < end; i++ {
|
||||
ra.appendCopy(sa, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) appendCopiesUntil(sa roaringArray64, stoppingKey uint32) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||
|
||||
for i := 0; i < sa.size(); i++ {
|
||||
if sa.keys[i] >= stoppingKey {
|
||||
break
|
||||
}
|
||||
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||
if thiscopyonewrite {
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||
if !sa.needsCopyOnWrite(i) {
|
||||
sa.setNeedsCopyOnWrite(i)
|
||||
}
|
||||
|
||||
} else {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i].Clone(), thiscopyonewrite)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) appendCopiesAfter(sa roaringArray64, beforeStart uint32) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||
|
||||
startLocation := sa.getIndex(beforeStart)
|
||||
if startLocation >= 0 {
|
||||
startLocation++
|
||||
} else {
|
||||
startLocation = -startLocation - 1
|
||||
}
|
||||
|
||||
for i := startLocation; i < sa.size(); i++ {
|
||||
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||
if thiscopyonewrite {
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||
if !sa.needsCopyOnWrite(i) {
|
||||
sa.setNeedsCopyOnWrite(i)
|
||||
}
|
||||
} else {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i].Clone(), thiscopyonewrite)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) removeIndexRange(begin, end int) {
|
||||
if end <= begin {
|
||||
return
|
||||
}
|
||||
|
||||
r := end - begin
|
||||
|
||||
copy(ra.keys[begin:], ra.keys[end:])
|
||||
copy(ra.containers[begin:], ra.containers[end:])
|
||||
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
|
||||
|
||||
ra.resize(len(ra.keys) - r)
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) resize(newsize int) {
|
||||
for k := newsize; k < len(ra.containers); k++ {
|
||||
ra.keys[k] = 0
|
||||
ra.needCopyOnWrite[k] = false
|
||||
ra.containers[k] = nil
|
||||
}
|
||||
|
||||
ra.keys = ra.keys[:newsize]
|
||||
ra.containers = ra.containers[:newsize]
|
||||
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) clear() {
|
||||
ra.resize(0)
|
||||
ra.copyOnWrite = false
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) clone() *roaringArray64 {
|
||||
sa := roaringArray64{}
|
||||
sa.copyOnWrite = ra.copyOnWrite
|
||||
|
||||
// this is where copyOnWrite is used.
|
||||
if ra.copyOnWrite {
|
||||
sa.keys = make([]uint32, len(ra.keys))
|
||||
copy(sa.keys, ra.keys)
|
||||
sa.containers = make([]*roaring.Bitmap, len(ra.containers))
|
||||
copy(sa.containers, ra.containers)
|
||||
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||
|
||||
ra.markAllAsNeedingCopyOnWrite()
|
||||
sa.markAllAsNeedingCopyOnWrite()
|
||||
|
||||
// sa.needCopyOnWrite is shared
|
||||
} else {
|
||||
// make a full copy
|
||||
|
||||
sa.keys = make([]uint32, len(ra.keys))
|
||||
copy(sa.keys, ra.keys)
|
||||
|
||||
sa.containers = make([]*roaring.Bitmap, len(ra.containers))
|
||||
for i := range sa.containers {
|
||||
sa.containers[i] = ra.containers[i].Clone()
|
||||
}
|
||||
|
||||
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||
}
|
||||
return &sa
|
||||
}
|
||||
|
||||
// clone all containers which have needCopyOnWrite set to true
|
||||
// This can be used to make sure it is safe to munmap a []byte
|
||||
// that the roaring array may still have a reference to.
|
||||
func (ra *roaringArray64) cloneCopyOnWriteContainers() {
|
||||
for i, needCopyOnWrite := range ra.needCopyOnWrite {
|
||||
if needCopyOnWrite {
|
||||
ra.containers[i] = ra.containers[i].Clone()
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unused function:
|
||||
// func (ra *roaringArray64) containsKey(x uint32) bool {
|
||||
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
|
||||
// }
|
||||
|
||||
func (ra *roaringArray64) getContainer(x uint32) *roaring.Bitmap {
|
||||
i := ra.binarySearch(0, int64(len(ra.keys)), x)
|
||||
if i < 0 {
|
||||
return nil
|
||||
}
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) getContainerAtIndex(i int) *roaring.Bitmap {
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) getWritableContainerAtIndex(i int) *roaring.Bitmap {
|
||||
if ra.needCopyOnWrite[i] {
|
||||
ra.containers[i] = ra.containers[i].Clone()
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) getIndex(x uint32) int {
|
||||
// before the binary search, we optimize for frequent cases
|
||||
size := len(ra.keys)
|
||||
if (size == 0) || (ra.keys[size-1] == x) {
|
||||
return size - 1
|
||||
}
|
||||
return ra.binarySearch(0, int64(size), x)
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) getKeyAtIndex(i int) uint32 {
|
||||
return ra.keys[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) insertNewKeyValueAt(i int, key uint32, value *roaring.Bitmap) {
|
||||
ra.keys = append(ra.keys, 0)
|
||||
ra.containers = append(ra.containers, nil)
|
||||
|
||||
copy(ra.keys[i+1:], ra.keys[i:])
|
||||
copy(ra.containers[i+1:], ra.containers[i:])
|
||||
|
||||
ra.keys[i] = key
|
||||
ra.containers[i] = value
|
||||
|
||||
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
|
||||
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) remove(key uint32) bool {
|
||||
i := ra.binarySearch(0, int64(len(ra.keys)), key)
|
||||
if i >= 0 { // if a new key
|
||||
ra.removeAtIndex(i)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) removeAtIndex(i int) {
|
||||
copy(ra.keys[i:], ra.keys[i+1:])
|
||||
copy(ra.containers[i:], ra.containers[i+1:])
|
||||
|
||||
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
|
||||
|
||||
ra.resize(len(ra.keys) - 1)
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) setContainerAtIndex(i int, c *roaring.Bitmap) {
|
||||
ra.containers[i] = c
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) replaceKeyAndContainerAtIndex(i int, key uint32, c *roaring.Bitmap, mustCopyOnWrite bool) {
|
||||
ra.keys[i] = key
|
||||
ra.containers[i] = c
|
||||
ra.needCopyOnWrite[i] = mustCopyOnWrite
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) size() int {
|
||||
return len(ra.keys)
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) binarySearch(begin, end int64, ikey uint32) int {
|
||||
low := begin
|
||||
high := end - 1
|
||||
for low+16 <= high {
|
||||
middleIndex := low + (high-low)/2 // avoid overflow
|
||||
middleValue := ra.keys[middleIndex]
|
||||
|
||||
if middleValue < ikey {
|
||||
low = middleIndex + 1
|
||||
} else if middleValue > ikey {
|
||||
high = middleIndex - 1
|
||||
} else {
|
||||
return int(middleIndex)
|
||||
}
|
||||
}
|
||||
for ; low <= high; low++ {
|
||||
val := ra.keys[low]
|
||||
if val >= ikey {
|
||||
if val == ikey {
|
||||
return int(low)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
return -int(low + 1)
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) equals(o interface{}) bool {
|
||||
srb, ok := o.(roaringArray64)
|
||||
if ok {
|
||||
|
||||
if srb.size() != ra.size() {
|
||||
return false
|
||||
}
|
||||
for i, k := range ra.keys {
|
||||
if k != srb.keys[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
for i, c := range ra.containers {
|
||||
if !c.Equals(srb.containers[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) hasRunCompression() bool {
|
||||
for _, c := range ra.containers {
|
||||
if c.HasRunCompression() {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the smallest integer index strictly larger than pos such that array[index].key>=min. If none can
|
||||
* be found, return size. Based on code by O. Kaser.
|
||||
*
|
||||
* @param min minimal value
|
||||
* @param pos index to exceed
|
||||
* @return the smallest index greater than pos such that array[index].key is at least as large as
|
||||
* min, or size if it is not possible.
|
||||
*/
|
||||
func (ra *roaringArray64) advanceUntil(min uint32, pos int) int {
|
||||
lower := pos + 1
|
||||
|
||||
if lower >= len(ra.keys) || ra.keys[lower] >= min {
|
||||
return lower
|
||||
}
|
||||
|
||||
spansize := 1
|
||||
|
||||
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
|
||||
spansize *= 2
|
||||
}
|
||||
var upper int
|
||||
if lower+spansize < len(ra.keys) {
|
||||
upper = lower + spansize
|
||||
} else {
|
||||
upper = len(ra.keys) - 1
|
||||
}
|
||||
|
||||
if ra.keys[upper] == min {
|
||||
return upper
|
||||
}
|
||||
|
||||
if ra.keys[upper] < min {
|
||||
// means
|
||||
// array
|
||||
// has no
|
||||
// item
|
||||
// >= min
|
||||
// pos = array.length;
|
||||
return len(ra.keys)
|
||||
}
|
||||
|
||||
// we know that the next-smallest span was too small
|
||||
lower += (spansize >> 1)
|
||||
|
||||
mid := 0
|
||||
for lower+1 != upper {
|
||||
mid = (lower + upper) >> 1
|
||||
if ra.keys[mid] == min {
|
||||
return mid
|
||||
} else if ra.keys[mid] < min {
|
||||
lower = mid
|
||||
} else {
|
||||
upper = mid
|
||||
}
|
||||
}
|
||||
return upper
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) markAllAsNeedingCopyOnWrite() {
|
||||
for i := range ra.needCopyOnWrite {
|
||||
ra.needCopyOnWrite[i] = true
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) needsCopyOnWrite(i int) bool {
|
||||
return ra.needCopyOnWrite[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) setNeedsCopyOnWrite(i int) {
|
||||
ra.needCopyOnWrite[i] = true
|
||||
}
|
||||
|
||||
// should be dirt cheap
|
||||
func (ra *roaringArray64) serializedSizeInBytes() uint64 {
|
||||
answer := uint64(8)
|
||||
for _, c := range ra.containers {
|
||||
answer += 4
|
||||
answer += c.GetSerializedSizeInBytes()
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func (ra *roaringArray64) checkKeysSorted() bool {
|
||||
if len(ra.keys) == 0 || len(ra.keys) == 1 {
|
||||
return true
|
||||
}
|
||||
previous := ra.keys[0]
|
||||
for nextIdx := 1; nextIdx < len(ra.keys); nextIdx++ {
|
||||
next := ra.keys[nextIdx]
|
||||
if previous >= next {
|
||||
return false
|
||||
}
|
||||
previous = next
|
||||
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// validate checks the referential integrity
|
||||
// ensures len(keys) == len(containers), recurses and checks each container type
|
||||
func (ra *roaringArray64) validate() error {
|
||||
if !ra.checkKeysSorted() {
|
||||
return ErrKeySortOrder
|
||||
}
|
||||
|
||||
if len(ra.keys) != len(ra.containers) {
|
||||
return ErrCardinalityConstraint
|
||||
}
|
||||
|
||||
if len(ra.keys) != len(ra.needCopyOnWrite) {
|
||||
return ErrCardinalityConstraint
|
||||
}
|
||||
|
||||
for _, maps := range ra.containers {
|
||||
err := maps.Validate()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if maps.IsEmpty() {
|
||||
return errors.New("empty container")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
package roaring64
|
||||
|
||||
import "github.com/RoaringBitmap/roaring/v2"
|
||||
|
||||
func highbits(x uint64) uint32 {
|
||||
return uint32(x >> 32)
|
||||
}
|
||||
|
||||
func lowbits(x uint64) uint32 {
|
||||
return uint32(x & maxLowBit)
|
||||
}
|
||||
|
||||
const maxLowBit = roaring.MaxUint32
|
||||
const maxUint32 = roaring.MaxUint32
|
||||
|
||||
func minOfInt64(a, b int64) int64 {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func minOfInt(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func maxOfInt(a, b int) int {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func maxOfUint32(a, b uint32) uint32 {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func minOfUint32(a, b uint32) uint32 {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
|
@ -0,0 +1,820 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/RoaringBitmap/roaring/v2/internal"
|
||||
)
|
||||
|
||||
type container interface {
|
||||
// addOffset returns the (low, high) parts of the shifted container.
|
||||
// Whenever one of them would be empty, nil will be returned instead to
|
||||
// avoid unnecessary allocations.
|
||||
addOffset(uint16) (container, container)
|
||||
|
||||
clone() container
|
||||
and(container) container
|
||||
andCardinality(container) int
|
||||
iand(container) container // i stands for inplace
|
||||
andNot(container) container
|
||||
iandNot(container) container // i stands for inplace
|
||||
isEmpty() bool
|
||||
getCardinality() int
|
||||
// rank returns the number of integers that are
|
||||
// smaller or equal to x. rank(infinity) would be getCardinality().
|
||||
rank(uint16) int
|
||||
|
||||
iadd(x uint16) bool // inplace, returns true if x was new.
|
||||
iaddReturnMinimized(uint16) container // may change return type to minimize storage.
|
||||
|
||||
iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||
|
||||
iremove(x uint16) bool // inplace, returns true if x was present.
|
||||
iremoveReturnMinimized(uint16) container // may change return type to minimize storage.
|
||||
|
||||
not(start, final int) container // range is [firstOfRange,lastOfRange)
|
||||
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
|
||||
xor(r container) container
|
||||
getShortIterator() shortPeekable
|
||||
iterate(cb func(x uint16) bool) bool
|
||||
getReverseIterator() shortIterable
|
||||
getManyIterator() manyIterable
|
||||
contains(i uint16) bool
|
||||
maximum() uint16
|
||||
minimum() uint16
|
||||
|
||||
// equals is now logical equals; it does not require the
|
||||
// same underlying container types, but compares across
|
||||
// any of the implementations.
|
||||
equals(r container) bool
|
||||
|
||||
fillLeastSignificant16bits(array []uint32, i int, mask uint32) int
|
||||
or(r container) container
|
||||
orCardinality(r container) int
|
||||
isFull() bool
|
||||
ior(r container) container // i stands for inplace
|
||||
intersects(r container) bool // whether the two containers intersect
|
||||
lazyOR(r container) container
|
||||
lazyIOR(r container) container
|
||||
getSizeInBytes() int
|
||||
iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
|
||||
selectInt(x uint16) int // selectInt returns the xth integer in the container
|
||||
serializedSizeInBytes() int
|
||||
writeTo(io.Writer) (int, error)
|
||||
|
||||
numberOfRuns() int
|
||||
toEfficientContainer() container
|
||||
String() string
|
||||
containerType() contype
|
||||
|
||||
safeMinimum() (uint16, error)
|
||||
safeMaximum() (uint16, error)
|
||||
nextValue(x uint16) int
|
||||
previousValue(x uint16) int
|
||||
nextAbsentValue(x uint16) int
|
||||
previousAbsentValue(x uint16) int
|
||||
validate() error
|
||||
}
|
||||
|
||||
type contype uint8
|
||||
|
||||
const (
|
||||
bitmapContype contype = iota
|
||||
arrayContype
|
||||
run16Contype
|
||||
run32Contype
|
||||
)
|
||||
|
||||
var (
|
||||
ErrKeySortOrder = errors.New("keys were out of order")
|
||||
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
|
||||
)
|
||||
|
||||
// careful: range is [firstOfRange,lastOfRange]
|
||||
func rangeOfOnes(start, last int) container {
|
||||
if start > MaxUint16 {
|
||||
panic("rangeOfOnes called with start > MaxUint16")
|
||||
}
|
||||
if last > MaxUint16 {
|
||||
panic("rangeOfOnes called with last > MaxUint16")
|
||||
}
|
||||
if start < 0 {
|
||||
panic("rangeOfOnes called with start < 0")
|
||||
}
|
||||
if last < 0 {
|
||||
panic("rangeOfOnes called with last < 0")
|
||||
}
|
||||
return newRunContainer16Range(uint16(start), uint16(last))
|
||||
}
|
||||
|
||||
type roaringArray struct {
|
||||
keys []uint16
|
||||
containers []container `msg:"-"` // don't try to serialize directly.
|
||||
needCopyOnWrite []bool
|
||||
copyOnWrite bool
|
||||
}
|
||||
|
||||
func newRoaringArray() *roaringArray {
|
||||
return &roaringArray{}
|
||||
}
|
||||
|
||||
// runOptimize compresses the element containers to minimize space consumed.
|
||||
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
|
||||
// A: since we aren't changing the logical content, just the representation,
|
||||
//
|
||||
// we don't bother to check the needCopyOnWrite bits. We replace
|
||||
// (possibly all) elements of ra.containers in-place with space
|
||||
// optimized versions.
|
||||
func (ra *roaringArray) runOptimize() {
|
||||
for i := range ra.containers {
|
||||
ra.containers[i] = ra.containers[i].toEfficientContainer()
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendContainer(key uint16, value container, mustCopyOnWrite bool) {
|
||||
ra.keys = append(ra.keys, key)
|
||||
ra.containers = append(ra.containers, value)
|
||||
ra.needCopyOnWrite = append(ra.needCopyOnWrite, mustCopyOnWrite)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendWithoutCopy(sa roaringArray, startingindex int) {
|
||||
mustCopyOnWrite := sa.needCopyOnWrite[startingindex]
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], mustCopyOnWrite)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendCopy(sa roaringArray, startingindex int) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := (ra.copyOnWrite && sa.copyOnWrite) || sa.needsCopyOnWrite(startingindex)
|
||||
if !copyonwrite {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex].clone(), copyonwrite)
|
||||
} else {
|
||||
ra.appendContainer(sa.keys[startingindex], sa.containers[startingindex], copyonwrite)
|
||||
if !sa.needsCopyOnWrite(startingindex) {
|
||||
sa.setNeedsCopyOnWrite(startingindex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendWithoutCopyMany(sa roaringArray, startingindex, end int) {
|
||||
for i := startingindex; i < end; i++ {
|
||||
ra.appendWithoutCopy(sa, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendCopyMany(sa roaringArray, startingindex, end int) {
|
||||
for i := startingindex; i < end; i++ {
|
||||
ra.appendCopy(sa, i)
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||
|
||||
for i := 0; i < sa.size(); i++ {
|
||||
if sa.keys[i] >= stoppingKey {
|
||||
break
|
||||
}
|
||||
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||
if thiscopyonewrite {
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||
if !sa.needsCopyOnWrite(i) {
|
||||
sa.setNeedsCopyOnWrite(i)
|
||||
}
|
||||
|
||||
} else {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
|
||||
// cow only if the two request it, or if we already have a lightweight copy
|
||||
copyonwrite := ra.copyOnWrite && sa.copyOnWrite
|
||||
|
||||
startLocation := sa.getIndex(beforeStart)
|
||||
if startLocation >= 0 {
|
||||
startLocation++
|
||||
} else {
|
||||
startLocation = -startLocation - 1
|
||||
}
|
||||
|
||||
for i := startLocation; i < sa.size(); i++ {
|
||||
thiscopyonewrite := copyonwrite || sa.needsCopyOnWrite(i)
|
||||
if thiscopyonewrite {
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i], thiscopyonewrite)
|
||||
if !sa.needsCopyOnWrite(i) {
|
||||
sa.setNeedsCopyOnWrite(i)
|
||||
}
|
||||
} else {
|
||||
// since there is no copy-on-write, we need to clone the container (this is important)
|
||||
ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) removeIndexRange(begin, end int) {
|
||||
if end <= begin {
|
||||
return
|
||||
}
|
||||
|
||||
r := end - begin
|
||||
|
||||
copy(ra.keys[begin:], ra.keys[end:])
|
||||
copy(ra.containers[begin:], ra.containers[end:])
|
||||
copy(ra.needCopyOnWrite[begin:], ra.needCopyOnWrite[end:])
|
||||
|
||||
ra.resize(len(ra.keys) - r)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) resize(newsize int) {
|
||||
for k := newsize; k < len(ra.containers); k++ {
|
||||
ra.containers[k] = nil
|
||||
}
|
||||
|
||||
ra.keys = ra.keys[:newsize]
|
||||
ra.containers = ra.containers[:newsize]
|
||||
ra.needCopyOnWrite = ra.needCopyOnWrite[:newsize]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) clear() {
|
||||
ra.resize(0)
|
||||
ra.copyOnWrite = false
|
||||
}
|
||||
|
||||
func (ra *roaringArray) clone() *roaringArray {
|
||||
sa := roaringArray{}
|
||||
sa.copyOnWrite = ra.copyOnWrite
|
||||
|
||||
// this is where copyOnWrite is used.
|
||||
if ra.copyOnWrite {
|
||||
sa.keys = make([]uint16, len(ra.keys))
|
||||
copy(sa.keys, ra.keys)
|
||||
sa.containers = make([]container, len(ra.containers))
|
||||
copy(sa.containers, ra.containers)
|
||||
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||
|
||||
ra.markAllAsNeedingCopyOnWrite()
|
||||
sa.markAllAsNeedingCopyOnWrite()
|
||||
|
||||
// sa.needCopyOnWrite is shared
|
||||
} else {
|
||||
// make a full copy
|
||||
|
||||
sa.keys = make([]uint16, len(ra.keys))
|
||||
copy(sa.keys, ra.keys)
|
||||
|
||||
sa.containers = make([]container, len(ra.containers))
|
||||
for i := range sa.containers {
|
||||
sa.containers[i] = ra.containers[i].clone()
|
||||
}
|
||||
|
||||
sa.needCopyOnWrite = make([]bool, len(ra.needCopyOnWrite))
|
||||
}
|
||||
return &sa
|
||||
}
|
||||
|
||||
// clone all containers which have needCopyOnWrite set to true
|
||||
// This can be used to make sure it is safe to munmap a []byte
|
||||
// that the roaring array may still have a reference to.
|
||||
func (ra *roaringArray) cloneCopyOnWriteContainers() {
|
||||
for i, needCopyOnWrite := range ra.needCopyOnWrite {
|
||||
if needCopyOnWrite {
|
||||
ra.containers[i] = ra.containers[i].clone()
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// unused function:
|
||||
//func (ra *roaringArray) containsKey(x uint16) bool {
|
||||
// return (ra.binarySearch(0, int64(len(ra.keys)), x) >= 0)
|
||||
//}
|
||||
|
||||
// getContainer returns the container with key `x`
|
||||
// if no such container exists `nil` is returned
|
||||
func (ra *roaringArray) getContainer(x uint16) container {
|
||||
i := ra.binarySearch(0, int64(len(ra.keys)), x)
|
||||
if i < 0 {
|
||||
return nil
|
||||
}
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getContainerAtIndex(i int) container {
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getFastContainerAtIndex(i int, needsWriteable bool) container {
|
||||
c := ra.getContainerAtIndex(i)
|
||||
switch t := c.(type) {
|
||||
case *arrayContainer:
|
||||
c = t.toBitmapContainer()
|
||||
case *runContainer16:
|
||||
if !t.isFull() {
|
||||
c = t.toBitmapContainer()
|
||||
}
|
||||
case *bitmapContainer:
|
||||
if needsWriteable && ra.needCopyOnWrite[i] {
|
||||
c = ra.containers[i].clone()
|
||||
}
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// getUnionedWritableContainer switches behavior for in-place Or
|
||||
// depending on whether the container requires a copy on write.
|
||||
// If it does using the non-inplace or() method leads to fewer allocations.
|
||||
func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) container {
|
||||
if ra.needCopyOnWrite[pos] {
|
||||
return ra.getContainerAtIndex(pos).or(other)
|
||||
}
|
||||
return ra.getContainerAtIndex(pos).ior(other)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
|
||||
if ra.needCopyOnWrite[i] {
|
||||
ra.containers[i] = ra.containers[i].clone()
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
return ra.containers[i]
|
||||
}
|
||||
|
||||
// getIndex returns the index of the container with key `x`
|
||||
// if no such container exists a negative value is returned
|
||||
func (ra *roaringArray) getIndex(x uint16) int {
|
||||
// Todo : test
|
||||
// before the binary search, we optimize for frequent cases
|
||||
size := len(ra.keys)
|
||||
if (size == 0) || (ra.keys[size-1] == x) {
|
||||
return size - 1
|
||||
}
|
||||
return ra.binarySearch(0, int64(size), x)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) getKeyAtIndex(i int) uint16 {
|
||||
return ra.keys[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) insertNewKeyValueAt(i int, key uint16, value container) {
|
||||
ra.keys = append(ra.keys, 0)
|
||||
ra.containers = append(ra.containers, nil)
|
||||
|
||||
copy(ra.keys[i+1:], ra.keys[i:])
|
||||
copy(ra.containers[i+1:], ra.containers[i:])
|
||||
|
||||
ra.keys[i] = key
|
||||
ra.containers[i] = value
|
||||
|
||||
ra.needCopyOnWrite = append(ra.needCopyOnWrite, false)
|
||||
copy(ra.needCopyOnWrite[i+1:], ra.needCopyOnWrite[i:])
|
||||
ra.needCopyOnWrite[i] = false
|
||||
}
|
||||
|
||||
func (ra *roaringArray) remove(key uint16) bool {
|
||||
i := ra.binarySearch(0, int64(len(ra.keys)), key)
|
||||
if i >= 0 { // if a new key
|
||||
ra.removeAtIndex(i)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ra *roaringArray) removeAtIndex(i int) {
|
||||
copy(ra.keys[i:], ra.keys[i+1:])
|
||||
copy(ra.containers[i:], ra.containers[i+1:])
|
||||
|
||||
copy(ra.needCopyOnWrite[i:], ra.needCopyOnWrite[i+1:])
|
||||
|
||||
ra.resize(len(ra.keys) - 1)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) setContainerAtIndex(i int, c container) {
|
||||
ra.containers[i] = c
|
||||
}
|
||||
|
||||
func (ra *roaringArray) replaceKeyAndContainerAtIndex(i int, key uint16, c container, mustCopyOnWrite bool) {
|
||||
ra.keys[i] = key
|
||||
ra.containers[i] = c
|
||||
ra.needCopyOnWrite[i] = mustCopyOnWrite
|
||||
}
|
||||
|
||||
func (ra *roaringArray) size() int {
|
||||
return len(ra.keys)
|
||||
}
|
||||
|
||||
// binarySearch returns the index of the key.
|
||||
// negative value returned if not found
|
||||
func (ra *roaringArray) binarySearch(begin, end int64, ikey uint16) int {
|
||||
// TODO: add unit tests
|
||||
low := begin
|
||||
high := end - 1
|
||||
for low+16 <= high {
|
||||
middleIndex := low + (high-low)/2 // avoid overflow
|
||||
middleValue := ra.keys[middleIndex]
|
||||
|
||||
if middleValue < ikey {
|
||||
low = middleIndex + 1
|
||||
} else if middleValue > ikey {
|
||||
high = middleIndex - 1
|
||||
} else {
|
||||
return int(middleIndex)
|
||||
}
|
||||
}
|
||||
for ; low <= high; low++ {
|
||||
val := ra.keys[low]
|
||||
if val >= ikey {
|
||||
if val == ikey {
|
||||
return int(low)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
return -int(low + 1)
|
||||
}
|
||||
|
||||
func (ra *roaringArray) equals(o interface{}) bool {
|
||||
srb, ok := o.(roaringArray)
|
||||
if ok {
|
||||
|
||||
if srb.size() != ra.size() {
|
||||
return false
|
||||
}
|
||||
for i, k := range ra.keys {
|
||||
if k != srb.keys[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
for i, c := range ra.containers {
|
||||
if !c.equals(srb.containers[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (ra *roaringArray) headerSize() uint64 {
|
||||
size := uint64(len(ra.keys))
|
||||
if ra.hasRunCompression() {
|
||||
if size < noOffsetThreshold { // for small bitmaps, we omit the offsets
|
||||
return 4 + (size+7)/8 + 4*size
|
||||
}
|
||||
return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
|
||||
}
|
||||
return 4 + 4 + 8*size
|
||||
}
|
||||
|
||||
// should be dirt cheap
|
||||
func (ra *roaringArray) serializedSizeInBytes() uint64 {
|
||||
answer := ra.headerSize()
|
||||
for _, c := range ra.containers {
|
||||
answer += uint64(c.serializedSizeInBytes())
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
|
||||
hasRun := ra.hasRunCompression()
|
||||
isRunSizeInBytes := 0
|
||||
cookieSize := 8
|
||||
if hasRun {
|
||||
cookieSize = 4
|
||||
isRunSizeInBytes = (len(ra.keys) + 7) / 8
|
||||
}
|
||||
descriptiveHeaderSize := 4 * len(ra.keys)
|
||||
preambleSize := cookieSize + isRunSizeInBytes + descriptiveHeaderSize
|
||||
|
||||
buf := make([]byte, preambleSize+4*len(ra.keys))
|
||||
|
||||
nw := 0
|
||||
|
||||
if hasRun {
|
||||
binary.LittleEndian.PutUint16(buf[0:], uint16(serialCookie))
|
||||
nw += 2
|
||||
binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
|
||||
nw += 2
|
||||
// compute isRun bitmap without temporary allocation
|
||||
runbitmapslice := buf[nw : nw+isRunSizeInBytes]
|
||||
for i, c := range ra.containers {
|
||||
switch c.(type) {
|
||||
case *runContainer16:
|
||||
runbitmapslice[i/8] |= 1 << (uint(i) % 8)
|
||||
}
|
||||
}
|
||||
nw += isRunSizeInBytes
|
||||
} else {
|
||||
binary.LittleEndian.PutUint32(buf[0:], uint32(serialCookieNoRunContainer))
|
||||
nw += 4
|
||||
binary.LittleEndian.PutUint32(buf[4:], uint32(len(ra.keys)))
|
||||
nw += 4
|
||||
}
|
||||
|
||||
// descriptive header
|
||||
for i, key := range ra.keys {
|
||||
binary.LittleEndian.PutUint16(buf[nw:], key)
|
||||
nw += 2
|
||||
c := ra.containers[i]
|
||||
binary.LittleEndian.PutUint16(buf[nw:], uint16(c.getCardinality()-1))
|
||||
nw += 2
|
||||
}
|
||||
|
||||
startOffset := int64(preambleSize + 4*len(ra.keys))
|
||||
if !hasRun || (len(ra.keys) >= noOffsetThreshold) {
|
||||
// offset header
|
||||
for _, c := range ra.containers {
|
||||
binary.LittleEndian.PutUint32(buf[nw:], uint32(startOffset))
|
||||
nw += 4
|
||||
switch rc := c.(type) {
|
||||
case *runContainer16:
|
||||
startOffset += 2 + int64(len(rc.iv))*4
|
||||
default:
|
||||
startOffset += int64(getSizeInBytesFromCardinality(c.getCardinality()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
written, err := w.Write(buf[:nw])
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
n += int64(written)
|
||||
|
||||
for _, c := range ra.containers {
|
||||
written, err := c.writeTo(w)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
n += int64(written)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
func (ra *roaringArray) toBytes() ([]byte, error) {
|
||||
var buf bytes.Buffer
|
||||
_, err := ra.writeTo(&buf)
|
||||
return buf.Bytes(), err
|
||||
}
|
||||
|
||||
// Reads a serialized roaringArray from a byte slice.
|
||||
func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) {
|
||||
var cookie uint32
|
||||
var err error
|
||||
if len(cookieHeader) > 0 && len(cookieHeader) != 4 {
|
||||
return int64(len(cookieHeader)), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: incorrect size of cookie header")
|
||||
}
|
||||
if len(cookieHeader) == 4 {
|
||||
cookie = binary.LittleEndian.Uint32(cookieHeader)
|
||||
} else {
|
||||
cookie, err = stream.ReadUInt32()
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
|
||||
}
|
||||
}
|
||||
// If NextReturnsSafeSlice is false, then willNeedCopyOnWrite should be true
|
||||
willNeedCopyOnWrite := !stream.NextReturnsSafeSlice()
|
||||
|
||||
var size uint32
|
||||
var isRunBitmap []byte
|
||||
|
||||
if cookie&0x0000FFFF == serialCookie {
|
||||
size = uint32(cookie>>16 + 1)
|
||||
// create is-run-container bitmap
|
||||
isRunBitmapSize := (int(size) + 7) / 8
|
||||
isRunBitmap, err = stream.Next(isRunBitmapSize)
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
|
||||
}
|
||||
} else if cookie == serialCookieNoRunContainer {
|
||||
size, err = stream.ReadUInt32()
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
|
||||
}
|
||||
} else {
|
||||
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: did not find expected serialCookie in header")
|
||||
}
|
||||
|
||||
if size > (1 << 16) {
|
||||
return stream.GetReadBytes(), fmt.Errorf("it is logically impossible to have more than (1<<16) containers")
|
||||
}
|
||||
|
||||
// descriptive header
|
||||
buf, err := stream.Next(2 * 2 * int(size))
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
|
||||
}
|
||||
|
||||
keycard := byteSliceAsUint16Slice(buf)
|
||||
|
||||
if isRunBitmap == nil || size >= noOffsetThreshold {
|
||||
if err := stream.SkipBytes(int(size) * 4); err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to skip bytes: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate slices upfront as number of containers is known
|
||||
if cap(ra.containers) >= int(size) {
|
||||
ra.containers = ra.containers[:size]
|
||||
} else {
|
||||
ra.containers = make([]container, size)
|
||||
}
|
||||
|
||||
if cap(ra.keys) >= int(size) {
|
||||
ra.keys = ra.keys[:size]
|
||||
} else {
|
||||
ra.keys = make([]uint16, size)
|
||||
}
|
||||
|
||||
if cap(ra.needCopyOnWrite) >= int(size) {
|
||||
ra.needCopyOnWrite = ra.needCopyOnWrite[:size]
|
||||
} else {
|
||||
ra.needCopyOnWrite = make([]bool, size)
|
||||
}
|
||||
|
||||
for i := uint32(0); i < size; i++ {
|
||||
key := keycard[2*i]
|
||||
card := int(keycard[2*i+1]) + 1
|
||||
ra.keys[i] = key
|
||||
ra.needCopyOnWrite[i] = willNeedCopyOnWrite
|
||||
|
||||
if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
|
||||
// run container
|
||||
nr, err := stream.ReadUInt16()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to read runtime container size: %s", err)
|
||||
}
|
||||
|
||||
buf, err := stream.Next(int(nr) * 4)
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
|
||||
}
|
||||
|
||||
nb := runContainer16{
|
||||
iv: byteSliceAsInterval16Slice(buf),
|
||||
}
|
||||
|
||||
ra.containers[i] = &nb
|
||||
} else if card > arrayDefaultMaxSize {
|
||||
// bitmap container
|
||||
buf, err := stream.Next(arrayDefaultMaxSize * 2)
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
|
||||
}
|
||||
|
||||
nb := bitmapContainer{
|
||||
cardinality: card,
|
||||
bitmap: byteSliceAsUint64Slice(buf),
|
||||
}
|
||||
|
||||
ra.containers[i] = &nb
|
||||
} else {
|
||||
// array container
|
||||
buf, err := stream.Next(card * 2)
|
||||
if err != nil {
|
||||
return stream.GetReadBytes(), fmt.Errorf("failed to read array container: %s", err)
|
||||
}
|
||||
|
||||
nb := arrayContainer{
|
||||
byteSliceAsUint16Slice(buf),
|
||||
}
|
||||
|
||||
ra.containers[i] = &nb
|
||||
}
|
||||
}
|
||||
|
||||
return stream.GetReadBytes(), nil
|
||||
}
|
||||
|
||||
func (ra *roaringArray) hasRunCompression() bool {
|
||||
for _, c := range ra.containers {
|
||||
switch c.(type) {
|
||||
case *runContainer16:
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the smallest integer index larger than pos such that array[index].key>=min. If none can
|
||||
* be found, return size. Based on code by O. Kaser.
|
||||
*
|
||||
* @param min minimal value
|
||||
* @param pos index to exceed
|
||||
* @return the smallest index greater than pos such that array[index].key is at least as large as
|
||||
* min, or size if it is not possible.
|
||||
*/
|
||||
func (ra *roaringArray) advanceUntil(min uint16, pos int) int {
|
||||
lower := pos + 1
|
||||
|
||||
if lower >= len(ra.keys) || ra.keys[lower] >= min {
|
||||
return lower
|
||||
}
|
||||
|
||||
spansize := 1
|
||||
|
||||
for lower+spansize < len(ra.keys) && ra.keys[lower+spansize] < min {
|
||||
spansize *= 2
|
||||
}
|
||||
var upper int
|
||||
if lower+spansize < len(ra.keys) {
|
||||
upper = lower + spansize
|
||||
} else {
|
||||
upper = len(ra.keys) - 1
|
||||
}
|
||||
|
||||
if ra.keys[upper] == min {
|
||||
return upper
|
||||
}
|
||||
|
||||
if ra.keys[upper] < min {
|
||||
// means
|
||||
// array
|
||||
// has no
|
||||
// item
|
||||
// >= min
|
||||
// pos = array.length;
|
||||
return len(ra.keys)
|
||||
}
|
||||
|
||||
// we know that the next-smallest span was too small
|
||||
lower += (spansize >> 1)
|
||||
|
||||
mid := 0
|
||||
for lower+1 != upper {
|
||||
mid = (lower + upper) >> 1
|
||||
if ra.keys[mid] == min {
|
||||
return mid
|
||||
} else if ra.keys[mid] < min {
|
||||
lower = mid
|
||||
} else {
|
||||
upper = mid
|
||||
}
|
||||
}
|
||||
return upper
|
||||
}
|
||||
|
||||
func (ra *roaringArray) markAllAsNeedingCopyOnWrite() {
|
||||
for i := range ra.needCopyOnWrite {
|
||||
ra.needCopyOnWrite[i] = true
|
||||
}
|
||||
}
|
||||
|
||||
func (ra *roaringArray) needsCopyOnWrite(i int) bool {
|
||||
return ra.needCopyOnWrite[i]
|
||||
}
|
||||
|
||||
func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
|
||||
ra.needCopyOnWrite[i] = true
|
||||
}
|
||||
|
||||
func (ra *roaringArray) checkKeysSorted() bool {
|
||||
if len(ra.keys) == 0 || len(ra.keys) == 1 {
|
||||
return true
|
||||
}
|
||||
previous := ra.keys[0]
|
||||
for nextIdx := 1; nextIdx < len(ra.keys); nextIdx++ {
|
||||
next := ra.keys[nextIdx]
|
||||
if previous >= next {
|
||||
return false
|
||||
}
|
||||
previous = next
|
||||
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// validate checks the referential integrity
|
||||
// ensures len(keys) == len(containers), recurses and checks each container type
|
||||
func (ra *roaringArray) validate() error {
|
||||
if !ra.checkKeysSorted() {
|
||||
return ErrKeySortOrder
|
||||
}
|
||||
|
||||
if len(ra.keys) != len(ra.containers) {
|
||||
return ErrCardinalityConstraint
|
||||
}
|
||||
|
||||
if len(ra.keys) != len(ra.needCopyOnWrite) {
|
||||
return ErrCardinalityConstraint
|
||||
}
|
||||
|
||||
for _, container := range ra.containers {
|
||||
err := container.validate()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,18 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"io"
|
||||
)
|
||||
|
||||
// writeTo for runContainer16 follows this
|
||||
// spec: https://github.com/RoaringBitmap/RoaringFormatSpec
|
||||
func (b *runContainer16) writeTo(stream io.Writer) (int, error) {
|
||||
buf := make([]byte, 2+4*len(b.iv))
|
||||
binary.LittleEndian.PutUint16(buf[0:], uint16(len(b.iv)))
|
||||
for i, v := range b.iv {
|
||||
binary.LittleEndian.PutUint16(buf[2+i*4:], v.start)
|
||||
binary.LittleEndian.PutUint16(buf[2+2+i*4:], v.length)
|
||||
}
|
||||
return stream.Write(buf)
|
||||
}
|
||||
145
vendor/github.com/RoaringBitmap/roaring/v2/serialization_generic.go
generated
vendored
Normal file
145
vendor/github.com/RoaringBitmap/roaring/v2/serialization_generic.go
generated
vendored
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
//go:build (!amd64 && !386 && !arm && !arm64 && !ppc64le && !mipsle && !mips64le && !mips64p32le && !wasm) || appengine
|
||||
// +build !amd64,!386,!arm,!arm64,!ppc64le,!mipsle,!mips64le,!mips64p32le,!wasm appengine
|
||||
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
)
|
||||
|
||||
func (b *arrayContainer) writeTo(stream io.Writer) (int, error) {
|
||||
buf := make([]byte, 2*len(b.content))
|
||||
for i, v := range b.content {
|
||||
base := i * 2
|
||||
buf[base] = byte(v)
|
||||
buf[base+1] = byte(v >> 8)
|
||||
}
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func (b *arrayContainer) readFrom(stream io.Reader) (int, error) {
|
||||
err := binary.Read(stream, binary.LittleEndian, b.content)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return 2 * len(b.content), nil
|
||||
}
|
||||
|
||||
func (b *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||
if b.cardinality <= arrayDefaultMaxSize {
|
||||
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
|
||||
}
|
||||
|
||||
// Write set
|
||||
buf := make([]byte, 8*len(b.bitmap))
|
||||
for i, v := range b.bitmap {
|
||||
base := i * 8
|
||||
buf[base] = byte(v)
|
||||
buf[base+1] = byte(v >> 8)
|
||||
buf[base+2] = byte(v >> 16)
|
||||
buf[base+3] = byte(v >> 24)
|
||||
buf[base+4] = byte(v >> 32)
|
||||
buf[base+5] = byte(v >> 40)
|
||||
buf[base+6] = byte(v >> 48)
|
||||
buf[base+7] = byte(v >> 56)
|
||||
}
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func (b *bitmapContainer) readFrom(stream io.Reader) (int, error) {
|
||||
err := binary.Read(stream, binary.LittleEndian, b.bitmap)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
b.computeCardinality()
|
||||
return 8 * len(b.bitmap), nil
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||
by := make([]byte, len(bc.bitmap)*8)
|
||||
for i := range bc.bitmap {
|
||||
binary.LittleEndian.PutUint64(by[i*8:], bc.bitmap[i])
|
||||
}
|
||||
return by
|
||||
}
|
||||
|
||||
func uint64SliceAsByteSlice(slice []uint64) []byte {
|
||||
by := make([]byte, len(slice)*8)
|
||||
|
||||
for i, v := range slice {
|
||||
binary.LittleEndian.PutUint64(by[i*8:], v)
|
||||
}
|
||||
|
||||
return by
|
||||
}
|
||||
|
||||
func uint16SliceAsByteSlice(slice []uint16) []byte {
|
||||
by := make([]byte, len(slice)*2)
|
||||
|
||||
for i, v := range slice {
|
||||
binary.LittleEndian.PutUint16(by[i*2:], v)
|
||||
}
|
||||
|
||||
return by
|
||||
}
|
||||
|
||||
func interval16SliceAsByteSlice(slice []interval16) []byte {
|
||||
by := make([]byte, len(slice)*4)
|
||||
|
||||
for i, v := range slice {
|
||||
binary.LittleEndian.PutUint16(by[i*2:], v.start)
|
||||
binary.LittleEndian.PutUint16(by[i*2+2:], v.length)
|
||||
}
|
||||
|
||||
return by
|
||||
}
|
||||
|
||||
func byteSliceAsUint16Slice(slice []byte) []uint16 {
|
||||
if len(slice)%2 != 0 {
|
||||
panic("Slice size should be divisible by 2")
|
||||
}
|
||||
|
||||
b := make([]uint16, len(slice)/2)
|
||||
|
||||
for i := range b {
|
||||
b[i] = binary.LittleEndian.Uint16(slice[2*i:])
|
||||
}
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
func byteSliceAsUint64Slice(slice []byte) []uint64 {
|
||||
if len(slice)%8 != 0 {
|
||||
panic("Slice size should be divisible by 8")
|
||||
}
|
||||
|
||||
b := make([]uint64, len(slice)/8)
|
||||
|
||||
for i := range b {
|
||||
b[i] = binary.LittleEndian.Uint64(slice[8*i:])
|
||||
}
|
||||
|
||||
return b
|
||||
}
|
||||
|
||||
// Converts a byte slice to a interval16 slice.
|
||||
// The function assumes that the slice byte buffer is run container data
|
||||
// encoded according to Roaring Format Spec
|
||||
func byteSliceAsInterval16Slice(byteSlice []byte) []interval16 {
|
||||
if len(byteSlice)%4 != 0 {
|
||||
panic("Slice size should be divisible by 4")
|
||||
}
|
||||
|
||||
intervalSlice := make([]interval16, len(byteSlice)/4)
|
||||
|
||||
for i := range intervalSlice {
|
||||
intervalSlice[i] = interval16{
|
||||
start: binary.LittleEndian.Uint16(byteSlice[i*4:]),
|
||||
length: binary.LittleEndian.Uint16(byteSlice[i*4+2:]),
|
||||
}
|
||||
}
|
||||
|
||||
return intervalSlice
|
||||
}
|
||||
671
vendor/github.com/RoaringBitmap/roaring/v2/serialization_littleendian.go
generated
vendored
Normal file
671
vendor/github.com/RoaringBitmap/roaring/v2/serialization_littleendian.go
generated
vendored
Normal file
|
|
@ -0,0 +1,671 @@
|
|||
//go:build (386 && !appengine) || (amd64 && !appengine) || (arm && !appengine) || (arm64 && !appengine) || (ppc64le && !appengine) || (mipsle && !appengine) || (mips64le && !appengine) || (mips64p32le && !appengine) || (wasm && !appengine)
|
||||
// +build 386,!appengine amd64,!appengine arm,!appengine arm64,!appengine ppc64le,!appengine mipsle,!appengine mips64le,!appengine mips64p32le,!appengine wasm,!appengine
|
||||
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"io"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func (ac *arrayContainer) writeTo(stream io.Writer) (int, error) {
|
||||
buf := uint16SliceAsByteSlice(ac.content)
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) writeTo(stream io.Writer) (int, error) {
|
||||
if bc.cardinality <= arrayDefaultMaxSize {
|
||||
return 0, errors.New("refusing to write bitmap container with cardinality of array container")
|
||||
}
|
||||
buf := uint64SliceAsByteSlice(bc.bitmap)
|
||||
return stream.Write(buf)
|
||||
}
|
||||
|
||||
func uint64SliceAsByteSlice(slice []uint64) []byte {
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len *= 8
|
||||
header.Cap *= 8
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||
runtime.KeepAlive(&slice)
|
||||
|
||||
// return it
|
||||
return result
|
||||
}
|
||||
|
||||
func uint16SliceAsByteSlice(slice []uint16) []byte {
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len *= 2
|
||||
header.Cap *= 2
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||
runtime.KeepAlive(&slice)
|
||||
|
||||
// return it
|
||||
return result
|
||||
}
|
||||
|
||||
func interval16SliceAsByteSlice(slice []interval16) []byte {
|
||||
// make a new slice header
|
||||
header := *(*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
|
||||
// update its capacity and length
|
||||
header.Len *= 4
|
||||
header.Cap *= 4
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
result := *(*[]byte)(unsafe.Pointer(&header))
|
||||
runtime.KeepAlive(&slice)
|
||||
|
||||
// return it
|
||||
return result
|
||||
}
|
||||
|
||||
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
|
||||
return uint64SliceAsByteSlice(bc.bitmap)
|
||||
}
|
||||
|
||||
// Deserialization code follows
|
||||
|
||||
// //
|
||||
// These methods (byteSliceAsUint16Slice,...) do not make copies,
|
||||
// they are pointer-based (unsafe). The caller is responsible to
|
||||
// ensure that the input slice does not get garbage collected, deleted
|
||||
// or modified while you hold the returned slince.
|
||||
// //
|
||||
func byteSliceAsUint16Slice(slice []byte) (result []uint16) { // here we create a new slice holder
|
||||
if len(slice)%2 != 0 {
|
||||
panic("Slice size should be divisible by 2")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 2
|
||||
rHeader.Cap = bHeader.Cap / 2
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsUint64Slice(slice []byte) (result []uint64) {
|
||||
if len(slice)%8 != 0 {
|
||||
panic("Slice size should be divisible by 8")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 8
|
||||
rHeader.Cap = bHeader.Cap / 8
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsInterval16Slice(slice []byte) (result []interval16) {
|
||||
if len(slice)%4 != 0 {
|
||||
panic("Slice size should be divisible by 4")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / 4
|
||||
rHeader.Cap = bHeader.Cap / 4
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsContainerSlice(slice []byte) (result []container) {
|
||||
var c container
|
||||
containerSize := int(unsafe.Sizeof(c))
|
||||
|
||||
if len(slice)%containerSize != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(container)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / containerSize
|
||||
rHeader.Cap = bHeader.Cap / containerSize
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsBitsetSlice(slice []byte) (result []bitmapContainer) {
|
||||
bitsetSize := int(unsafe.Sizeof(bitmapContainer{}))
|
||||
if len(slice)%bitsetSize != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(bitmapContainer)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / bitsetSize
|
||||
rHeader.Cap = bHeader.Cap / bitsetSize
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsArraySlice(slice []byte) (result []arrayContainer) {
|
||||
arraySize := int(unsafe.Sizeof(arrayContainer{}))
|
||||
if len(slice)%arraySize != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(arrayContainer)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / arraySize
|
||||
rHeader.Cap = bHeader.Cap / arraySize
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsRun16Slice(slice []byte) (result []runContainer16) {
|
||||
run16Size := int(unsafe.Sizeof(runContainer16{}))
|
||||
if len(slice)%run16Size != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(runContainer16)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / run16Size
|
||||
rHeader.Cap = bHeader.Cap / run16Size
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
func byteSliceAsBoolSlice(slice []byte) (result []bool) {
|
||||
boolSize := int(unsafe.Sizeof(true))
|
||||
if len(slice)%boolSize != 0 {
|
||||
panic("Slice size should be divisible by unsafe.Sizeof(bool)")
|
||||
}
|
||||
// reference: https://go101.org/article/unsafe.html
|
||||
|
||||
// make a new slice header
|
||||
bHeader := (*reflect.SliceHeader)(unsafe.Pointer(&slice))
|
||||
rHeader := (*reflect.SliceHeader)(unsafe.Pointer(&result))
|
||||
|
||||
// transfer the data from the given slice to a new variable (our result)
|
||||
rHeader.Data = bHeader.Data
|
||||
rHeader.Len = bHeader.Len / boolSize
|
||||
rHeader.Cap = bHeader.Cap / boolSize
|
||||
|
||||
// instantiate result and use KeepAlive so data isn't unmapped.
|
||||
runtime.KeepAlive(&slice) // it is still crucial, GC can free it)
|
||||
|
||||
// return result
|
||||
return
|
||||
}
|
||||
|
||||
// FrozenView creates a static view of a serialized bitmap stored in buf.
|
||||
// It uses CRoaring's frozen bitmap format.
|
||||
//
|
||||
// The format specification is available here:
|
||||
// https://github.com/RoaringBitmap/CRoaring/blob/2c867e9f9c9e2a3a7032791f94c4c7ae3013f6e0/src/roaring.c#L2756-L2783
|
||||
//
|
||||
// The provided byte array (buf) is expected to be a constant.
|
||||
// The function makes the best effort attempt not to copy data.
|
||||
// Only little endian is supported. The function will err if it detects a big
|
||||
// endian serialized file.
|
||||
// You should take care not to modify buff as it will likely result in
|
||||
// unexpected program behavior.
|
||||
// If said buffer comes from a memory map, it's advisable to give it read
|
||||
// only permissions, either at creation or by calling Mprotect from the
|
||||
// golang.org/x/sys/unix package.
|
||||
//
|
||||
// Resulting bitmaps are effectively immutable in the following sense:
|
||||
// a copy-on-write marker is used so that when you modify the resulting
|
||||
// bitmap, copies of selected data (containers) are made.
|
||||
// You should *not* change the copy-on-write status of the resulting
|
||||
// bitmaps (SetCopyOnWrite).
|
||||
//
|
||||
// If buf becomes unavailable, then a bitmap created with
|
||||
// FromBuffer would be effectively broken. Furthermore, any
|
||||
// bitmap derived from this bitmap (e.g., via Or, And) might
|
||||
// also be broken. Thus, before making buf unavailable, you should
|
||||
// call CloneCopyOnWriteContainers on all such bitmaps.
|
||||
func (rb *Bitmap) FrozenView(buf []byte) error {
|
||||
return rb.highlowcontainer.frozenView(buf)
|
||||
}
|
||||
|
||||
func (rb *Bitmap) MustFrozenView(buf []byte) error {
|
||||
if err := rb.FrozenView(buf); err != nil {
|
||||
return err
|
||||
}
|
||||
err := rb.Validate()
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
/* Verbatim specification from CRoaring.
|
||||
*
|
||||
* FROZEN SERIALIZATION FORMAT DESCRIPTION
|
||||
*
|
||||
* -- (beginning must be aligned by 32 bytes) --
|
||||
* <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
|
||||
* <run_data> rle16_t[total number of rle elements in all run containers]
|
||||
* <array_data> uint16_t[total number of array elements in all array containers]
|
||||
* <keys> uint16_t[num_containers]
|
||||
* <counts> uint16_t[num_containers]
|
||||
* <typecodes> uint8_t[num_containers]
|
||||
* <header> uint32_t
|
||||
*
|
||||
* <header> is a 4-byte value which is a bit union of frozenCookie (15 bits)
|
||||
* and the number of containers (17 bits).
|
||||
*
|
||||
* <counts> stores number of elements for every container.
|
||||
* Its meaning depends on container type.
|
||||
* For array and bitset containers, this value is the container cardinality minus one.
|
||||
* For run container, it is the number of rle_t elements (n_runs).
|
||||
*
|
||||
* <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
|
||||
* all containers of respective type.
|
||||
*
|
||||
* <*_data> and <keys> are kept close together because they are not accessed
|
||||
* during deserilization. This may reduce IO in case of large mmaped bitmaps.
|
||||
* All members have their native alignments during deserilization except <header>,
|
||||
* which is not guaranteed to be aligned by 4 bytes.
|
||||
*/
|
||||
const frozenCookie = 13766
|
||||
|
||||
var (
|
||||
// ErrFrozenBitmapInvalidCookie is returned when the header does not contain the frozenCookie.
|
||||
ErrFrozenBitmapInvalidCookie = errors.New("header does not contain the frozenCookie")
|
||||
// ErrFrozenBitmapBigEndian is returned when the header is big endian.
|
||||
ErrFrozenBitmapBigEndian = errors.New("loading big endian frozen bitmaps is not supported")
|
||||
// ErrFrozenBitmapIncomplete is returned when the buffer is too small to contain a frozen bitmap.
|
||||
ErrFrozenBitmapIncomplete = errors.New("input buffer too small to contain a frozen bitmap")
|
||||
// ErrFrozenBitmapOverpopulated is returned when the number of containers is too large.
|
||||
ErrFrozenBitmapOverpopulated = errors.New("too many containers")
|
||||
// ErrFrozenBitmapUnexpectedData is returned when the buffer contains unexpected data.
|
||||
ErrFrozenBitmapUnexpectedData = errors.New("spurious data in input")
|
||||
// ErrFrozenBitmapInvalidTypecode is returned when the typecode is invalid.
|
||||
ErrFrozenBitmapInvalidTypecode = errors.New("unrecognized typecode")
|
||||
// ErrFrozenBitmapBufferTooSmall is returned when the buffer is too small.
|
||||
ErrFrozenBitmapBufferTooSmall = errors.New("buffer too small")
|
||||
)
|
||||
|
||||
func (ra *roaringArray) frozenView(buf []byte) error {
|
||||
if len(buf) < 4 {
|
||||
return ErrFrozenBitmapIncomplete
|
||||
}
|
||||
|
||||
headerBE := binary.BigEndian.Uint32(buf[len(buf)-4:])
|
||||
if headerBE&0x7fff == frozenCookie {
|
||||
return ErrFrozenBitmapBigEndian
|
||||
}
|
||||
|
||||
header := binary.LittleEndian.Uint32(buf[len(buf)-4:])
|
||||
buf = buf[:len(buf)-4]
|
||||
|
||||
if header&0x7fff != frozenCookie {
|
||||
return ErrFrozenBitmapInvalidCookie
|
||||
}
|
||||
|
||||
nCont := int(header >> 15)
|
||||
if nCont > (1 << 16) {
|
||||
return ErrFrozenBitmapOverpopulated
|
||||
}
|
||||
|
||||
// 1 byte per type, 2 bytes per key, 2 bytes per count.
|
||||
if len(buf) < 5*nCont {
|
||||
return ErrFrozenBitmapIncomplete
|
||||
}
|
||||
|
||||
types := buf[len(buf)-nCont:]
|
||||
buf = buf[:len(buf)-nCont]
|
||||
|
||||
counts := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
|
||||
buf = buf[:len(buf)-2*nCont]
|
||||
|
||||
keys := byteSliceAsUint16Slice(buf[len(buf)-2*nCont:])
|
||||
buf = buf[:len(buf)-2*nCont]
|
||||
|
||||
nBitmap, nArray, nRun := 0, 0, 0
|
||||
nArrayEl, nRunEl := 0, 0
|
||||
for i, t := range types {
|
||||
switch t {
|
||||
case 1:
|
||||
nBitmap++
|
||||
case 2:
|
||||
nArray++
|
||||
nArrayEl += int(counts[i]) + 1
|
||||
case 3:
|
||||
nRun++
|
||||
nRunEl += int(counts[i])
|
||||
default:
|
||||
return ErrFrozenBitmapInvalidTypecode
|
||||
}
|
||||
}
|
||||
|
||||
if len(buf) < (1<<13)*nBitmap+4*nRunEl+2*nArrayEl {
|
||||
return ErrFrozenBitmapIncomplete
|
||||
}
|
||||
|
||||
bitsetsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBitmap])
|
||||
buf = buf[(1<<13)*nBitmap:]
|
||||
|
||||
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
|
||||
buf = buf[4*nRunEl:]
|
||||
|
||||
arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
|
||||
buf = buf[2*nArrayEl:]
|
||||
|
||||
if len(buf) != 0 {
|
||||
return ErrFrozenBitmapUnexpectedData
|
||||
}
|
||||
|
||||
var c container
|
||||
containersSz := int(unsafe.Sizeof(c)) * nCont
|
||||
bitsetsSz := int(unsafe.Sizeof(bitmapContainer{})) * nBitmap
|
||||
arraysSz := int(unsafe.Sizeof(arrayContainer{})) * nArray
|
||||
runsSz := int(unsafe.Sizeof(runContainer16{})) * nRun
|
||||
needCOWSz := int(unsafe.Sizeof(true)) * nCont
|
||||
|
||||
bitmapArenaSz := containersSz + bitsetsSz + arraysSz + runsSz + needCOWSz
|
||||
bitmapArena := make([]byte, bitmapArenaSz)
|
||||
|
||||
containers := byteSliceAsContainerSlice(bitmapArena[:containersSz])
|
||||
bitmapArena = bitmapArena[containersSz:]
|
||||
|
||||
bitsets := byteSliceAsBitsetSlice(bitmapArena[:bitsetsSz])
|
||||
bitmapArena = bitmapArena[bitsetsSz:]
|
||||
|
||||
arrays := byteSliceAsArraySlice(bitmapArena[:arraysSz])
|
||||
bitmapArena = bitmapArena[arraysSz:]
|
||||
|
||||
runs := byteSliceAsRun16Slice(bitmapArena[:runsSz])
|
||||
bitmapArena = bitmapArena[runsSz:]
|
||||
|
||||
needCOW := byteSliceAsBoolSlice(bitmapArena)
|
||||
|
||||
iBitset, iArray, iRun := 0, 0, 0
|
||||
for i, t := range types {
|
||||
needCOW[i] = true
|
||||
|
||||
switch t {
|
||||
case 1:
|
||||
containers[i] = &bitsets[iBitset]
|
||||
bitsets[iBitset].cardinality = int(counts[i]) + 1
|
||||
bitsets[iBitset].bitmap = bitsetsArena[:1024]
|
||||
bitsetsArena = bitsetsArena[1024:]
|
||||
iBitset++
|
||||
case 2:
|
||||
containers[i] = &arrays[iArray]
|
||||
sz := int(counts[i]) + 1
|
||||
arrays[iArray].content = arraysArena[:sz]
|
||||
arraysArena = arraysArena[sz:]
|
||||
iArray++
|
||||
case 3:
|
||||
containers[i] = &runs[iRun]
|
||||
runs[iRun].iv = runsArena[:counts[i]]
|
||||
runsArena = runsArena[counts[i]:]
|
||||
iRun++
|
||||
}
|
||||
}
|
||||
|
||||
// Not consuming the full input is a bug.
|
||||
if iBitset != nBitmap || len(bitsetsArena) != 0 ||
|
||||
iArray != nArray || len(arraysArena) != 0 ||
|
||||
iRun != nRun || len(runsArena) != 0 {
|
||||
panic("we missed something")
|
||||
}
|
||||
|
||||
ra.keys = keys
|
||||
ra.containers = containers
|
||||
ra.needCopyOnWrite = needCOW
|
||||
ra.copyOnWrite = true
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetFrozenSizeInBytes returns the size in bytes of the frozen bitmap.
|
||||
func (rb *Bitmap) GetFrozenSizeInBytes() uint64 {
|
||||
nBits, nArrayEl, nRunEl := uint64(0), uint64(0), uint64(0)
|
||||
for _, c := range rb.highlowcontainer.containers {
|
||||
switch v := c.(type) {
|
||||
case *bitmapContainer:
|
||||
nBits++
|
||||
case *arrayContainer:
|
||||
nArrayEl += uint64(len(v.content))
|
||||
case *runContainer16:
|
||||
nRunEl += uint64(len(v.iv))
|
||||
}
|
||||
}
|
||||
return 4 + 5*uint64(len(rb.highlowcontainer.containers)) +
|
||||
(nBits << 13) + 2*nArrayEl + 4*nRunEl
|
||||
}
|
||||
|
||||
// Freeze serializes the bitmap in the CRoaring's frozen format.
|
||||
func (rb *Bitmap) Freeze() ([]byte, error) {
|
||||
sz := rb.GetFrozenSizeInBytes()
|
||||
buf := make([]byte, sz)
|
||||
_, err := rb.FreezeTo(buf)
|
||||
return buf, err
|
||||
}
|
||||
|
||||
// FreezeTo serializes the bitmap in the CRoaring's frozen format.
|
||||
func (rb *Bitmap) FreezeTo(buf []byte) (int, error) {
|
||||
containers := rb.highlowcontainer.containers
|
||||
nCont := len(containers)
|
||||
|
||||
nBits, nArrayEl, nRunEl := 0, 0, 0
|
||||
for _, c := range containers {
|
||||
switch v := c.(type) {
|
||||
case *bitmapContainer:
|
||||
nBits++
|
||||
case *arrayContainer:
|
||||
nArrayEl += len(v.content)
|
||||
case *runContainer16:
|
||||
nRunEl += len(v.iv)
|
||||
}
|
||||
}
|
||||
|
||||
serialSize := 4 + 5*nCont + (1<<13)*nBits + 4*nRunEl + 2*nArrayEl
|
||||
if len(buf) < serialSize {
|
||||
return 0, ErrFrozenBitmapBufferTooSmall
|
||||
}
|
||||
|
||||
bitsArena := byteSliceAsUint64Slice(buf[:(1<<13)*nBits])
|
||||
buf = buf[(1<<13)*nBits:]
|
||||
|
||||
runsArena := byteSliceAsInterval16Slice(buf[:4*nRunEl])
|
||||
buf = buf[4*nRunEl:]
|
||||
|
||||
arraysArena := byteSliceAsUint16Slice(buf[:2*nArrayEl])
|
||||
buf = buf[2*nArrayEl:]
|
||||
|
||||
keys := byteSliceAsUint16Slice(buf[:2*nCont])
|
||||
buf = buf[2*nCont:]
|
||||
|
||||
counts := byteSliceAsUint16Slice(buf[:2*nCont])
|
||||
buf = buf[2*nCont:]
|
||||
|
||||
types := buf[:nCont]
|
||||
buf = buf[nCont:]
|
||||
|
||||
header := uint32(frozenCookie | (nCont << 15))
|
||||
binary.LittleEndian.PutUint32(buf[:4], header)
|
||||
|
||||
copy(keys, rb.highlowcontainer.keys[:])
|
||||
|
||||
for i, c := range containers {
|
||||
switch v := c.(type) {
|
||||
case *bitmapContainer:
|
||||
copy(bitsArena, v.bitmap)
|
||||
bitsArena = bitsArena[1024:]
|
||||
counts[i] = uint16(v.cardinality - 1)
|
||||
types[i] = 1
|
||||
case *arrayContainer:
|
||||
copy(arraysArena, v.content)
|
||||
arraysArena = arraysArena[len(v.content):]
|
||||
elems := len(v.content)
|
||||
counts[i] = uint16(elems - 1)
|
||||
types[i] = 2
|
||||
case *runContainer16:
|
||||
copy(runsArena, v.iv)
|
||||
runs := len(v.iv)
|
||||
runsArena = runsArena[runs:]
|
||||
counts[i] = uint16(runs)
|
||||
types[i] = 3
|
||||
}
|
||||
}
|
||||
|
||||
return serialSize, nil
|
||||
}
|
||||
|
||||
// WriteFrozenTo serializes the bitmap in the CRoaring's frozen format.
|
||||
func (rb *Bitmap) WriteFrozenTo(wr io.Writer) (int, error) {
|
||||
// FIXME: this is a naive version that iterates 4 times through the
|
||||
// containers and allocates 3*len(containers) bytes; it's quite likely
|
||||
// it can be done more efficiently.
|
||||
containers := rb.highlowcontainer.containers
|
||||
written := 0
|
||||
|
||||
for _, c := range containers {
|
||||
c, ok := c.(*bitmapContainer)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
n, err := wr.Write(uint64SliceAsByteSlice(c.bitmap))
|
||||
written += n
|
||||
if err != nil {
|
||||
return written, err
|
||||
}
|
||||
}
|
||||
|
||||
for _, c := range containers {
|
||||
c, ok := c.(*runContainer16)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
n, err := wr.Write(interval16SliceAsByteSlice(c.iv))
|
||||
written += n
|
||||
if err != nil {
|
||||
return written, err
|
||||
}
|
||||
}
|
||||
|
||||
for _, c := range containers {
|
||||
c, ok := c.(*arrayContainer)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
n, err := wr.Write(uint16SliceAsByteSlice(c.content))
|
||||
written += n
|
||||
if err != nil {
|
||||
return written, err
|
||||
}
|
||||
}
|
||||
|
||||
n, err := wr.Write(uint16SliceAsByteSlice(rb.highlowcontainer.keys))
|
||||
written += n
|
||||
if err != nil {
|
||||
return written, err
|
||||
}
|
||||
|
||||
countTypeBuf := make([]byte, 3*len(containers))
|
||||
counts := byteSliceAsUint16Slice(countTypeBuf[:2*len(containers)])
|
||||
types := countTypeBuf[2*len(containers):]
|
||||
|
||||
for i, c := range containers {
|
||||
switch c := c.(type) {
|
||||
case *bitmapContainer:
|
||||
counts[i] = uint16(c.cardinality - 1)
|
||||
types[i] = 1
|
||||
case *arrayContainer:
|
||||
elems := len(c.content)
|
||||
counts[i] = uint16(elems - 1)
|
||||
types[i] = 2
|
||||
case *runContainer16:
|
||||
runs := len(c.iv)
|
||||
counts[i] = uint16(runs)
|
||||
types[i] = 3
|
||||
}
|
||||
}
|
||||
|
||||
n, err = wr.Write(countTypeBuf)
|
||||
written += n
|
||||
if err != nil {
|
||||
return written, err
|
||||
}
|
||||
|
||||
header := uint32(frozenCookie | (len(containers) << 15))
|
||||
if err := binary.Write(wr, binary.LittleEndian, header); err != nil {
|
||||
return written, err
|
||||
}
|
||||
written += 4
|
||||
|
||||
return written, nil
|
||||
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
//go:build gofuzz
|
||||
// +build gofuzz
|
||||
|
||||
package roaring
|
||||
|
||||
import "bytes"
|
||||
|
||||
func FuzzSerializationStream(data []byte) int {
|
||||
newrb := NewBitmap()
|
||||
if _, err := newrb.ReadFrom(bytes.NewReader(data)); err != nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
||||
func FuzzSerializationBuffer(data []byte) int {
|
||||
newrb := NewBitmap()
|
||||
if _, err := newrb.FromBuffer(data); err != nil {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
}
|
||||
|
|
@ -0,0 +1,665 @@
|
|||
package roaring
|
||||
|
||||
func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||
if len(set2) == 0 {
|
||||
buffer = buffer[:len(set1)]
|
||||
copy(buffer, set1)
|
||||
return len(set1)
|
||||
}
|
||||
if len(set1) == 0 {
|
||||
return 0
|
||||
}
|
||||
pos := 0
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
buffer = buffer[:cap(buffer)]
|
||||
s1 := set1[k1]
|
||||
s2 := set2[k2]
|
||||
for {
|
||||
if s1 < s2 {
|
||||
buffer[pos] = s1
|
||||
pos++
|
||||
k1++
|
||||
if k1 >= len(set1) {
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
} else if s1 == s2 {
|
||||
k1++
|
||||
k2++
|
||||
if k1 >= len(set1) {
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
if k2 >= len(set2) {
|
||||
for ; k1 < len(set1); k1++ {
|
||||
buffer[pos] = set1[k1]
|
||||
pos++
|
||||
}
|
||||
break
|
||||
}
|
||||
s2 = set2[k2]
|
||||
} else { // if (val1>val2)
|
||||
k2++
|
||||
if k2 >= len(set2) {
|
||||
for ; k1 < len(set1); k1++ {
|
||||
buffer[pos] = set1[k1]
|
||||
pos++
|
||||
}
|
||||
break
|
||||
}
|
||||
s2 = set2[k2]
|
||||
}
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||
if 0 == len(set2) {
|
||||
buffer = buffer[:len(set1)]
|
||||
copy(buffer, set1[:])
|
||||
return len(set1)
|
||||
}
|
||||
if 0 == len(set1) {
|
||||
buffer = buffer[:len(set2)]
|
||||
copy(buffer, set2[:])
|
||||
return len(set2)
|
||||
}
|
||||
pos := 0
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
s1 := set1[k1]
|
||||
s2 := set2[k2]
|
||||
buffer = buffer[:cap(buffer)]
|
||||
for {
|
||||
if s1 < s2 {
|
||||
buffer[pos] = s1
|
||||
pos++
|
||||
k1++
|
||||
if k1 >= len(set1) {
|
||||
for ; k2 < len(set2); k2++ {
|
||||
buffer[pos] = set2[k2]
|
||||
pos++
|
||||
}
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
} else if s1 == s2 {
|
||||
k1++
|
||||
k2++
|
||||
if k1 >= len(set1) {
|
||||
for ; k2 < len(set2); k2++ {
|
||||
buffer[pos] = set2[k2]
|
||||
pos++
|
||||
}
|
||||
break
|
||||
}
|
||||
if k2 >= len(set2) {
|
||||
for ; k1 < len(set1); k1++ {
|
||||
buffer[pos] = set1[k1]
|
||||
pos++
|
||||
}
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
s2 = set2[k2]
|
||||
} else { // if (val1>val2)
|
||||
buffer[pos] = s2
|
||||
pos++
|
||||
k2++
|
||||
if k2 >= len(set2) {
|
||||
for ; k1 < len(set1); k1++ {
|
||||
buffer[pos] = set1[k1]
|
||||
pos++
|
||||
}
|
||||
break
|
||||
}
|
||||
s2 = set2[k2]
|
||||
}
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
// union2by2Cardinality computes the cardinality of the union
|
||||
func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
|
||||
pos := 0
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
if 0 == len(set2) {
|
||||
return len(set1)
|
||||
}
|
||||
if 0 == len(set1) {
|
||||
return len(set2)
|
||||
}
|
||||
s1 := set1[k1]
|
||||
s2 := set2[k2]
|
||||
for {
|
||||
if s1 < s2 {
|
||||
pos++
|
||||
k1++
|
||||
if k1 >= len(set1) {
|
||||
pos += len(set2) - k2
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
} else if s1 == s2 {
|
||||
pos++
|
||||
k1++
|
||||
k2++
|
||||
if k1 >= len(set1) {
|
||||
pos += len(set2) - k2
|
||||
break
|
||||
}
|
||||
if k2 >= len(set2) {
|
||||
pos += len(set1) - k1
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
s2 = set2[k2]
|
||||
} else { // if (set1[k1]>set2[k2])
|
||||
pos++
|
||||
k2++
|
||||
if k2 >= len(set2) {
|
||||
pos += len(set1) - k1
|
||||
break
|
||||
}
|
||||
s2 = set2[k2]
|
||||
}
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func intersection2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
buffer []uint16,
|
||||
) int {
|
||||
if len(set1)*64 < len(set2) {
|
||||
return onesidedgallopingintersect2by2(set1, set2, buffer)
|
||||
} else if len(set2)*64 < len(set1) {
|
||||
return onesidedgallopingintersect2by2(set2, set1, buffer)
|
||||
} else {
|
||||
return localintersect2by2(set1, set2, buffer)
|
||||
}
|
||||
}
|
||||
|
||||
// intersection2by2Cardinality computes the cardinality of the intersection
|
||||
func intersection2by2Cardinality(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
) int {
|
||||
if len(set1)*64 < len(set2) {
|
||||
return onesidedgallopingintersect2by2Cardinality(set1, set2)
|
||||
} else if len(set2)*64 < len(set1) {
|
||||
return onesidedgallopingintersect2by2Cardinality(set2, set1)
|
||||
} else {
|
||||
return localintersect2by2Cardinality(set1, set2)
|
||||
}
|
||||
}
|
||||
|
||||
// intersects2by2 computes whether the two sets intersect
|
||||
func intersects2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
) bool {
|
||||
// could be optimized if one set is much larger than the other one
|
||||
if (len(set1) == 0) || (len(set2) == 0) {
|
||||
return false
|
||||
}
|
||||
index1 := 0
|
||||
index2 := 0
|
||||
value1 := set1[index1]
|
||||
value2 := set2[index2]
|
||||
mainwhile:
|
||||
for {
|
||||
|
||||
if value2 < value1 {
|
||||
for {
|
||||
index2++
|
||||
if index2 == len(set2) {
|
||||
break mainwhile
|
||||
}
|
||||
value2 = set2[index2]
|
||||
if value2 >= value1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if value1 < value2 {
|
||||
for {
|
||||
index1++
|
||||
if index1 == len(set1) {
|
||||
break mainwhile
|
||||
}
|
||||
value1 = set1[index1]
|
||||
if value1 >= value2 {
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// (set2[k2] == set1[k1])
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func localintersect2by2(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
buffer []uint16,
|
||||
) int {
|
||||
if (len(set1) == 0) || (len(set2) == 0) {
|
||||
return 0
|
||||
}
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
pos := 0
|
||||
buffer = buffer[:cap(buffer)]
|
||||
s1 := set1[k1]
|
||||
s2 := set2[k2]
|
||||
mainwhile:
|
||||
for {
|
||||
if s2 < s1 {
|
||||
for {
|
||||
k2++
|
||||
if k2 == len(set2) {
|
||||
break mainwhile
|
||||
}
|
||||
s2 = set2[k2]
|
||||
if s2 >= s1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if s1 < s2 {
|
||||
for {
|
||||
k1++
|
||||
if k1 == len(set1) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = set1[k1]
|
||||
if s1 >= s2 {
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// (set2[k2] == set1[k1])
|
||||
buffer[pos] = s1
|
||||
pos++
|
||||
k1++
|
||||
if k1 == len(set1) {
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
k2++
|
||||
if k2 == len(set2) {
|
||||
break
|
||||
}
|
||||
s2 = set2[k2]
|
||||
}
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
// / localintersect2by2Cardinality computes the cardinality of the intersection
|
||||
func localintersect2by2Cardinality(
|
||||
set1 []uint16,
|
||||
set2 []uint16,
|
||||
) int {
|
||||
if (len(set1) == 0) || (len(set2) == 0) {
|
||||
return 0
|
||||
}
|
||||
index1 := 0
|
||||
index2 := 0
|
||||
pos := 0
|
||||
value1 := set1[index1]
|
||||
value2 := set2[index2]
|
||||
mainwhile:
|
||||
for {
|
||||
if value2 < value1 {
|
||||
for {
|
||||
index2++
|
||||
if index2 == len(set2) {
|
||||
break mainwhile
|
||||
}
|
||||
value2 = set2[index2]
|
||||
if value2 >= value1 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if value1 < value2 {
|
||||
for {
|
||||
index1++
|
||||
if index1 == len(set1) {
|
||||
break mainwhile
|
||||
}
|
||||
value1 = set1[index1]
|
||||
if value1 >= value2 {
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// (set2[k2] == set1[k1])
|
||||
pos++
|
||||
index1++
|
||||
if index1 == len(set1) {
|
||||
break
|
||||
}
|
||||
value1 = set1[index1]
|
||||
index2++
|
||||
if index2 == len(set2) {
|
||||
break
|
||||
}
|
||||
value2 = set2[index2]
|
||||
}
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func advanceUntil(
|
||||
array []uint16,
|
||||
pos int,
|
||||
length int,
|
||||
min uint16,
|
||||
) int {
|
||||
lower := pos + 1
|
||||
|
||||
if lower >= length || array[lower] >= min {
|
||||
return lower
|
||||
}
|
||||
|
||||
spansize := 1
|
||||
|
||||
for lower+spansize < length && array[lower+spansize] < min {
|
||||
spansize *= 2
|
||||
}
|
||||
var upper int
|
||||
if lower+spansize < length {
|
||||
upper = lower + spansize
|
||||
} else {
|
||||
upper = length - 1
|
||||
}
|
||||
|
||||
if array[upper] == min {
|
||||
return upper
|
||||
}
|
||||
|
||||
if array[upper] < min {
|
||||
// means
|
||||
// array
|
||||
// has no
|
||||
// item
|
||||
// >= min
|
||||
// pos = array.length;
|
||||
return length
|
||||
}
|
||||
|
||||
// we know that the next-smallest span was too small
|
||||
lower += (spansize >> 1)
|
||||
|
||||
mid := 0
|
||||
for lower+1 != upper {
|
||||
mid = (lower + upper) >> 1
|
||||
if array[mid] == min {
|
||||
return mid
|
||||
} else if array[mid] < min {
|
||||
lower = mid
|
||||
} else {
|
||||
upper = mid
|
||||
}
|
||||
}
|
||||
return upper
|
||||
}
|
||||
|
||||
func onesidedgallopingintersect2by2(
|
||||
smallset []uint16,
|
||||
largeset []uint16,
|
||||
buffer []uint16,
|
||||
) int {
|
||||
if 0 == len(smallset) {
|
||||
return 0
|
||||
}
|
||||
buffer = buffer[:cap(buffer)]
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
pos := 0
|
||||
s1 := largeset[k1]
|
||||
s2 := smallset[k2]
|
||||
mainwhile:
|
||||
|
||||
for {
|
||||
if s1 < s2 {
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
if s2 < s1 {
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break mainwhile
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
} else {
|
||||
|
||||
buffer[pos] = s2
|
||||
pos++
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func onesidedgallopingintersect2by2Cardinality(
|
||||
smallset []uint16,
|
||||
largeset []uint16,
|
||||
) int {
|
||||
if 0 == len(smallset) {
|
||||
return 0
|
||||
}
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
pos := 0
|
||||
s1 := largeset[k1]
|
||||
s2 := smallset[k2]
|
||||
mainwhile:
|
||||
|
||||
for {
|
||||
if s1 < s2 {
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
if s2 < s1 {
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break mainwhile
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
} else {
|
||||
|
||||
pos++
|
||||
k2++
|
||||
if k2 == len(smallset) {
|
||||
break
|
||||
}
|
||||
s2 = smallset[k2]
|
||||
k1 = advanceUntil(largeset, k1, len(largeset), s2)
|
||||
if k1 == len(largeset) {
|
||||
break mainwhile
|
||||
}
|
||||
s1 = largeset[k1]
|
||||
}
|
||||
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
||||
func binarySearch(array []uint16, ikey uint16) int {
|
||||
low := 0
|
||||
high := len(array) - 1
|
||||
for low+16 <= high {
|
||||
middleIndex := int(uint32(low+high) >> 1)
|
||||
middleValue := array[middleIndex]
|
||||
if middleValue < ikey {
|
||||
low = middleIndex + 1
|
||||
} else if middleValue > ikey {
|
||||
high = middleIndex - 1
|
||||
} else {
|
||||
return middleIndex
|
||||
}
|
||||
}
|
||||
for ; low <= high; low++ {
|
||||
val := array[low]
|
||||
if val >= ikey {
|
||||
if val == ikey {
|
||||
return low
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
return -(low + 1)
|
||||
}
|
||||
|
||||
// searchResult provides information about a search request.
|
||||
// The values will depend on the context of the search
|
||||
type searchResult struct {
|
||||
value uint16
|
||||
index int
|
||||
exactMatch bool
|
||||
}
|
||||
|
||||
// notFound returns a bool depending the search context
|
||||
// For cases `previousValue` and `nextValue` if target is present in the slice
|
||||
// this function will return `true` otherwise `false`
|
||||
// For `nextAbsentValue` and `previousAbsentValue` this will only return `False`
|
||||
func (sr *searchResult) notFound() bool {
|
||||
return !sr.exactMatch
|
||||
}
|
||||
|
||||
// outOfBounds indicates whether the target was outside the lower and upper bounds of the container
|
||||
func (sr *searchResult) outOfBounds() bool {
|
||||
return sr.index <= -1
|
||||
}
|
||||
|
||||
// binarySearchUntil is a helper function around binarySearchUntilWithBounds
|
||||
// The user does not have to pass in the lower and upper bound
|
||||
// The lower bound is taken to be `0` and the upper bound `len(array)-1`
|
||||
func binarySearchUntil(array []uint16, target uint16) searchResult {
|
||||
return binarySearchUntilWithBounds(array, target, 0, len(array)-1)
|
||||
}
|
||||
|
||||
// binarySearchUntilWithBounds returns a `searchResult`.
|
||||
// If an exact match is found the `searchResult{target, <index>, true}` will be returned, where `<index>` is
|
||||
// `target`s index in `array`, and `result.notFound()` evaluates to `false`.
|
||||
// If a match is not found, but `target` was in-bounds then the result.index will be the closest smaller value
|
||||
// Example: [ 8,9,11,12] if the target was 10, then `searchResult{9, 1, false}` will be returned.
|
||||
// If `target` was out of bounds `searchResult{0, -1, false}` will be returned.
|
||||
func binarySearchUntilWithBounds(array []uint16, target uint16, lowIndex int, maxIndex int) searchResult {
|
||||
highIndex := maxIndex
|
||||
|
||||
closestIndex := -1
|
||||
|
||||
if target < array[lowIndex] {
|
||||
return searchResult{0, closestIndex, false}
|
||||
}
|
||||
|
||||
if target > array[maxIndex] {
|
||||
return searchResult{0, len(array), false}
|
||||
}
|
||||
|
||||
for lowIndex <= highIndex {
|
||||
middleIndex := (lowIndex + highIndex) / 2
|
||||
middleValue := array[middleIndex]
|
||||
|
||||
if middleValue == target {
|
||||
return searchResult{middleValue, middleIndex, true}
|
||||
}
|
||||
|
||||
if target < middleValue {
|
||||
|
||||
if middleIndex > 0 && target > array[middleIndex-1] {
|
||||
return searchResult{array[middleIndex-1], middleIndex - 1, false}
|
||||
}
|
||||
|
||||
highIndex = middleIndex
|
||||
} else {
|
||||
if middleIndex < maxIndex && target < array[middleIndex+1] {
|
||||
return searchResult{middleValue, middleIndex, false}
|
||||
}
|
||||
lowIndex = middleIndex + 1
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return searchResult{array[closestIndex], closestIndex, false}
|
||||
}
|
||||
|
||||
// binarySearchPast is a wrapper around binarySearchPastWithBounds
|
||||
// The user does not have to pass in the lower and upper bound
|
||||
// The lower bound is taken to be `0` and the upper bound `len(array)-1`
|
||||
func binarySearchPast(array []uint16, target uint16) searchResult {
|
||||
return binarySearchPastWithBounds(array, target, 0, len(array)-1)
|
||||
}
|
||||
|
||||
// binarySearchPastWithBounds looks for the smallest value larger than or equal to `target`
|
||||
// If `target` is out of bounds a `searchResult` indicating out of bounds is returned
|
||||
// `target` does not have to exist in the slice.
|
||||
//
|
||||
// Example:
|
||||
// Suppose the slice is [...10,13...] with `target` equal to 11
|
||||
// The searchResult will have searchResult.value = 13
|
||||
func binarySearchPastWithBounds(array []uint16, target uint16, lowIndex int, maxIndex int) searchResult {
|
||||
highIndex := maxIndex
|
||||
|
||||
closestIndex := -1
|
||||
|
||||
if target < array[lowIndex] {
|
||||
return searchResult{0, closestIndex, false}
|
||||
}
|
||||
|
||||
if target > array[maxIndex] {
|
||||
return searchResult{0, len(array), false}
|
||||
}
|
||||
|
||||
for lowIndex <= highIndex {
|
||||
middleIndex := (lowIndex + highIndex) / 2
|
||||
middleValue := array[middleIndex]
|
||||
|
||||
if middleValue == target {
|
||||
return searchResult{middleValue, middleIndex, true}
|
||||
}
|
||||
|
||||
if target < middleValue {
|
||||
|
||||
if middleIndex > 0 && target > array[middleIndex-1] {
|
||||
return searchResult{array[middleIndex], middleIndex, false}
|
||||
}
|
||||
|
||||
highIndex = middleIndex
|
||||
} else {
|
||||
if middleIndex < maxIndex && target < array[middleIndex+1] {
|
||||
return searchResult{array[middleIndex+1], middleIndex + 1, false}
|
||||
}
|
||||
lowIndex = middleIndex + 1
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return searchResult{array[closestIndex], closestIndex, false}
|
||||
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
//go:build arm64 && !gccgo && !appengine
|
||||
// +build arm64,!gccgo,!appengine
|
||||
|
||||
package roaring
|
||||
|
||||
//go:noescape
|
||||
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) (size int)
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
// +build arm64,!gccgo,!appengine
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
|
||||
// This implements union2by2 using golang's version of arm64 assembly
|
||||
// The algorithm is very similar to the generic one,
|
||||
// but makes better use of arm64 features so is notably faster.
|
||||
// The basic algorithm structure is as follows:
|
||||
// 1. If either set is empty, copy the other set into the buffer and return the length
|
||||
// 2. Otherwise, load the first element of each set into a variable (s1 and s2).
|
||||
// 3. a. Compare the values of s1 and s2.
|
||||
// b. add the smaller one to the buffer.
|
||||
// c. perform a bounds check before incrementing.
|
||||
// If one set is finished, copy the rest of the other set over.
|
||||
// d. update s1 and or s2 to the next value, continue loop.
|
||||
//
|
||||
// Past the fact of the algorithm, this code makes use of several arm64 features
|
||||
// Condition Codes:
|
||||
// arm64's CMP operation sets 4 bits that can be used for branching,
|
||||
// rather than just true or false.
|
||||
// As a consequence, a single comparison gives enough information to distinguish the three cases
|
||||
//
|
||||
// Post-increment pointers after load/store:
|
||||
// Instructions like `MOVHU.P 2(R0), R6`
|
||||
// increment the register by a specified amount, in this example 2.
|
||||
// Because uint16's are exactly 2 bytes and the length of the slices
|
||||
// is part of the slice header,
|
||||
// there is no need to separately track the index into the slice.
|
||||
// Instead, the code can calculate the final read value and compare against that,
|
||||
// using the post-increment reads to move the pointers along.
|
||||
//
|
||||
// TODO: CALL out to memmove once the list is exhausted.
|
||||
// Right now it moves the necessary shorts so that the remaining count
|
||||
// is a multiple of 4 and then copies 64 bits at a time.
|
||||
|
||||
TEXT ·union2by2(SB), NOSPLIT, $0-80
|
||||
// R0, R1, and R2 for the pointers to the three slices
|
||||
MOVD set1+0(FP), R0
|
||||
MOVD set2+24(FP), R1
|
||||
MOVD buffer+48(FP), R2
|
||||
|
||||
//R3 and R4 will be the values at which we will have finished reading set1 and set2.
|
||||
// R3 should be R0 + 2 * set1_len+8(FP)
|
||||
MOVD set1_len+8(FP), R3
|
||||
MOVD set2_len+32(FP), R4
|
||||
|
||||
ADD R3<<1, R0, R3
|
||||
ADD R4<<1, R1, R4
|
||||
|
||||
|
||||
//Rather than counting the number of elements added separately
|
||||
//Save the starting register of buffer.
|
||||
MOVD buffer+48(FP), R5
|
||||
|
||||
// set1 is empty, just flush set2
|
||||
CMP R0, R3
|
||||
BEQ flush_right
|
||||
|
||||
// set2 is empty, just flush set1
|
||||
CMP R1, R4
|
||||
BEQ flush_left
|
||||
|
||||
// R6, R7 are the working space for s1 and s2
|
||||
MOVD ZR, R6
|
||||
MOVD ZR, R7
|
||||
|
||||
MOVHU.P 2(R0), R6
|
||||
MOVHU.P 2(R1), R7
|
||||
loop:
|
||||
|
||||
CMP R6, R7
|
||||
BEQ pop_both // R6 == R7
|
||||
BLS pop_right // R6 > R7
|
||||
//pop_left: // R6 < R7
|
||||
MOVHU.P R6, 2(R2)
|
||||
CMP R0, R3
|
||||
BEQ pop_then_flush_right
|
||||
MOVHU.P 2(R0), R6
|
||||
JMP loop
|
||||
pop_both:
|
||||
MOVHU.P R6, 2(R2) //could also use R7, since they are equal
|
||||
CMP R0, R3
|
||||
BEQ flush_right
|
||||
CMP R1, R4
|
||||
BEQ flush_left
|
||||
MOVHU.P 2(R0), R6
|
||||
MOVHU.P 2(R1), R7
|
||||
JMP loop
|
||||
pop_right:
|
||||
MOVHU.P R7, 2(R2)
|
||||
CMP R1, R4
|
||||
BEQ pop_then_flush_left
|
||||
MOVHU.P 2(R1), R7
|
||||
JMP loop
|
||||
|
||||
pop_then_flush_right:
|
||||
MOVHU.P R7, 2(R2)
|
||||
flush_right:
|
||||
MOVD R1, R0
|
||||
MOVD R4, R3
|
||||
JMP flush_left
|
||||
pop_then_flush_left:
|
||||
MOVHU.P R6, 2(R2)
|
||||
flush_left:
|
||||
CMP R0, R3
|
||||
BEQ return
|
||||
//figure out how many bytes to slough off. Must be a multiple of two
|
||||
SUB R0, R3, R4
|
||||
ANDS $6, R4
|
||||
BEQ long_flush //handles the 0 mod 8 case
|
||||
SUBS $4, R4, R4 // since possible values are 2, 4, 6, this splits evenly
|
||||
BLT pop_single // exactly the 2 case
|
||||
MOVW.P 4(R0), R6
|
||||
MOVW.P R6, 4(R2)
|
||||
BEQ long_flush // we're now aligned by 64 bits, as R4==4, otherwise 2 more
|
||||
pop_single:
|
||||
MOVHU.P 2(R0), R6
|
||||
MOVHU.P R6, 2(R2)
|
||||
long_flush:
|
||||
// at this point we know R3 - R0 is a multiple of 8.
|
||||
CMP R0, R3
|
||||
BEQ return
|
||||
MOVD.P 8(R0), R6
|
||||
MOVD.P R6, 8(R2)
|
||||
JMP long_flush
|
||||
return:
|
||||
// number of shorts written is (R5 - R2) >> 1
|
||||
SUB R5, R2
|
||||
LSR $1, R2, R2
|
||||
MOVD R2, size+72(FP)
|
||||
RET
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
//go:build !arm64 || gccgo || appengine
|
||||
// +build !arm64 gccgo appengine
|
||||
|
||||
package roaring
|
||||
|
||||
func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
|
||||
pos := 0
|
||||
k1 := 0
|
||||
k2 := 0
|
||||
if 0 == len(set2) {
|
||||
buffer = buffer[:len(set1)]
|
||||
copy(buffer, set1[:])
|
||||
return len(set1)
|
||||
}
|
||||
if 0 == len(set1) {
|
||||
buffer = buffer[:len(set2)]
|
||||
copy(buffer, set2[:])
|
||||
return len(set2)
|
||||
}
|
||||
s1 := set1[k1]
|
||||
s2 := set2[k2]
|
||||
buffer = buffer[:cap(buffer)]
|
||||
for {
|
||||
if s1 < s2 {
|
||||
buffer[pos] = s1
|
||||
pos++
|
||||
k1++
|
||||
if k1 >= len(set1) {
|
||||
copy(buffer[pos:], set2[k2:])
|
||||
pos += len(set2) - k2
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
} else if s1 == s2 {
|
||||
buffer[pos] = s1
|
||||
pos++
|
||||
k1++
|
||||
k2++
|
||||
if k1 >= len(set1) {
|
||||
copy(buffer[pos:], set2[k2:])
|
||||
pos += len(set2) - k2
|
||||
break
|
||||
}
|
||||
if k2 >= len(set2) {
|
||||
copy(buffer[pos:], set1[k1:])
|
||||
pos += len(set1) - k1
|
||||
break
|
||||
}
|
||||
s1 = set1[k1]
|
||||
s2 = set2[k2]
|
||||
} else { // if (set1[k1]>set2[k2])
|
||||
buffer[pos] = s2
|
||||
pos++
|
||||
k2++
|
||||
if k2 >= len(set2) {
|
||||
copy(buffer[pos:], set1[k1:])
|
||||
pos += len(set1) - k1
|
||||
break
|
||||
}
|
||||
s2 = set2[k2]
|
||||
}
|
||||
}
|
||||
return pos
|
||||
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
package roaring
|
||||
|
||||
type shortIterable interface {
|
||||
hasNext() bool
|
||||
next() uint16
|
||||
}
|
||||
|
||||
type shortPeekable interface {
|
||||
shortIterable
|
||||
peekNext() uint16
|
||||
advanceIfNeeded(minval uint16)
|
||||
}
|
||||
|
||||
type shortIterator struct {
|
||||
slice []uint16
|
||||
loc int
|
||||
}
|
||||
|
||||
func (si *shortIterator) hasNext() bool {
|
||||
return si.loc < len(si.slice)
|
||||
}
|
||||
|
||||
func (si *shortIterator) next() uint16 {
|
||||
a := si.slice[si.loc]
|
||||
si.loc++
|
||||
return a
|
||||
}
|
||||
|
||||
func (si *shortIterator) peekNext() uint16 {
|
||||
return si.slice[si.loc]
|
||||
}
|
||||
|
||||
func (si *shortIterator) advanceIfNeeded(minval uint16) {
|
||||
if si.hasNext() && si.peekNext() < minval {
|
||||
si.loc = advanceUntil(si.slice, si.loc, len(si.slice), minval)
|
||||
}
|
||||
}
|
||||
|
||||
type reverseIterator struct {
|
||||
slice []uint16
|
||||
loc int
|
||||
}
|
||||
|
||||
func (si *reverseIterator) hasNext() bool {
|
||||
return si.loc >= 0
|
||||
}
|
||||
|
||||
func (si *reverseIterator) next() uint16 {
|
||||
a := si.slice[si.loc]
|
||||
si.loc--
|
||||
return a
|
||||
}
|
||||
|
|
@ -0,0 +1,384 @@
|
|||
//go:build gofuzz
|
||||
// +build gofuzz
|
||||
|
||||
/*
|
||||
# Instructions for smat testing for roaring
|
||||
|
||||
[smat](https://github.com/mschoch/smat) is a framework that provides
|
||||
state machine assisted fuzz testing.
|
||||
|
||||
To run the smat tests for roaring...
|
||||
|
||||
## Prerequisites
|
||||
|
||||
$ go get github.com/dvyukov/go-fuzz/go-fuzz
|
||||
$ go get github.com/dvyukov/go-fuzz/go-fuzz-build
|
||||
|
||||
## Steps
|
||||
|
||||
1. Generate initial smat corpus:
|
||||
```
|
||||
go test -tags=gofuzz -run=TestGenerateSmatCorpus
|
||||
```
|
||||
|
||||
2. Build go-fuzz test program with instrumentation:
|
||||
```
|
||||
go-fuzz-build -func FuzzSmat github.com/RoaringBitmap/roaring
|
||||
```
|
||||
|
||||
3. Run go-fuzz:
|
||||
```
|
||||
go-fuzz -bin=./roaring-fuzz.zip -workdir=workdir/ -timeout=200
|
||||
```
|
||||
|
||||
You should see output like...
|
||||
```
|
||||
2016/09/16 13:58:35 slaves: 8, corpus: 1 (3s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 3s
|
||||
2016/09/16 13:58:38 slaves: 8, corpus: 1 (6s ago), crashers: 0, restarts: 1/0, execs: 0 (0/sec), cover: 0, uptime: 6s
|
||||
2016/09/16 13:58:41 slaves: 8, corpus: 1 (9s ago), crashers: 0, restarts: 1/44, execs: 44 (5/sec), cover: 0, uptime: 9s
|
||||
2016/09/16 13:58:44 slaves: 8, corpus: 1 (12s ago), crashers: 0, restarts: 1/45, execs: 45 (4/sec), cover: 0, uptime: 12s
|
||||
2016/09/16 13:58:47 slaves: 8, corpus: 1 (15s ago), crashers: 0, restarts: 1/46, execs: 46 (3/sec), cover: 0, uptime: 15s
|
||||
2016/09/16 13:58:50 slaves: 8, corpus: 1 (18s ago), crashers: 0, restarts: 1/47, execs: 47 (3/sec), cover: 0, uptime: 18s
|
||||
2016/09/16 13:58:53 slaves: 8, corpus: 1 (21s ago), crashers: 0, restarts: 1/63, execs: 63 (3/sec), cover: 0, uptime: 21s
|
||||
2016/09/16 13:58:56 slaves: 8, corpus: 1 (24s ago), crashers: 0, restarts: 1/65, execs: 65 (3/sec), cover: 0, uptime: 24s
|
||||
2016/09/16 13:58:59 slaves: 8, corpus: 1 (27s ago), crashers: 0, restarts: 1/66, execs: 66 (2/sec), cover: 0, uptime: 27s
|
||||
2016/09/16 13:59:02 slaves: 8, corpus: 1 (30s ago), crashers: 0, restarts: 1/67, execs: 67 (2/sec), cover: 0, uptime: 30s
|
||||
2016/09/16 13:59:05 slaves: 8, corpus: 1 (33s ago), crashers: 0, restarts: 1/83, execs: 83 (3/sec), cover: 0, uptime: 33s
|
||||
2016/09/16 13:59:08 slaves: 8, corpus: 1 (36s ago), crashers: 0, restarts: 1/84, execs: 84 (2/sec), cover: 0, uptime: 36s
|
||||
2016/09/16 13:59:11 slaves: 8, corpus: 2 (0s ago), crashers: 0, restarts: 1/85, execs: 85 (2/sec), cover: 0, uptime: 39s
|
||||
2016/09/16 13:59:14 slaves: 8, corpus: 17 (2s ago), crashers: 0, restarts: 1/86, execs: 86 (2/sec), cover: 480, uptime: 42s
|
||||
2016/09/16 13:59:17 slaves: 8, corpus: 17 (5s ago), crashers: 0, restarts: 1/66, execs: 132 (3/sec), cover: 487, uptime: 45s
|
||||
2016/09/16 13:59:20 slaves: 8, corpus: 17 (8s ago), crashers: 0, restarts: 1/440, execs: 2645 (55/sec), cover: 487, uptime: 48s
|
||||
|
||||
```
|
||||
|
||||
Let it run, and if the # of crashers is > 0, check out the reports in
|
||||
the workdir where you should be able to find the panic goroutine stack
|
||||
traces.
|
||||
*/
|
||||
|
||||
package roaring
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
"github.com/mschoch/smat"
|
||||
)
|
||||
|
||||
// fuzz test using state machine driven by byte stream.
|
||||
func FuzzSmat(data []byte) int {
|
||||
return smat.Fuzz(&smatContext{}, smat.ActionID('S'), smat.ActionID('T'),
|
||||
smatActionMap, data)
|
||||
}
|
||||
|
||||
var smatDebug = false
|
||||
|
||||
func smatLog(prefix, format string, args ...interface{}) {
|
||||
if smatDebug {
|
||||
fmt.Print(prefix)
|
||||
fmt.Printf(format, args...)
|
||||
}
|
||||
}
|
||||
|
||||
type smatContext struct {
|
||||
pairs []*smatPair
|
||||
|
||||
// Two registers, x & y.
|
||||
x int
|
||||
y int
|
||||
|
||||
actions int
|
||||
}
|
||||
|
||||
type smatPair struct {
|
||||
bm *Bitmap
|
||||
bs *bitset.BitSet
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
var smatActionMap = smat.ActionMap{
|
||||
smat.ActionID('X'): smatAction("x++", smatWrap(func(c *smatContext) { c.x++ })),
|
||||
smat.ActionID('x'): smatAction("x--", smatWrap(func(c *smatContext) { c.x-- })),
|
||||
smat.ActionID('Y'): smatAction("y++", smatWrap(func(c *smatContext) { c.y++ })),
|
||||
smat.ActionID('y'): smatAction("y--", smatWrap(func(c *smatContext) { c.y-- })),
|
||||
smat.ActionID('*'): smatAction("x*y", smatWrap(func(c *smatContext) { c.x = c.x * c.y })),
|
||||
smat.ActionID('<'): smatAction("x<<", smatWrap(func(c *smatContext) { c.x = c.x << 1 })),
|
||||
|
||||
smat.ActionID('^'): smatAction("swap", smatWrap(func(c *smatContext) { c.x, c.y = c.y, c.x })),
|
||||
|
||||
smat.ActionID('['): smatAction(" pushPair", smatWrap(smatPushPair)),
|
||||
smat.ActionID(']'): smatAction(" popPair", smatWrap(smatPopPair)),
|
||||
|
||||
smat.ActionID('B'): smatAction(" setBit", smatWrap(smatSetBit)),
|
||||
smat.ActionID('b'): smatAction(" removeBit", smatWrap(smatRemoveBit)),
|
||||
|
||||
smat.ActionID('o'): smatAction(" or", smatWrap(smatOr)),
|
||||
smat.ActionID('a'): smatAction(" and", smatWrap(smatAnd)),
|
||||
|
||||
smat.ActionID('#'): smatAction(" cardinality", smatWrap(smatCardinality)),
|
||||
|
||||
smat.ActionID('O'): smatAction(" orCardinality", smatWrap(smatOrCardinality)),
|
||||
smat.ActionID('A'): smatAction(" andCardinality", smatWrap(smatAndCardinality)),
|
||||
|
||||
smat.ActionID('c'): smatAction(" clear", smatWrap(smatClear)),
|
||||
smat.ActionID('r'): smatAction(" runOptimize", smatWrap(smatRunOptimize)),
|
||||
|
||||
smat.ActionID('e'): smatAction(" isEmpty", smatWrap(smatIsEmpty)),
|
||||
|
||||
smat.ActionID('i'): smatAction(" intersects", smatWrap(smatIntersects)),
|
||||
|
||||
smat.ActionID('f'): smatAction(" flip", smatWrap(smatFlip)),
|
||||
|
||||
smat.ActionID('-'): smatAction(" difference", smatWrap(smatDifference)),
|
||||
}
|
||||
|
||||
var smatRunningPercentActions []smat.PercentAction
|
||||
|
||||
func init() {
|
||||
var ids []int
|
||||
for actionId := range smatActionMap {
|
||||
ids = append(ids, int(actionId))
|
||||
}
|
||||
sort.Ints(ids)
|
||||
|
||||
pct := 100 / len(smatActionMap)
|
||||
for _, actionId := range ids {
|
||||
smatRunningPercentActions = append(smatRunningPercentActions,
|
||||
smat.PercentAction{pct, smat.ActionID(actionId)})
|
||||
}
|
||||
|
||||
smatActionMap[smat.ActionID('S')] = smatAction("SETUP", smatSetupFunc)
|
||||
smatActionMap[smat.ActionID('T')] = smatAction("TEARDOWN", smatTeardownFunc)
|
||||
}
|
||||
|
||||
// We only have one smat state: running.
|
||||
func smatRunning(next byte) smat.ActionID {
|
||||
return smat.PercentExecute(next, smatRunningPercentActions...)
|
||||
}
|
||||
|
||||
func smatAction(name string, f func(ctx smat.Context) (smat.State, error)) func(smat.Context) (smat.State, error) {
|
||||
return func(ctx smat.Context) (smat.State, error) {
|
||||
c := ctx.(*smatContext)
|
||||
c.actions++
|
||||
|
||||
smatLog(" ", "%s\n", name)
|
||||
|
||||
return f(ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// Creates an smat action func based on a simple callback.
|
||||
func smatWrap(cb func(c *smatContext)) func(smat.Context) (next smat.State, err error) {
|
||||
return func(ctx smat.Context) (next smat.State, err error) {
|
||||
c := ctx.(*smatContext)
|
||||
cb(c)
|
||||
return smatRunning, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Invokes a callback function with the input v bounded to len(c.pairs).
|
||||
func (c *smatContext) withPair(v int, cb func(*smatPair)) {
|
||||
if len(c.pairs) > 0 {
|
||||
if v < 0 {
|
||||
v = -v
|
||||
}
|
||||
v = v % len(c.pairs)
|
||||
cb(c.pairs[v])
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
func smatSetupFunc(ctx smat.Context) (next smat.State, err error) {
|
||||
return smatRunning, nil
|
||||
}
|
||||
|
||||
func smatTeardownFunc(ctx smat.Context) (next smat.State, err error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
func smatPushPair(c *smatContext) {
|
||||
c.pairs = append(c.pairs, &smatPair{
|
||||
bm: NewBitmap(),
|
||||
bs: bitset.New(100),
|
||||
})
|
||||
}
|
||||
|
||||
func smatPopPair(c *smatContext) {
|
||||
if len(c.pairs) > 0 {
|
||||
c.pairs = c.pairs[0 : len(c.pairs)-1]
|
||||
}
|
||||
}
|
||||
|
||||
func smatSetBit(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
y := uint32(c.y)
|
||||
p.bm.AddInt(int(y))
|
||||
p.bs.Set(uint(y))
|
||||
p.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatRemoveBit(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
y := uint32(c.y)
|
||||
p.bm.Remove(y)
|
||||
p.bs.Clear(uint(y))
|
||||
p.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatAnd(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.bm.And(py.bm)
|
||||
px.bs = px.bs.Intersection(py.bs)
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatOr(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.bm.Or(py.bm)
|
||||
px.bs = px.bs.Union(py.bs)
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatAndCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
c0 := px.bm.AndCardinality(py.bm)
|
||||
c1 := px.bs.IntersectionCardinality(py.bs)
|
||||
if c0 != uint64(c1) {
|
||||
panic("expected same add cardinality")
|
||||
}
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatOrCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
c0 := px.bm.OrCardinality(py.bm)
|
||||
c1 := px.bs.UnionCardinality(py.bs)
|
||||
if c0 != uint64(c1) {
|
||||
panic("expected same or cardinality")
|
||||
}
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatRunOptimize(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
px.bm.RunOptimize()
|
||||
px.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatClear(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
px.bm.Clear()
|
||||
px.bs = px.bs.ClearAll()
|
||||
px.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatCardinality(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c0 := px.bm.GetCardinality()
|
||||
c1 := px.bs.Count()
|
||||
if c0 != uint64(c1) {
|
||||
panic("expected same cardinality")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func smatIsEmpty(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c0 := px.bm.IsEmpty()
|
||||
c1 := px.bs.None()
|
||||
if c0 != c1 {
|
||||
panic("expected same is empty")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func smatIntersects(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
v0 := px.bm.Intersects(py.bm)
|
||||
v1 := px.bs.IntersectionCardinality(py.bs) > 0
|
||||
if v0 != v1 {
|
||||
panic("intersects not equal")
|
||||
}
|
||||
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func smatFlip(c *smatContext) {
|
||||
c.withPair(c.x, func(p *smatPair) {
|
||||
y := uint32(c.y)
|
||||
p.bm.Flip(uint64(y), uint64(y)+1)
|
||||
p.bs = p.bs.Flip(uint(y))
|
||||
p.checkEquals()
|
||||
})
|
||||
}
|
||||
|
||||
func smatDifference(c *smatContext) {
|
||||
c.withPair(c.x, func(px *smatPair) {
|
||||
c.withPair(c.y, func(py *smatPair) {
|
||||
px.bm.AndNot(py.bm)
|
||||
px.bs = px.bs.Difference(py.bs)
|
||||
px.checkEquals()
|
||||
py.checkEquals()
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func (p *smatPair) checkEquals() {
|
||||
if !p.equalsBitSet(p.bs, p.bm) {
|
||||
panic("bitset mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
func (p *smatPair) equalsBitSet(a *bitset.BitSet, b *Bitmap) bool {
|
||||
for i, e := a.NextSet(0); e; i, e = a.NextSet(i + 1) {
|
||||
if !b.ContainsInt(int(i)) {
|
||||
fmt.Printf("in a bitset, not b bitmap, i: %d\n", i)
|
||||
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
|
||||
a.String(), b.String())
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
i := b.Iterator()
|
||||
for i.HasNext() {
|
||||
v := i.Next()
|
||||
if !a.Test(uint(v)) {
|
||||
fmt.Printf("in b bitmap, not a bitset, v: %d\n", v)
|
||||
fmt.Printf(" a bitset: %s\n b bitmap: %s\n",
|
||||
a.String(), b.String())
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
|
@ -0,0 +1,313 @@
|
|||
package roaring
|
||||
|
||||
import (
|
||||
"math"
|
||||
"math/rand"
|
||||
"sort"
|
||||
)
|
||||
|
||||
const (
|
||||
arrayDefaultMaxSize = 4096 // containers with 4096 or fewer integers should be array containers.
|
||||
arrayLazyLowerBound = 1024
|
||||
maxCapacity = 1 << 16
|
||||
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
|
||||
invalidCardinality = -1
|
||||
serialCookie = 12347 // runs, arrays, and bitmaps
|
||||
noOffsetThreshold = 4
|
||||
|
||||
// MaxUint32 is the largest uint32 value.
|
||||
MaxUint32 = math.MaxUint32
|
||||
|
||||
// MaxRange is One more than the maximum allowed bitmap bit index. For use as an upper
|
||||
// bound for ranges.
|
||||
MaxRange uint64 = MaxUint32 + 1
|
||||
|
||||
// MaxUint16 is the largest 16 bit unsigned int.
|
||||
// This is the largest value an interval16 can store.
|
||||
MaxUint16 = math.MaxUint16
|
||||
|
||||
// Compute wordSizeInBytes, the size of a word in bytes.
|
||||
_m = ^uint64(0)
|
||||
_logS = _m>>8&1 + _m>>16&1 + _m>>32&1
|
||||
wordSizeInBytes = 1 << _logS
|
||||
|
||||
// other constants used in ctz_generic.go
|
||||
wordSizeInBits = wordSizeInBytes << 3 // word size in bits
|
||||
)
|
||||
|
||||
const maxWord = 1<<wordSizeInBits - 1
|
||||
|
||||
// doesn't apply to runContainers
|
||||
func getSizeInBytesFromCardinality(card int) int {
|
||||
if card > arrayDefaultMaxSize {
|
||||
// bitmapContainer
|
||||
return maxCapacity / 8
|
||||
}
|
||||
// arrayContainer
|
||||
return 2 * card
|
||||
}
|
||||
|
||||
func fill(arr []uint64, val uint64) {
|
||||
for i := range arr {
|
||||
arr[i] = val
|
||||
}
|
||||
}
|
||||
|
||||
func fillRange(arr []uint64, start, end int, val uint64) {
|
||||
for i := start; i < end; i++ {
|
||||
arr[i] = val
|
||||
}
|
||||
}
|
||||
|
||||
func fillArrayAND(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||
if len(bitmap1) != len(bitmap2) {
|
||||
panic("array lengths don't match")
|
||||
}
|
||||
// TODO: rewrite in assembly
|
||||
pos := 0
|
||||
for k := range bitmap1 {
|
||||
bitset := bitmap1[k] & bitmap2[k]
|
||||
for bitset != 0 {
|
||||
t := bitset & -bitset
|
||||
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||
pos = pos + 1
|
||||
bitset ^= t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func fillArrayANDNOT(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||
if len(bitmap1) != len(bitmap2) {
|
||||
panic("array lengths don't match")
|
||||
}
|
||||
// TODO: rewrite in assembly
|
||||
pos := 0
|
||||
for k := range bitmap1 {
|
||||
bitset := bitmap1[k] &^ bitmap2[k]
|
||||
for bitset != 0 {
|
||||
t := bitset & -bitset
|
||||
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||
pos = pos + 1
|
||||
bitset ^= t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func fillArrayXOR(container []uint16, bitmap1, bitmap2 []uint64) {
|
||||
if len(bitmap1) != len(bitmap2) {
|
||||
panic("array lengths don't match")
|
||||
}
|
||||
// TODO: rewrite in assembly
|
||||
pos := 0
|
||||
for k := 0; k < len(bitmap1); k++ {
|
||||
bitset := bitmap1[k] ^ bitmap2[k]
|
||||
for bitset != 0 {
|
||||
t := bitset & -bitset
|
||||
container[pos] = uint16((k*64 + int(popcount(t-1))))
|
||||
pos = pos + 1
|
||||
bitset ^= t
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func highbits(x uint32) uint16 {
|
||||
return uint16(x >> 16)
|
||||
}
|
||||
|
||||
func lowbits(x uint32) uint16 {
|
||||
return uint16(x & maxLowBit)
|
||||
}
|
||||
|
||||
func combineLoHi16(lob uint16, hob uint16) uint32 {
|
||||
return combineLoHi32(uint32(lob), uint32(hob))
|
||||
}
|
||||
|
||||
func combineLoHi32(lob uint32, hob uint32) uint32 {
|
||||
return uint32(lob) | (hob << 16)
|
||||
}
|
||||
|
||||
const maxLowBit = 0xFFFF
|
||||
|
||||
func flipBitmapRange(bitmap []uint64, start int, end int) {
|
||||
if start >= end {
|
||||
return
|
||||
}
|
||||
firstword := start / 64
|
||||
endword := (end - 1) / 64
|
||||
bitmap[firstword] ^= ^(^uint64(0) << uint(start%64))
|
||||
for i := firstword; i < endword; i++ {
|
||||
bitmap[i] = ^bitmap[i]
|
||||
}
|
||||
bitmap[endword] ^= ^uint64(0) >> (uint(-end) % 64)
|
||||
}
|
||||
|
||||
func resetBitmapRange(bitmap []uint64, start int, end int) {
|
||||
if start >= end {
|
||||
return
|
||||
}
|
||||
firstword := start / 64
|
||||
endword := (end - 1) / 64
|
||||
if firstword == endword {
|
||||
bitmap[firstword] &= ^((^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64)))
|
||||
return
|
||||
}
|
||||
bitmap[firstword] &= ^(^uint64(0) << uint(start%64))
|
||||
for i := firstword + 1; i < endword; i++ {
|
||||
bitmap[i] = 0
|
||||
}
|
||||
bitmap[endword] &= ^(^uint64(0) >> (uint(-end) % 64))
|
||||
}
|
||||
|
||||
func setBitmapRange(bitmap []uint64, start int, end int) {
|
||||
if start >= end {
|
||||
return
|
||||
}
|
||||
firstword := start / 64
|
||||
endword := (end - 1) / 64
|
||||
if firstword == endword {
|
||||
bitmap[firstword] |= (^uint64(0) << uint(start%64)) & (^uint64(0) >> (uint(-end) % 64))
|
||||
return
|
||||
}
|
||||
bitmap[firstword] |= ^uint64(0) << uint(start%64)
|
||||
for i := firstword + 1; i < endword; i++ {
|
||||
bitmap[i] = ^uint64(0)
|
||||
}
|
||||
bitmap[endword] |= ^uint64(0) >> (uint(-end) % 64)
|
||||
}
|
||||
|
||||
func flipBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
flipBitmapRange(bitmap, start, end)
|
||||
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
return int(after - before)
|
||||
}
|
||||
|
||||
func resetBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
resetBitmapRange(bitmap, start, end)
|
||||
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
return int(after - before)
|
||||
}
|
||||
|
||||
func setBitmapRangeAndCardinalityChange(bitmap []uint64, start int, end int) int {
|
||||
before := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
setBitmapRange(bitmap, start, end)
|
||||
after := wordCardinalityForBitmapRange(bitmap, start, end)
|
||||
return int(after - before)
|
||||
}
|
||||
|
||||
func wordCardinalityForBitmapRange(bitmap []uint64, start int, end int) uint64 {
|
||||
answer := uint64(0)
|
||||
if start >= end {
|
||||
return answer
|
||||
}
|
||||
firstword := start / 64
|
||||
endword := (end - 1) / 64
|
||||
for i := firstword; i <= endword; i++ {
|
||||
answer += popcount(bitmap[i])
|
||||
}
|
||||
return answer
|
||||
}
|
||||
|
||||
func selectBitPosition(w uint64, j int) int {
|
||||
seen := 0
|
||||
|
||||
// Divide 64bit
|
||||
part := w & 0xFFFFFFFF
|
||||
n := popcount(part)
|
||||
if n <= uint64(j) {
|
||||
part = w >> 32
|
||||
seen += 32
|
||||
j -= int(n)
|
||||
}
|
||||
w = part
|
||||
|
||||
// Divide 32bit
|
||||
part = w & 0xFFFF
|
||||
n = popcount(part)
|
||||
if n <= uint64(j) {
|
||||
part = w >> 16
|
||||
seen += 16
|
||||
j -= int(n)
|
||||
}
|
||||
w = part
|
||||
|
||||
// Divide 16bit
|
||||
part = w & 0xFF
|
||||
n = popcount(part)
|
||||
if n <= uint64(j) {
|
||||
part = w >> 8
|
||||
seen += 8
|
||||
j -= int(n)
|
||||
}
|
||||
w = part
|
||||
|
||||
// Lookup in final byte
|
||||
var counter uint
|
||||
for counter = 0; counter < 8; counter++ {
|
||||
j -= int((w >> counter) & 1)
|
||||
if j < 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return seen + int(counter)
|
||||
}
|
||||
|
||||
func panicOn(err error) {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
type ph struct {
|
||||
orig int
|
||||
rand int
|
||||
}
|
||||
|
||||
type pha []ph
|
||||
|
||||
func (p pha) Len() int { return len(p) }
|
||||
func (p pha) Less(i, j int) bool { return p[i].rand < p[j].rand }
|
||||
func (p pha) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
|
||||
|
||||
func getRandomPermutation(n int) []int {
|
||||
r := make([]ph, n)
|
||||
for i := 0; i < n; i++ {
|
||||
r[i].orig = i
|
||||
r[i].rand = rand.Intn(1 << 29)
|
||||
}
|
||||
sort.Sort(pha(r))
|
||||
m := make([]int, n)
|
||||
for i := range m {
|
||||
m[i] = r[i].orig
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func minOfInt(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func maxOfInt(a, b int) int {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func maxOfUint16(a, b uint16) uint16 {
|
||||
if a > b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func minOfUint16(a, b uint16) uint16 {
|
||||
if a < b {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
|
||||
target
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
language: go
|
||||
|
||||
sudo: false
|
||||
|
||||
branches:
|
||||
except:
|
||||
- release
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- travis
|
||||
|
||||
go:
|
||||
- "1.11.x"
|
||||
- tip
|
||||
|
||||
matrix:
|
||||
allow_failures:
|
||||
- go: tip
|
||||
|
||||
before_install:
|
||||
- if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi;
|
||||
- if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi;
|
||||
- go get github.com/mattn/goveralls
|
||||
|
||||
before_script:
|
||||
- make deps
|
||||
|
||||
script:
|
||||
- make qa
|
||||
|
||||
after_failure:
|
||||
- cat ./target/test/report.xml
|
||||
|
||||
after_success:
|
||||
- if [ "$TRAVIS_GO_VERSION" = "1.11.1" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi;
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
Copyright (c) 2014 Will Fitzgerald. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
|
@ -0,0 +1,166 @@
|
|||
# bitset
|
||||
|
||||
*Go language library to map between non-negative integers and boolean values*
|
||||
|
||||
[](https://github.com/willf/bitset/actions?query=workflow%3ATest)
|
||||
[](https://goreportcard.com/report/github.com/willf/bitset)
|
||||
[](https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc)
|
||||
|
||||
|
||||
This library is part of the [awesome go collection](https://github.com/avelino/awesome-go). It is used in production by several important systems:
|
||||
|
||||
* [beego](https://github.com/beego/beego)
|
||||
* [CubeFS](https://github.com/cubefs/cubefs)
|
||||
* [Amazon EKS Distro](https://github.com/aws/eks-distro)
|
||||
* [sourcegraph](https://github.com/sourcegraph/sourcegraph-public-snapshot)
|
||||
* [torrent](https://github.com/anacrolix/torrent)
|
||||
|
||||
|
||||
## Description
|
||||
|
||||
Package bitset implements bitsets, a mapping between non-negative integers and boolean values.
|
||||
It should be more efficient than map[uint] bool.
|
||||
|
||||
It provides methods for setting, clearing, flipping, and testing individual integers.
|
||||
|
||||
But it also provides set intersection, union, difference, complement, and symmetric operations, as well as tests to check whether any, all, or no bits are set, and querying a bitset's current length and number of positive bits.
|
||||
|
||||
BitSets are expanded to the size of the largest set bit; the memory allocation is approximately Max bits, where Max is the largest set bit. BitSets are never shrunk automatically, but `Shrink` and `Compact` methods are available. On creation, a hint can be given for the number of bits that will be used.
|
||||
|
||||
Many of the methods, including Set, Clear, and Flip, return a BitSet pointer, which allows for chaining.
|
||||
|
||||
### Example use:
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
|
||||
"github.com/bits-and-blooms/bitset"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("Hello from BitSet!\n")
|
||||
var b bitset.BitSet
|
||||
// play some Go Fish
|
||||
for i := 0; i < 100; i++ {
|
||||
card1 := uint(rand.Intn(52))
|
||||
card2 := uint(rand.Intn(52))
|
||||
b.Set(card1)
|
||||
if b.Test(card2) {
|
||||
fmt.Println("Go Fish!")
|
||||
}
|
||||
b.Clear(card1)
|
||||
}
|
||||
|
||||
// Chaining
|
||||
b.Set(10).Set(11)
|
||||
|
||||
for i, e := b.NextSet(0); e; i, e = b.NextSet(i + 1) {
|
||||
fmt.Println("The following bit is set:", i)
|
||||
}
|
||||
if b.Intersection(bitset.New(100).Set(10)).Count() == 1 {
|
||||
fmt.Println("Intersection works.")
|
||||
} else {
|
||||
fmt.Println("Intersection doesn't work???")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If you have Go 1.23 or better, you can iterate over the set bits like so:
|
||||
|
||||
```go
|
||||
for i := range b.EachSet() {}
|
||||
```
|
||||
|
||||
|
||||
|
||||
Package documentation is at: https://pkg.go.dev/github.com/bits-and-blooms/bitset?tab=doc
|
||||
|
||||
## Serialization
|
||||
|
||||
|
||||
You may serialize a bitset safely and portably to a stream
|
||||
of bytes as follows:
|
||||
```Go
|
||||
const length = 9585
|
||||
const oneEvery = 97
|
||||
bs := bitset.New(length)
|
||||
// Add some bits
|
||||
for i := uint(0); i < length; i += oneEvery {
|
||||
bs = bs.Set(i)
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
n, err := bs.WriteTo(&buf)
|
||||
if err != nil {
|
||||
// failure
|
||||
}
|
||||
// Here n == buf.Len()
|
||||
```
|
||||
You can later deserialize the result as follows:
|
||||
|
||||
```Go
|
||||
// Read back from buf
|
||||
bs = bitset.New()
|
||||
n, err = bs.ReadFrom(&buf)
|
||||
if err != nil {
|
||||
// error
|
||||
}
|
||||
// n is the number of bytes read
|
||||
```
|
||||
|
||||
The `ReadFrom` function attempts to read the data into the existing
|
||||
BitSet instance, to minimize memory allocations.
|
||||
|
||||
|
||||
*Performance tip*:
|
||||
When reading and writing to a file or a network connection, you may get better performance by
|
||||
wrapping your streams with `bufio` instances.
|
||||
|
||||
E.g.,
|
||||
```Go
|
||||
f, err := os.Create("myfile")
|
||||
w := bufio.NewWriter(f)
|
||||
```
|
||||
```Go
|
||||
f, err := os.Open("myfile")
|
||||
r := bufio.NewReader(f)
|
||||
```
|
||||
|
||||
## Memory Usage
|
||||
|
||||
The memory usage of a bitset using `N` bits is at least `N/8` bytes. The number of bits in a bitset is at least as large as one plus the greatest bit index you have accessed. Thus it is possible to run out of memory while using a bitset. If you have lots of bits, you might prefer compressed bitsets, like the [Roaring bitmaps](https://roaringbitmap.org) and its [Go implementation](https://github.com/RoaringBitmap/roaring).
|
||||
|
||||
The `roaring` library allows you to go back and forth between compressed Roaring bitmaps and the conventional bitset instances:
|
||||
```Go
|
||||
mybitset := roaringbitmap.ToBitSet()
|
||||
newroaringbitmap := roaring.FromBitSet(mybitset)
|
||||
```
|
||||
|
||||
|
||||
### Goroutine safety
|
||||
|
||||
In general, it's not safe to access the same BitSet using different goroutines--they are unsynchronized for performance.
|
||||
|
||||
Should you want to access a BitSet from more than one goroutine, you should provide synchronization. Typically this is done by using channels to pass the *BitSet around (in Go style; so there is only ever one owner), or by using `sync.Mutex` to serialize operations on BitSets.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
go get github.com/bits-and-blooms/bitset
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)")
|
||||
|
||||
## Running all tests
|
||||
|
||||
Before committing the code, please check if it passes tests, has adequate coverage, etc.
|
||||
```bash
|
||||
go test
|
||||
go test -cover
|
||||
```
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
# Security Policy
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
You can report privately a vulnerability by email at daniel@lemire.me (current maintainer).
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
# Go
|
||||
# Build your Go project.
|
||||
# Add steps that test, save build artifacts, deploy, and more:
|
||||
# https://docs.microsoft.com/azure/devops/pipelines/languages/go
|
||||
|
||||
trigger:
|
||||
- master
|
||||
|
||||
pool:
|
||||
vmImage: 'Ubuntu-16.04'
|
||||
|
||||
variables:
|
||||
GOBIN: '$(GOPATH)/bin' # Go binaries path
|
||||
GOROOT: '/usr/local/go1.11' # Go installation path
|
||||
GOPATH: '$(system.defaultWorkingDirectory)/gopath' # Go workspace path
|
||||
modulePath: '$(GOPATH)/src/github.com/$(build.repository.name)' # Path to the module's code
|
||||
|
||||
steps:
|
||||
- script: |
|
||||
mkdir -p '$(GOBIN)'
|
||||
mkdir -p '$(GOPATH)/pkg'
|
||||
mkdir -p '$(modulePath)'
|
||||
shopt -s extglob
|
||||
shopt -s dotglob
|
||||
mv !(gopath) '$(modulePath)'
|
||||
echo '##vso[task.prependpath]$(GOBIN)'
|
||||
echo '##vso[task.prependpath]$(GOROOT)/bin'
|
||||
displayName: 'Set up the Go workspace'
|
||||
|
||||
- script: |
|
||||
go version
|
||||
go get -v -t -d ./...
|
||||
if [ -f Gopkg.toml ]; then
|
||||
curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
|
||||
dep ensure
|
||||
fi
|
||||
go build -v .
|
||||
workingDirectory: '$(modulePath)'
|
||||
displayName: 'Get dependencies, then build'
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,23 @@
|
|||
//go:build go1.23
|
||||
// +build go1.23
|
||||
|
||||
package bitset
|
||||
|
||||
import (
|
||||
"iter"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
func (b *BitSet) EachSet() iter.Seq[uint] {
|
||||
return func(yield func(uint) bool) {
|
||||
for wordIndex, word := range b.set {
|
||||
idx := 0
|
||||
for trail := bits.TrailingZeros64(word); trail != 64; trail = bits.TrailingZeros64(word >> idx) {
|
||||
if !yield(uint(wordIndex<<log2WordSize + idx + trail)) {
|
||||
return
|
||||
}
|
||||
idx += trail + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,59 @@
|
|||
package bitset
|
||||
|
||||
import "math/bits"
|
||||
|
||||
func popcntSlice(s []uint64) uint64 {
|
||||
var cnt int
|
||||
for _, x := range s {
|
||||
cnt += bits.OnesCount64(x)
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntMaskSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] &^ m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntAndSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] & m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntOrSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] | m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
||||
func popcntXorSlice(s, m []uint64) uint64 {
|
||||
var cnt int
|
||||
// this explicit check eliminates a bounds check in the loop
|
||||
if len(m) < len(s) {
|
||||
panic("mask slice is too short")
|
||||
}
|
||||
for i := range s {
|
||||
cnt += bits.OnesCount64(s[i] ^ m[i])
|
||||
}
|
||||
return uint64(cnt)
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
package bitset
|
||||
|
||||
import "math/bits"
|
||||
|
||||
func select64(w uint64, j uint) uint {
|
||||
seen := 0
|
||||
// Divide 64bit
|
||||
part := w & 0xFFFFFFFF
|
||||
n := uint(bits.OnesCount64(part))
|
||||
if n <= j {
|
||||
part = w >> 32
|
||||
seen += 32
|
||||
j -= n
|
||||
}
|
||||
ww := part
|
||||
|
||||
// Divide 32bit
|
||||
part = ww & 0xFFFF
|
||||
|
||||
n = uint(bits.OnesCount64(part))
|
||||
if n <= j {
|
||||
part = ww >> 16
|
||||
seen += 16
|
||||
j -= n
|
||||
}
|
||||
ww = part
|
||||
|
||||
// Divide 16bit
|
||||
part = ww & 0xFF
|
||||
n = uint(bits.OnesCount64(part))
|
||||
if n <= j {
|
||||
part = ww >> 8
|
||||
seen += 8
|
||||
j -= n
|
||||
}
|
||||
ww = part
|
||||
|
||||
// Lookup in final byte
|
||||
counter := 0
|
||||
for ; counter < 8; counter++ {
|
||||
j -= uint((ww >> counter) & 1)
|
||||
if j+1 == 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
return uint(seen + counter)
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#*
|
||||
*.sublime-*
|
||||
*~
|
||||
.#*
|
||||
.project
|
||||
.settings
|
||||
**/.idea/
|
||||
**/*.iml
|
||||
.DS_Store
|
||||
query_string.y.go.tmp
|
||||
/analysis/token_filters/cld2/cld2-read-only
|
||||
/analysis/token_filters/cld2/libcld2_full.a
|
||||
/cmd/bleve/bleve
|
||||
vendor/**
|
||||
!vendor/manifest
|
||||
/y.output
|
||||
/search/query/y.output
|
||||
*.test
|
||||
tags
|
||||
go.sum
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
sudo: false
|
||||
|
||||
language: go
|
||||
|
||||
go:
|
||||
- "1.21.x"
|
||||
- "1.22.x"
|
||||
- "1.23.x"
|
||||
|
||||
script:
|
||||
- go get golang.org/x/tools/cmd/cover
|
||||
- go get github.com/mattn/goveralls
|
||||
- go get github.com/kisielk/errcheck
|
||||
- go get -u github.com/FiloSottile/gvt
|
||||
- gvt restore
|
||||
- go test -race -v $(go list ./... | grep -v vendor/)
|
||||
- go vet $(go list ./... | grep -v vendor/)
|
||||
- go test ./test -v -indexType scorch
|
||||
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
|
||||
- scripts/project-code-coverage.sh
|
||||
- scripts/build_children.sh
|
||||
|
||||
notifications:
|
||||
email:
|
||||
- fts-team@couchbase.com
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# Contributing to Bleve
|
||||
|
||||
We look forward to your contributions, but ask that you first review these guidelines.
|
||||
|
||||
## Sign the CLA
|
||||
|
||||
As Bleve is a Couchbase project we require contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool but it is still used to track acceptance of the contributor license agreements.
|
||||
|
||||
## Submitting a Pull Request
|
||||
|
||||
All types of contributions are welcome, but please keep the following in mind:
|
||||
|
||||
- If you're planning a large change, you should really discuss it in a github issue or on the google group first. This helps avoid duplicate effort and spending time on something that may not be merged.
|
||||
- Existing tests should continue to pass, new tests for the contribution are nice to have.
|
||||
- All code should have gone through `go fmt`
|
||||
- All code should pass `go vet`
|
||||
|
|
@ -0,0 +1,202 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
@ -0,0 +1,122 @@
|
|||
#  bleve
|
||||
|
||||
[](https://github.com/blevesearch/bleve/actions/workflows/tests.yml?query=event%3Apush+branch%3Amaster)
|
||||
[](https://coveralls.io/github/blevesearch/bleve?branch=master)
|
||||
[](https://pkg.go.dev/github.com/blevesearch/bleve/v2)
|
||||
[](https://app.gitter.im/#/room/#blevesearch_bleve:gitter.im)
|
||||
[](https://codebeat.co/projects/github-com-blevesearch-bleve)
|
||||
[](https://goreportcard.com/report/github.com/blevesearch/bleve/v2)
|
||||
[](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
|
||||
[](https://opensource.org/licenses/Apache-2.0)
|
||||
|
||||
A modern indexing + search library in GO
|
||||
|
||||
## Features
|
||||
|
||||
* Index any GO data structure or JSON
|
||||
* Intelligent defaults backed up by powerful configuration ([scorch](https://github.com/blevesearch/bleve/blob/master/index/scorch/README.md))
|
||||
* Supported field types:
|
||||
* `text`, `number`, `datetime`, `boolean`, `geopoint`, `geoshape`, `IP`, `vector`
|
||||
* Supported query types:
|
||||
* `term`, `phrase`, `match`, `match_phrase`, `prefix`, `regexp`, `wildcard`, `fuzzy`
|
||||
* term range, numeric range, date range, boolean field
|
||||
* compound queries: `conjuncts`, `disjuncts`, boolean (`must`/`should`/`must_not`)
|
||||
* [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/)
|
||||
* [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
|
||||
* approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md)
|
||||
* [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md)
|
||||
* [tf-idf](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#tf-idf) / [bm25](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#bm25) scoring models
|
||||
* Hybrid search: exact + semantic
|
||||
* Query time boosting
|
||||
* Search result match highlighting with document fragments
|
||||
* Aggregations/faceting support:
|
||||
* terms facet
|
||||
* numeric range facet
|
||||
* date range facet
|
||||
|
||||
## Indexing
|
||||
|
||||
```go
|
||||
message := struct {
|
||||
Id string
|
||||
From string
|
||||
Body string
|
||||
}{
|
||||
Id: "example",
|
||||
From: "xyz@couchbase.com",
|
||||
Body: "bleve indexing is easy",
|
||||
}
|
||||
|
||||
mapping := bleve.NewIndexMapping()
|
||||
index, err := bleve.New("example.bleve", mapping)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
index.Index(message.Id, message)
|
||||
```
|
||||
|
||||
## Querying
|
||||
|
||||
```go
|
||||
index, _ := bleve.Open("example.bleve")
|
||||
query := bleve.NewQueryStringQuery("bleve")
|
||||
searchRequest := bleve.NewSearchRequest(query)
|
||||
searchResult, _ := index.Search(searchRequest)
|
||||
```
|
||||
|
||||
## Command Line Interface
|
||||
|
||||
To install the CLI for the latest release of bleve, run:
|
||||
|
||||
```bash
|
||||
go install github.com/blevesearch/bleve/v2/cmd/bleve@latest
|
||||
```
|
||||
|
||||
```text
|
||||
$ bleve --help
|
||||
Bleve is a command-line tool to interact with a bleve index.
|
||||
|
||||
Usage:
|
||||
bleve [command]
|
||||
|
||||
Available Commands:
|
||||
bulk bulk loads from newline delimited JSON files
|
||||
check checks the contents of the index
|
||||
count counts the number documents in the index
|
||||
create creates a new index
|
||||
dictionary prints the term dictionary for the specified field in the index
|
||||
dump dumps the contents of the index
|
||||
fields lists the fields in this index
|
||||
help Help about any command
|
||||
index adds the files to the index
|
||||
mapping prints the mapping used for this index
|
||||
query queries the index
|
||||
registry registry lists the bleve components compiled into this executable
|
||||
scorch command-line tool to interact with a scorch index
|
||||
|
||||
Flags:
|
||||
-h, --help help for bleve
|
||||
|
||||
Use "bleve [command] --help" for more information about a command.
|
||||
```
|
||||
|
||||
## Text Analysis
|
||||
|
||||
Bleve includes general-purpose analyzers (customizable) as well as pre-built text analyzers for the following languages:
|
||||
|
||||
Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Polish (pl), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr)
|
||||
|
||||
## Text Analysis Wizard
|
||||
|
||||
[bleveanalysis.couchbase.com](https://bleveanalysis.couchbase.com)
|
||||
|
||||
## Discussion/Issues
|
||||
|
||||
Discuss usage/development of bleve and/or report issues here:
|
||||
|
||||
* [Github issues](https://github.com/blevesearch/bleve/issues)
|
||||
* [Google group](https://groups.google.com/forum/#!forum/bleve)
|
||||
|
||||
## License
|
||||
|
||||
Apache License Version 2.0
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
We support the latest release (for example, bleve v2.5.x).
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
All security issues for this project should be reported via email to [security@couchbase.com](mailto:security@couchbase.com) and [fts-team@couchbase.com](mailto:fts-team@couchbase.com).
|
||||
|
||||
This mail will be delivered to the owners of this project.
|
||||
|
||||
- To ensure your report is NOT marked as spam, please include the word "security/vulnerability" along with the project name (blevesearch/bleve) in the subject of the email.
|
||||
- Please be as descriptive as possible while explaining the issue, and a testcase highlighting the issue is always welcome.
|
||||
|
||||
Your email will be acknowledged at the soonest possible.
|
||||
41
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/keyword/keyword.go
generated
vendored
Normal file
41
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/keyword/keyword.go
generated
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package keyword
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "keyword"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
|
||||
keywordTokenizer, err := cache.TokenizerNamed(single.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.DefaultAnalyzer{
|
||||
Tokenizer: keywordTokenizer,
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
55
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/standard/standard.go
generated
vendored
Normal file
55
vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/standard/standard.go
generated
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package standard
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/lang/en"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "standard"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
|
||||
tokenizer, err := cache.TokenizerNamed(unicode.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopEnFilter, err := cache.TokenFilterNamed(en.StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.DefaultAnalyzer{
|
||||
Tokenizer: tokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
stopEnFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
67
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/flexible/flexible.go
generated
vendored
Normal file
67
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/flexible/flexible.go
generated
vendored
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package flexible
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "flexiblego"
|
||||
|
||||
type DateTimeParser struct {
|
||||
layouts []string
|
||||
}
|
||||
|
||||
func New(layouts []string) *DateTimeParser {
|
||||
return &DateTimeParser{
|
||||
layouts: layouts,
|
||||
}
|
||||
}
|
||||
|
||||
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
|
||||
for _, layout := range p.layouts {
|
||||
rv, err := time.Parse(layout, input)
|
||||
if err == nil {
|
||||
return rv, layout, nil
|
||||
}
|
||||
}
|
||||
return time.Time{}, "", analysis.ErrInvalidDateTime
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
layouts, ok := config["layouts"].([]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify layouts")
|
||||
}
|
||||
var layoutStrs []string
|
||||
for _, layout := range layouts {
|
||||
layoutStr, ok := layout.(string)
|
||||
if ok {
|
||||
layoutStrs = append(layoutStrs, layoutStr)
|
||||
}
|
||||
}
|
||||
return New(layoutStrs), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
50
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/optional/optional.go
generated
vendored
Normal file
50
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/optional/optional.go
generated
vendored
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package optional
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "dateTimeOptional"
|
||||
|
||||
const rfc3339NoTimezone = "2006-01-02T15:04:05"
|
||||
const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
|
||||
const rfc3339Offset = "2006-01-02 15:04:05 -0700"
|
||||
const rfc3339NoTime = "2006-01-02"
|
||||
|
||||
var layouts = []string{
|
||||
time.RFC3339Nano,
|
||||
time.RFC3339,
|
||||
rfc3339NoTimezone,
|
||||
rfc3339NoTimezoneNoT,
|
||||
rfc3339Offset,
|
||||
rfc3339NoTime,
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
return flexible.New(layouts), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
55
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds/microseconds.go
generated
vendored
Normal file
55
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds/microseconds.go
generated
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package microseconds
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "unix_micro"
|
||||
|
||||
type DateTimeParser struct {
|
||||
}
|
||||
|
||||
var minBound int64 = math.MinInt64 / 1000
|
||||
var maxBound int64 = math.MaxInt64 / 1000
|
||||
|
||||
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
|
||||
// unix timestamp is milliseconds since UNIX epoch
|
||||
timestamp, err := strconv.ParseInt(input, 10, 64)
|
||||
if err != nil {
|
||||
return time.Time{}, "", analysis.ErrInvalidTimestampString
|
||||
}
|
||||
if timestamp < minBound || timestamp > maxBound {
|
||||
return time.Time{}, "", analysis.ErrInvalidTimestampRange
|
||||
}
|
||||
return time.UnixMicro(timestamp), Name, nil
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
return &DateTimeParser{}, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
55
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds/milliseconds.go
generated
vendored
Normal file
55
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds/milliseconds.go
generated
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package milliseconds
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "unix_milli"
|
||||
|
||||
type DateTimeParser struct {
|
||||
}
|
||||
|
||||
var minBound int64 = math.MinInt64 / 1000000
|
||||
var maxBound int64 = math.MaxInt64 / 1000000
|
||||
|
||||
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
|
||||
// unix timestamp is milliseconds since UNIX epoch
|
||||
timestamp, err := strconv.ParseInt(input, 10, 64)
|
||||
if err != nil {
|
||||
return time.Time{}, "", analysis.ErrInvalidTimestampString
|
||||
}
|
||||
if timestamp < minBound || timestamp > maxBound {
|
||||
return time.Time{}, "", analysis.ErrInvalidTimestampRange
|
||||
}
|
||||
return time.UnixMilli(timestamp), Name, nil
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
return &DateTimeParser{}, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
55
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds/nanoseconds.go
generated
vendored
Normal file
55
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds/nanoseconds.go
generated
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2023 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package nanoseconds
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "unix_nano"
|
||||
|
||||
type DateTimeParser struct {
|
||||
}
|
||||
|
||||
var minBound int64 = math.MinInt64
|
||||
var maxBound int64 = math.MaxInt64
|
||||
|
||||
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
|
||||
// unix timestamp is milliseconds since UNIX epoch
|
||||
timestamp, err := strconv.ParseInt(input, 10, 64)
|
||||
if err != nil {
|
||||
return time.Time{}, "", analysis.ErrInvalidTimestampString
|
||||
}
|
||||
if timestamp < minBound || timestamp > maxBound {
|
||||
return time.Time{}, "", analysis.ErrInvalidTimestampRange
|
||||
}
|
||||
return time.Unix(0, timestamp), Name, nil
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
return &DateTimeParser{}, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
55
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds/seconds.go
generated
vendored
Normal file
55
vendor/github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds/seconds.go
generated
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package seconds
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "unix_sec"
|
||||
|
||||
type DateTimeParser struct {
|
||||
}
|
||||
|
||||
var minBound int64 = math.MinInt64 / 1000000000
|
||||
var maxBound int64 = math.MaxInt64 / 1000000000
|
||||
|
||||
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
|
||||
// unix timestamp is seconds since UNIX epoch
|
||||
timestamp, err := strconv.ParseInt(input, 10, 64)
|
||||
if err != nil {
|
||||
return time.Time{}, "", analysis.ErrInvalidTimestampString
|
||||
}
|
||||
if timestamp < minBound || timestamp > maxBound {
|
||||
return time.Time{}, "", analysis.ErrInvalidTimestampRange
|
||||
}
|
||||
return time.Unix(timestamp, 0), Name, nil
|
||||
}
|
||||
|
||||
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
|
||||
return &DateTimeParser{}, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
func TokenFrequency(tokens TokenStream, arrayPositions []uint64, options index.FieldIndexingOptions) index.TokenFrequencies {
|
||||
rv := make(map[string]*index.TokenFreq, len(tokens))
|
||||
|
||||
if options.IncludeTermVectors() {
|
||||
tls := make([]index.TokenLocation, len(tokens))
|
||||
tlNext := 0
|
||||
|
||||
for _, token := range tokens {
|
||||
tls[tlNext] = index.TokenLocation{
|
||||
ArrayPositions: arrayPositions,
|
||||
Start: token.Start,
|
||||
End: token.End,
|
||||
Position: token.Position,
|
||||
}
|
||||
|
||||
curr, ok := rv[string(token.Term)]
|
||||
if ok {
|
||||
curr.Locations = append(curr.Locations, &tls[tlNext])
|
||||
} else {
|
||||
curr = &index.TokenFreq{
|
||||
Term: token.Term,
|
||||
Locations: []*index.TokenLocation{&tls[tlNext]},
|
||||
}
|
||||
rv[string(token.Term)] = curr
|
||||
}
|
||||
|
||||
if !options.SkipFreqNorm() {
|
||||
curr.SetFrequency(curr.Frequency() + 1)
|
||||
}
|
||||
|
||||
tlNext++
|
||||
}
|
||||
} else {
|
||||
for _, token := range tokens {
|
||||
curr, exists := rv[string(token.Term)]
|
||||
if !exists {
|
||||
curr = &index.TokenFreq{
|
||||
Term: token.Term,
|
||||
}
|
||||
rv[string(token.Term)] = curr
|
||||
}
|
||||
|
||||
if !options.SkipFreqNorm() {
|
||||
curr.SetFrequency(curr.Frequency() + 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
73
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/analyzer_en.go
generated
vendored
Normal file
73
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/analyzer_en.go
generated
vendored
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package en implements an analyzer with reasonable defaults for processing
|
||||
// English text.
|
||||
//
|
||||
// It strips possessive suffixes ('s), transforms tokens to lower case,
|
||||
// removes stopwords from a built-in list, and applies porter stemming.
|
||||
//
|
||||
// The built-in stopwords list is defined in EnglishStopWords.
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/porter"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
)
|
||||
|
||||
const AnalyzerName = "en"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
|
||||
tokenizer, err := cache.TokenizerNamed(unicode.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
possEnFilter, err := cache.TokenFilterNamed(PossessiveName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stopEnFilter, err := cache.TokenFilterNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stemmerEnFilter, err := cache.TokenFilterNamed(porter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.DefaultAnalyzer{
|
||||
Tokenizer: tokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
possEnFilter,
|
||||
toLowerFilter,
|
||||
stopEnFilter,
|
||||
stemmerEnFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
177
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/plural_stemmer.go
generated
vendored
Normal file
177
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/plural_stemmer.go
generated
vendored
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
/*
|
||||
This code was ported from the Open Search Project
|
||||
https://github.com/opensearch-project/OpenSearch/blob/main/modules/analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java
|
||||
The algorithm itself was created by Mark Harwood
|
||||
https://github.com/markharwood
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* The OpenSearch Contributors require contributions made to
|
||||
* this file be licensed under the Apache-2.0 license or a
|
||||
* compatible open source license.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package en
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const PluralStemmerName = "stemmer_en_plural"
|
||||
|
||||
type EnglishPluralStemmerFilter struct {
|
||||
}
|
||||
|
||||
func NewEnglishPluralStemmerFilter() *EnglishPluralStemmerFilter {
|
||||
return &EnglishPluralStemmerFilter{}
|
||||
}
|
||||
|
||||
func (s *EnglishPluralStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
token.Term = []byte(stem(string(token.Term)))
|
||||
}
|
||||
|
||||
return input
|
||||
}
|
||||
|
||||
func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewEnglishPluralStemmerFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Words ending in oes that retain the e when stemmed
|
||||
var oesExceptions = []string{"shoes", "canoes", "oboes"}
|
||||
|
||||
// Words ending in ches that retain the e when stemmed
|
||||
var chesExceptions = []string{
|
||||
"cliches",
|
||||
"avalanches",
|
||||
"mustaches",
|
||||
"moustaches",
|
||||
"quiches",
|
||||
"headaches",
|
||||
"heartaches",
|
||||
"porsches",
|
||||
"tranches",
|
||||
"caches",
|
||||
}
|
||||
|
||||
func stem(word string) string {
|
||||
runes := []rune(strings.ToLower(word))
|
||||
|
||||
if len(runes) < 3 || runes[len(runes)-1] != 's' {
|
||||
return string(runes)
|
||||
}
|
||||
|
||||
switch runes[len(runes)-2] {
|
||||
case 'u':
|
||||
fallthrough
|
||||
case 's':
|
||||
return string(runes)
|
||||
case 'e':
|
||||
// Modified ies->y logic from original s-stemmer - only work on strings > 4
|
||||
// so spies -> spy still but pies->pie.
|
||||
// The original code also special-cased aies and eies for no good reason as far as I can tell.
|
||||
// ( no words of consequence - eg http://www.thefreedictionary.com/words-that-end-in-aies )
|
||||
if len(runes) > 4 && runes[len(runes)-3] == 'i' {
|
||||
runes[len(runes)-3] = 'y'
|
||||
return string(runes[0 : len(runes)-2])
|
||||
}
|
||||
|
||||
// Suffix rules to remove any dangling "e"
|
||||
if len(runes) > 3 {
|
||||
// xes (but >1 prefix so we can stem "boxes->box" but keep "axes->axe")
|
||||
if len(runes) > 4 && runes[len(runes)-3] == 'x' {
|
||||
return string(runes[0 : len(runes)-2])
|
||||
}
|
||||
|
||||
// oes
|
||||
if len(runes) > 3 && runes[len(runes)-3] == 'o' {
|
||||
if isException(runes, oesExceptions) {
|
||||
// Only remove the S
|
||||
return string(runes[0 : len(runes)-1])
|
||||
}
|
||||
// Remove the es
|
||||
return string(runes[0 : len(runes)-2])
|
||||
}
|
||||
|
||||
if len(runes) > 4 {
|
||||
// shes/sses
|
||||
if runes[len(runes)-4] == 's' && (runes[len(runes)-3] == 'h' || runes[len(runes)-3] == 's') {
|
||||
return string(runes[0 : len(runes)-2])
|
||||
}
|
||||
|
||||
// ches
|
||||
if len(runes) > 4 {
|
||||
if runes[len(runes)-4] == 'c' && runes[len(runes)-3] == 'h' {
|
||||
if isException(runes, chesExceptions) {
|
||||
// Only remove the S
|
||||
return string(runes[0 : len(runes)-1])
|
||||
}
|
||||
// Remove the es
|
||||
return string(runes[0 : len(runes)-2])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fallthrough
|
||||
default:
|
||||
return string(runes[0 : len(runes)-1])
|
||||
}
|
||||
}
|
||||
|
||||
func isException(word []rune, exceptions []string) bool {
|
||||
for _, exception := range exceptions {
|
||||
|
||||
exceptionRunes := []rune(exception)
|
||||
|
||||
exceptionPos := len(exceptionRunes) - 1
|
||||
wordPos := len(word) - 1
|
||||
|
||||
matched := true
|
||||
for exceptionPos >= 0 && wordPos >= 0 {
|
||||
if exceptionRunes[exceptionPos] != word[wordPos] {
|
||||
matched = false
|
||||
break
|
||||
}
|
||||
exceptionPos--
|
||||
wordPos--
|
||||
}
|
||||
if matched {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
70
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/possessive_filter_en.go
generated
vendored
Normal file
70
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/possessive_filter_en.go
generated
vendored
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
// PossessiveName is the name PossessiveFilter is registered as
|
||||
// in the bleve registry.
|
||||
const PossessiveName = "possessive_en"
|
||||
|
||||
const rightSingleQuotationMark = '’'
|
||||
const apostrophe = '\''
|
||||
const fullWidthApostrophe = '''
|
||||
|
||||
const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
|
||||
|
||||
// PossessiveFilter implements a TokenFilter which
|
||||
// strips the English possessive suffix ('s) from tokens.
|
||||
// It handle a variety of apostrophe types, is case-insensitive
|
||||
// and doesn't distinguish between possessive and contraction.
|
||||
// (ie "She's So Rad" becomes "She So Rad")
|
||||
type PossessiveFilter struct {
|
||||
}
|
||||
|
||||
func NewPossessiveFilter() *PossessiveFilter {
|
||||
return &PossessiveFilter{}
|
||||
}
|
||||
|
||||
func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
lastRune, lastRuneSize := utf8.DecodeLastRune(token.Term)
|
||||
if lastRune == 's' || lastRune == 'S' {
|
||||
nextLastRune, nextLastRuneSize := utf8.DecodeLastRune(token.Term[:len(token.Term)-lastRuneSize])
|
||||
if nextLastRune == rightSingleQuotationMark ||
|
||||
nextLastRune == apostrophe ||
|
||||
nextLastRune == fullWidthApostrophe {
|
||||
token.Term = token.Term[:len(token.Term)-lastRuneSize-nextLastRuneSize]
|
||||
}
|
||||
}
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewPossessiveFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
52
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stemmer_en_snowball.go
generated
vendored
Normal file
52
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stemmer_en_snowball.go
generated
vendored
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) 2020 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
|
||||
"github.com/blevesearch/snowballstem"
|
||||
"github.com/blevesearch/snowballstem/english"
|
||||
)
|
||||
|
||||
const SnowballStemmerName = "stemmer_en_snowball"
|
||||
|
||||
type EnglishStemmerFilter struct {
|
||||
}
|
||||
|
||||
func NewEnglishStemmerFilter() *EnglishStemmerFilter {
|
||||
return &EnglishStemmerFilter{}
|
||||
}
|
||||
|
||||
func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
env := snowballstem.NewEnv(string(token.Term))
|
||||
english.Stem(env)
|
||||
token.Term = []byte(env.Current())
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewEnglishStemmerFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
36
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_filter_en.go
generated
vendored
Normal file
36
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_filter_en.go
generated
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/stop"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
tokenMap, err := cache.TokenMapNamed(StopName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return stop.NewStopTokensFilter(tokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
347
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_words_en.go
generated
vendored
Normal file
347
vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_words_en.go
generated
vendored
Normal file
|
|
@ -0,0 +1,347 @@
|
|||
package en
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const StopName = "stop_en"
|
||||
|
||||
// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
|
||||
//
|
||||
// this content was obtained from:
|
||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||
// ` was changed to ' to allow for literal string
|
||||
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| An English stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
| Many of the forms below are quite rare (e.g. "yourselves") but included for
|
||||
| completeness.
|
||||
|
||||
| PRONOUNS FORMS
|
||||
| 1st person sing
|
||||
|
||||
i | subject, always in upper case of course
|
||||
|
||||
me | object
|
||||
my | possessive adjective
|
||||
| the possessive pronoun 'mine' is best suppressed, because of the
|
||||
| sense of coal-mine etc.
|
||||
myself | reflexive
|
||||
| 1st person plural
|
||||
we | subject
|
||||
|
||||
| us | object
|
||||
| care is required here because US = United States. It is usually
|
||||
| safe to remove it if it is in lower case.
|
||||
our | possessive adjective
|
||||
ours | possessive pronoun
|
||||
ourselves | reflexive
|
||||
| second person (archaic 'thou' forms not included)
|
||||
you | subject and object
|
||||
your | possessive adjective
|
||||
yours | possessive pronoun
|
||||
yourself | reflexive (singular)
|
||||
yourselves | reflexive (plural)
|
||||
| third person singular
|
||||
he | subject
|
||||
him | object
|
||||
his | possessive adjective and pronoun
|
||||
himself | reflexive
|
||||
|
||||
she | subject
|
||||
her | object and possessive adjective
|
||||
hers | possessive pronoun
|
||||
herself | reflexive
|
||||
|
||||
it | subject and object
|
||||
its | possessive adjective
|
||||
itself | reflexive
|
||||
| third person plural
|
||||
they | subject
|
||||
them | object
|
||||
their | possessive adjective
|
||||
theirs | possessive pronoun
|
||||
themselves | reflexive
|
||||
| other forms (demonstratives, interrogatives)
|
||||
what
|
||||
which
|
||||
who
|
||||
whom
|
||||
this
|
||||
that
|
||||
these
|
||||
those
|
||||
|
||||
| VERB FORMS (using F.R. Palmer's nomenclature)
|
||||
| BE
|
||||
am | 1st person, present
|
||||
is | -s form (3rd person, present)
|
||||
are | present
|
||||
was | 1st person, past
|
||||
were | past
|
||||
be | infinitive
|
||||
been | past participle
|
||||
being | -ing form
|
||||
| HAVE
|
||||
have | simple
|
||||
has | -s form
|
||||
had | past
|
||||
having | -ing form
|
||||
| DO
|
||||
do | simple
|
||||
does | -s form
|
||||
did | past
|
||||
doing | -ing form
|
||||
|
||||
| The forms below are, I believe, best omitted, because of the significant
|
||||
| homonym forms:
|
||||
|
||||
| He made a WILL
|
||||
| old tin CAN
|
||||
| merry month of MAY
|
||||
| a smell of MUST
|
||||
| fight the good fight with all thy MIGHT
|
||||
|
||||
| would, could, should, ought might however be included
|
||||
|
||||
| | AUXILIARIES
|
||||
| | WILL
|
||||
|will
|
||||
|
||||
would
|
||||
|
||||
| | SHALL
|
||||
|shall
|
||||
|
||||
should
|
||||
|
||||
| | CAN
|
||||
|can
|
||||
|
||||
could
|
||||
|
||||
| | MAY
|
||||
|may
|
||||
|might
|
||||
| | MUST
|
||||
|must
|
||||
| | OUGHT
|
||||
|
||||
ought
|
||||
|
||||
| COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
|
||||
| pronoun + verb
|
||||
|
||||
i'm
|
||||
you're
|
||||
he's
|
||||
she's
|
||||
it's
|
||||
we're
|
||||
they're
|
||||
i've
|
||||
you've
|
||||
we've
|
||||
they've
|
||||
i'd
|
||||
you'd
|
||||
he'd
|
||||
she'd
|
||||
we'd
|
||||
they'd
|
||||
i'll
|
||||
you'll
|
||||
he'll
|
||||
she'll
|
||||
we'll
|
||||
they'll
|
||||
|
||||
| verb + negation
|
||||
|
||||
isn't
|
||||
aren't
|
||||
wasn't
|
||||
weren't
|
||||
hasn't
|
||||
haven't
|
||||
hadn't
|
||||
doesn't
|
||||
don't
|
||||
didn't
|
||||
|
||||
| auxiliary + negation
|
||||
|
||||
won't
|
||||
wouldn't
|
||||
shan't
|
||||
shouldn't
|
||||
can't
|
||||
cannot
|
||||
couldn't
|
||||
mustn't
|
||||
|
||||
| miscellaneous forms
|
||||
|
||||
let's
|
||||
that's
|
||||
who's
|
||||
what's
|
||||
here's
|
||||
there's
|
||||
when's
|
||||
where's
|
||||
why's
|
||||
how's
|
||||
|
||||
| rarer forms
|
||||
|
||||
| daren't needn't
|
||||
|
||||
| doubtful forms
|
||||
|
||||
| oughtn't mightn't
|
||||
|
||||
| ARTICLES
|
||||
a
|
||||
an
|
||||
the
|
||||
|
||||
| THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
|
||||
| high, that classification is pointless.)
|
||||
and
|
||||
but
|
||||
if
|
||||
or
|
||||
because
|
||||
as
|
||||
until
|
||||
while
|
||||
|
||||
of
|
||||
at
|
||||
by
|
||||
for
|
||||
with
|
||||
about
|
||||
against
|
||||
between
|
||||
into
|
||||
through
|
||||
during
|
||||
before
|
||||
after
|
||||
above
|
||||
below
|
||||
to
|
||||
from
|
||||
up
|
||||
down
|
||||
in
|
||||
out
|
||||
on
|
||||
off
|
||||
over
|
||||
under
|
||||
|
||||
again
|
||||
further
|
||||
then
|
||||
once
|
||||
|
||||
here
|
||||
there
|
||||
when
|
||||
where
|
||||
why
|
||||
how
|
||||
|
||||
all
|
||||
any
|
||||
both
|
||||
each
|
||||
few
|
||||
more
|
||||
most
|
||||
other
|
||||
some
|
||||
such
|
||||
|
||||
no
|
||||
nor
|
||||
not
|
||||
only
|
||||
own
|
||||
same
|
||||
so
|
||||
than
|
||||
too
|
||||
very
|
||||
|
||||
| Just for the record, the following words are among the commonest in English
|
||||
|
||||
| one
|
||||
| every
|
||||
| least
|
||||
| less
|
||||
| many
|
||||
| now
|
||||
| ever
|
||||
| never
|
||||
| say
|
||||
| says
|
||||
| said
|
||||
| also
|
||||
| get
|
||||
| go
|
||||
| goes
|
||||
| just
|
||||
| made
|
||||
| make
|
||||
| put
|
||||
| see
|
||||
| seen
|
||||
| whether
|
||||
| like
|
||||
| well
|
||||
| back
|
||||
| even
|
||||
| still
|
||||
| way
|
||||
| take
|
||||
| since
|
||||
| another
|
||||
| however
|
||||
| two
|
||||
| three
|
||||
| four
|
||||
| five
|
||||
| first
|
||||
| second
|
||||
| new
|
||||
| old
|
||||
| high
|
||||
| long
|
||||
`)
|
||||
|
||||
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
|
||||
rv := analysis.NewTokenMap()
|
||||
err := rv.LoadBytes(EnglishStopWords)
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenMap(StopName, TokenMapConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
# full line comment
|
||||
marty
|
||||
steve # trailing comment
|
||||
| different format of comment
|
||||
dustin
|
||||
siri | different style trailing comment
|
||||
multiple words with different whitespace
|
||||
108
vendor/github.com/blevesearch/bleve/v2/analysis/token/lowercase/lowercase.go
generated
vendored
Normal file
108
vendor/github.com/blevesearch/bleve/v2/analysis/token/lowercase/lowercase.go
generated
vendored
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package lowercase implements a TokenFilter which converts
|
||||
// tokens to lower case according to unicode rules.
|
||||
package lowercase
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
// Name is the name used to register LowerCaseFilter in the bleve registry
|
||||
const Name = "to_lower"
|
||||
|
||||
type LowerCaseFilter struct {
|
||||
}
|
||||
|
||||
func NewLowerCaseFilter() *LowerCaseFilter {
|
||||
return &LowerCaseFilter{}
|
||||
}
|
||||
|
||||
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
token.Term = toLowerDeferredCopy(token.Term)
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewLowerCaseFilter(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// toLowerDeferredCopy will function exactly like
|
||||
// bytes.ToLower() only it will reuse (overwrite)
|
||||
// the original byte array when possible
|
||||
// NOTE: because its possible that the lower-case
|
||||
// form of a rune has a different utf-8 encoded
|
||||
// length, in these cases a new byte array is allocated
|
||||
func toLowerDeferredCopy(s []byte) []byte {
|
||||
j := 0
|
||||
for i := 0; i < len(s); {
|
||||
wid := 1
|
||||
r := rune(s[i])
|
||||
if r >= utf8.RuneSelf {
|
||||
r, wid = utf8.DecodeRune(s[i:])
|
||||
}
|
||||
|
||||
l := unicode.ToLower(r)
|
||||
|
||||
// If the rune is already lowercased, just move to the
|
||||
// next rune.
|
||||
if l == r {
|
||||
i += wid
|
||||
j += wid
|
||||
continue
|
||||
}
|
||||
|
||||
// Handles the Unicode edge-case where the last
|
||||
// rune in a word on the greek Σ needs to be converted
|
||||
// differently.
|
||||
if l == 'σ' && i+2 == len(s) {
|
||||
l = 'ς'
|
||||
}
|
||||
|
||||
lwid := utf8.RuneLen(l)
|
||||
if lwid > wid {
|
||||
// utf-8 encoded replacement is wider
|
||||
// for now, punt and defer
|
||||
// to bytes.ToLower() for the remainder
|
||||
// only known to happen with chars
|
||||
// Rune Ⱥ(570) width 2 - Lower ⱥ(11365) width 3
|
||||
// Rune Ⱦ(574) width 2 - Lower ⱦ(11366) width 3
|
||||
rest := bytes.ToLower(s[i:])
|
||||
rv := make([]byte, j+len(rest))
|
||||
copy(rv[:j], s[:j])
|
||||
copy(rv[j:], rest)
|
||||
return rv
|
||||
} else {
|
||||
utf8.EncodeRune(s[j:], l)
|
||||
}
|
||||
i += wid
|
||||
j += lwid
|
||||
}
|
||||
return s[:j]
|
||||
}
|
||||
56
vendor/github.com/blevesearch/bleve/v2/analysis/token/porter/porter.go
generated
vendored
Normal file
56
vendor/github.com/blevesearch/bleve/v2/analysis/token/porter/porter.go
generated
vendored
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package porter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
|
||||
"github.com/blevesearch/go-porterstemmer"
|
||||
)
|
||||
|
||||
const Name = "stemmer_porter"
|
||||
|
||||
type PorterStemmer struct {
|
||||
}
|
||||
|
||||
func NewPorterStemmer() *PorterStemmer {
|
||||
return &PorterStemmer{}
|
||||
}
|
||||
|
||||
func (s *PorterStemmer) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
// if it is not a protected keyword, stem it
|
||||
if !token.KeyWord {
|
||||
termRunes := bytes.Runes(token.Term)
|
||||
stemmedRunes := porterstemmer.StemWithoutLowerCasing(termRunes)
|
||||
token.Term = analysis.BuildTermFromRunes(stemmedRunes)
|
||||
}
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
return NewPorterStemmer(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenFilter(Name, PorterStemmerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
73
vendor/github.com/blevesearch/bleve/v2/analysis/token/stop/stop.go
generated
vendored
Normal file
73
vendor/github.com/blevesearch/bleve/v2/analysis/token/stop/stop.go
generated
vendored
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package stop implements a TokenFilter removing tokens found in
|
||||
// a TokenMap.
|
||||
//
|
||||
// It constructor takes the following arguments:
|
||||
//
|
||||
// "stop_token_map" (string): the name of the token map identifying tokens to
|
||||
// remove.
|
||||
package stop
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "stop_tokens"
|
||||
|
||||
type StopTokensFilter struct {
|
||||
stopTokens analysis.TokenMap
|
||||
}
|
||||
|
||||
func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter {
|
||||
return &StopTokensFilter{
|
||||
stopTokens: stopTokens,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
j := 0
|
||||
for _, token := range input {
|
||||
_, isStopToken := f.stopTokens[string(token.Term)]
|
||||
if !isStopToken {
|
||||
input[j] = token
|
||||
j++
|
||||
}
|
||||
}
|
||||
|
||||
return input[:j]
|
||||
}
|
||||
|
||||
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
stopTokenMapName, ok := config["stop_token_map"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify stop_token_map")
|
||||
}
|
||||
stopTokenMap, err := cache.TokenMapNamed(stopTokenMapName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error building stop words filter: %v", err)
|
||||
}
|
||||
return NewStopTokensFilter(stopTokenMap), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenFilter(Name, StopTokensFilterConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
52
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/single/single.go
generated
vendored
Normal file
52
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/single/single.go
generated
vendored
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package single
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "single"
|
||||
|
||||
type SingleTokenTokenizer struct {
|
||||
}
|
||||
|
||||
func NewSingleTokenTokenizer() *SingleTokenTokenizer {
|
||||
return &SingleTokenTokenizer{}
|
||||
}
|
||||
|
||||
func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
return analysis.TokenStream{
|
||||
&analysis.Token{
|
||||
Term: input,
|
||||
Position: 1,
|
||||
Start: 0,
|
||||
End: len(input),
|
||||
Type: analysis.AlphaNumeric,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
|
||||
return NewSingleTokenTokenizer(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
134
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode/unicode.go
generated
vendored
Normal file
134
vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode/unicode.go
generated
vendored
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package unicode
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/segment"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/analysis"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
)
|
||||
|
||||
const Name = "unicode"
|
||||
|
||||
type UnicodeTokenizer struct {
|
||||
}
|
||||
|
||||
func NewUnicodeTokenizer() *UnicodeTokenizer {
|
||||
return &UnicodeTokenizer{}
|
||||
}
|
||||
|
||||
func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
|
||||
rv := make(analysis.TokenStream, 0, 1)
|
||||
|
||||
ta := []analysis.Token(nil)
|
||||
taNext := 0
|
||||
|
||||
segmenter := segment.NewWordSegmenterDirect(input)
|
||||
start := 0
|
||||
pos := 1
|
||||
|
||||
guessRemaining := func(end int) int {
|
||||
avgSegmentLen := end / (len(rv) + 1)
|
||||
if avgSegmentLen < 1 {
|
||||
avgSegmentLen = 1
|
||||
}
|
||||
|
||||
remainingLen := len(input) - end
|
||||
|
||||
return remainingLen / avgSegmentLen
|
||||
}
|
||||
|
||||
for segmenter.Segment() {
|
||||
segmentBytes := segmenter.Bytes()
|
||||
end := start + len(segmentBytes)
|
||||
if segmenter.Type() != segment.None {
|
||||
if taNext >= len(ta) {
|
||||
remainingSegments := guessRemaining(end)
|
||||
if remainingSegments > 1000 {
|
||||
remainingSegments = 1000
|
||||
}
|
||||
if remainingSegments < 1 {
|
||||
remainingSegments = 1
|
||||
}
|
||||
|
||||
ta = make([]analysis.Token, remainingSegments)
|
||||
taNext = 0
|
||||
}
|
||||
|
||||
token := &ta[taNext]
|
||||
taNext++
|
||||
|
||||
token.Term = segmentBytes
|
||||
token.Start = start
|
||||
token.End = end
|
||||
token.Position = pos
|
||||
token.Type = convertType(segmenter.Type())
|
||||
|
||||
if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
|
||||
rvx = append(rvx, rv)
|
||||
|
||||
rvCap := cap(rv) * 2
|
||||
if rvCap > 256 {
|
||||
rvCap = 256
|
||||
}
|
||||
|
||||
rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
|
||||
}
|
||||
|
||||
rv = append(rv, token)
|
||||
pos++
|
||||
}
|
||||
start = end
|
||||
}
|
||||
|
||||
if len(rvx) > 0 {
|
||||
n := len(rv)
|
||||
for _, r := range rvx {
|
||||
n += len(r)
|
||||
}
|
||||
rall := make(analysis.TokenStream, 0, n)
|
||||
for _, r := range rvx {
|
||||
rall = append(rall, r...)
|
||||
}
|
||||
return append(rall, rv...)
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
|
||||
return NewUnicodeTokenizer(), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
err := registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
func convertType(segmentWordType int) analysis.TokenType {
|
||||
switch segmentWordType {
|
||||
case segment.Ideo:
|
||||
return analysis.Ideographic
|
||||
case segment.Kana:
|
||||
return analysis.Ideographic
|
||||
case segment.Number:
|
||||
return analysis.Numeric
|
||||
}
|
||||
return analysis.AlphaNumeric
|
||||
}
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type TokenMap map[string]bool
|
||||
|
||||
func NewTokenMap() TokenMap {
|
||||
return make(TokenMap, 0)
|
||||
}
|
||||
|
||||
// LoadFile reads in a list of tokens from a text file,
|
||||
// one per line.
|
||||
// Comments are supported using `#` or `|`
|
||||
func (t TokenMap) LoadFile(filename string) error {
|
||||
data, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return t.LoadBytes(data)
|
||||
}
|
||||
|
||||
// LoadBytes reads in a list of tokens from memory,
|
||||
// one per line.
|
||||
// Comments are supported using `#` or `|`
|
||||
func (t TokenMap) LoadBytes(data []byte) error {
|
||||
bytesReader := bytes.NewReader(data)
|
||||
bufioReader := bufio.NewReader(bytesReader)
|
||||
line, err := bufioReader.ReadString('\n')
|
||||
for err == nil {
|
||||
t.LoadLine(line)
|
||||
line, err = bufioReader.ReadString('\n')
|
||||
}
|
||||
// if the err was EOF we still need to process the last value
|
||||
if err == io.EOF {
|
||||
t.LoadLine(line)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (t TokenMap) LoadLine(line string) {
|
||||
// find the start of a comment, if any
|
||||
startComment := strings.IndexAny(line, "#|")
|
||||
if startComment >= 0 {
|
||||
line = line[:startComment]
|
||||
}
|
||||
|
||||
tokens := strings.Fields(line)
|
||||
for _, token := range tokens {
|
||||
t.AddToken(token)
|
||||
}
|
||||
}
|
||||
|
||||
func (t TokenMap) AddToken(token string) {
|
||||
t[token] = true
|
||||
}
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
type CharFilter interface {
|
||||
Filter([]byte) []byte
|
||||
}
|
||||
|
||||
type TokenType int
|
||||
|
||||
const (
|
||||
AlphaNumeric TokenType = iota
|
||||
Ideographic
|
||||
Numeric
|
||||
DateTime
|
||||
Shingle
|
||||
Single
|
||||
Double
|
||||
Boolean
|
||||
IP
|
||||
)
|
||||
|
||||
// Token represents one occurrence of a term at a particular location in a
|
||||
// field.
|
||||
type Token struct {
|
||||
// Start specifies the byte offset of the beginning of the term in the
|
||||
// field.
|
||||
Start int `json:"start"`
|
||||
|
||||
// End specifies the byte offset of the end of the term in the field.
|
||||
End int `json:"end"`
|
||||
Term []byte `json:"term"`
|
||||
|
||||
// Position specifies the 1-based index of the token in the sequence of
|
||||
// occurrences of its term in the field.
|
||||
Position int `json:"position"`
|
||||
Type TokenType `json:"type"`
|
||||
KeyWord bool `json:"keyword"`
|
||||
}
|
||||
|
||||
func (t *Token) String() string {
|
||||
return fmt.Sprintf("Start: %d End: %d Position: %d Token: %s Type: %d", t.Start, t.End, t.Position, string(t.Term), t.Type)
|
||||
}
|
||||
|
||||
type TokenStream []*Token
|
||||
|
||||
// A Tokenizer splits an input string into tokens, the usual behaviour being to
|
||||
// map words to tokens.
|
||||
type Tokenizer interface {
|
||||
Tokenize([]byte) TokenStream
|
||||
}
|
||||
|
||||
// A TokenFilter adds, transforms or removes tokens from a token stream.
|
||||
type TokenFilter interface {
|
||||
Filter(TokenStream) TokenStream
|
||||
}
|
||||
|
||||
type Analyzer interface {
|
||||
Analyze([]byte) TokenStream
|
||||
}
|
||||
|
||||
type DefaultAnalyzer struct {
|
||||
CharFilters []CharFilter
|
||||
Tokenizer Tokenizer
|
||||
TokenFilters []TokenFilter
|
||||
}
|
||||
|
||||
func (a *DefaultAnalyzer) Analyze(input []byte) TokenStream {
|
||||
if a.CharFilters != nil {
|
||||
for _, cf := range a.CharFilters {
|
||||
input = cf.Filter(input)
|
||||
}
|
||||
}
|
||||
tokens := a.Tokenizer.Tokenize(input)
|
||||
if a.TokenFilters != nil {
|
||||
for _, tf := range a.TokenFilters {
|
||||
tokens = tf.Filter(tokens)
|
||||
}
|
||||
}
|
||||
return tokens
|
||||
}
|
||||
|
||||
var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")
|
||||
|
||||
var ErrInvalidTimestampString = fmt.Errorf("unable to parse timestamp string")
|
||||
var ErrInvalidTimestampRange = fmt.Errorf("timestamp out of range")
|
||||
|
||||
type DateTimeParser interface {
|
||||
ParseDateTime(string) (time.Time, string, error)
|
||||
}
|
||||
|
||||
const SynonymSourceType = "synonym"
|
||||
|
||||
type SynonymSourceVisitor func(name string, item SynonymSource) error
|
||||
|
||||
type SynonymSource interface {
|
||||
Analyzer() string
|
||||
Collection() string
|
||||
}
|
||||
|
||||
type ByteArrayConverter interface {
|
||||
Convert([]byte) (interface{}, error)
|
||||
}
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package analysis
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func DeleteRune(in []rune, pos int) []rune {
|
||||
if pos >= len(in) {
|
||||
return in
|
||||
}
|
||||
copy(in[pos:], in[pos+1:])
|
||||
return in[:len(in)-1]
|
||||
}
|
||||
|
||||
func InsertRune(in []rune, pos int, r rune) []rune {
|
||||
// create a new slice 1 rune larger
|
||||
rv := make([]rune, len(in)+1)
|
||||
// copy the characters before the insert pos
|
||||
copy(rv[0:pos], in[0:pos])
|
||||
// set the inserted rune
|
||||
rv[pos] = r
|
||||
// copy the characters after the insert pos
|
||||
copy(rv[pos+1:], in[pos:])
|
||||
return rv
|
||||
}
|
||||
|
||||
// BuildTermFromRunesOptimistic will build a term from the provided runes
|
||||
// AND optimistically attempt to encode into the provided buffer
|
||||
// if at any point it appears the buffer is too small, a new buffer is
|
||||
// allocated and that is used instead
|
||||
// this should be used in cases where frequently the new term is the same
|
||||
// length or shorter than the original term (in number of bytes)
|
||||
func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
|
||||
rv := buf
|
||||
used := 0
|
||||
for _, r := range runes {
|
||||
nextLen := utf8.RuneLen(r)
|
||||
if used+nextLen > len(rv) {
|
||||
// alloc new buf
|
||||
buf = make([]byte, len(runes)*utf8.UTFMax)
|
||||
// copy work we've already done
|
||||
copy(buf, rv[:used])
|
||||
rv = buf
|
||||
}
|
||||
written := utf8.EncodeRune(rv[used:], r)
|
||||
used += written
|
||||
}
|
||||
return rv[:used]
|
||||
}
|
||||
|
||||
func BuildTermFromRunes(runes []rune) []byte {
|
||||
return BuildTermFromRunesOptimistic(make([]byte, len(runes)*utf8.UTFMax), runes)
|
||||
}
|
||||
|
||||
func TruncateRunes(input []byte, num int) []byte {
|
||||
runes := bytes.Runes(input)
|
||||
runes = runes[:len(runes)-num]
|
||||
out := BuildTermFromRunes(runes)
|
||||
return out
|
||||
}
|
||||
|
||||
func RunesEndsWith(input []rune, suffix string) bool {
|
||||
inputLen := len(input)
|
||||
suffixRunes := []rune(suffix)
|
||||
suffixLen := len(suffixRunes)
|
||||
if suffixLen > inputLen {
|
||||
return false
|
||||
}
|
||||
|
||||
for i := suffixLen - 1; i >= 0; i-- {
|
||||
if input[inputLen-(suffixLen-i)] != suffixRunes[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/document"
|
||||
"github.com/blevesearch/bleve/v2/index/scorch"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
"github.com/blevesearch/bleve/v2/util"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
type builderImpl struct {
|
||||
b index.IndexBuilder
|
||||
m mapping.IndexMapping
|
||||
}
|
||||
|
||||
func (b *builderImpl) Index(id string, data interface{}) error {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
doc := document.NewDocument(id)
|
||||
err := b.m.MapDocument(doc, data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = b.b.Index(doc)
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *builderImpl) Close() error {
|
||||
return b.b.Close()
|
||||
}
|
||||
|
||||
func newBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) {
|
||||
if path == "" {
|
||||
return nil, fmt.Errorf("builder requires path")
|
||||
}
|
||||
|
||||
err := mapping.Validate()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if config == nil {
|
||||
config = map[string]interface{}{}
|
||||
}
|
||||
|
||||
// the builder does not have an API to interact with internal storage
|
||||
// however we can pass k/v pairs through the config
|
||||
mappingBytes, err := util.MarshalJSON(mapping)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config["internal"] = map[string][]byte{
|
||||
string(mappingInternalKey): mappingBytes,
|
||||
}
|
||||
|
||||
// do not use real config, as these are options for the builder,
|
||||
// not the resulting index
|
||||
meta := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{})
|
||||
err = meta.Save(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
config["path"] = indexStorePath(path)
|
||||
|
||||
b, err := scorch.NewBuilder(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := &builderImpl{
|
||||
b: b,
|
||||
m: mapping,
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"expvar"
|
||||
"io"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/index/scorch"
|
||||
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
|
||||
"github.com/blevesearch/bleve/v2/registry"
|
||||
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/html"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var bleveExpVar = expvar.NewMap("bleve")
|
||||
|
||||
type configuration struct {
|
||||
Cache *registry.Cache
|
||||
DefaultHighlighter string
|
||||
DefaultKVStore string
|
||||
DefaultMemKVStore string
|
||||
DefaultIndexType string
|
||||
SlowSearchLogThreshold time.Duration
|
||||
analysisQueue *index.AnalysisQueue
|
||||
}
|
||||
|
||||
func (c *configuration) SetAnalysisQueueSize(n int) {
|
||||
if c.analysisQueue != nil {
|
||||
c.analysisQueue.Close()
|
||||
}
|
||||
c.analysisQueue = index.NewAnalysisQueue(n)
|
||||
}
|
||||
|
||||
func (c *configuration) Shutdown() {
|
||||
c.SetAnalysisQueueSize(0)
|
||||
}
|
||||
|
||||
func newConfiguration() *configuration {
|
||||
return &configuration{
|
||||
Cache: registry.NewCache(),
|
||||
analysisQueue: index.NewAnalysisQueue(4),
|
||||
}
|
||||
}
|
||||
|
||||
// Config contains library level configuration
|
||||
var Config *configuration
|
||||
|
||||
func init() {
|
||||
bootStart := time.Now()
|
||||
|
||||
// build the default configuration
|
||||
Config = newConfiguration()
|
||||
|
||||
// set the default highlighter
|
||||
Config.DefaultHighlighter = html.Name
|
||||
|
||||
// default kv store
|
||||
Config.DefaultKVStore = ""
|
||||
|
||||
// default mem only kv store
|
||||
Config.DefaultMemKVStore = gtreap.Name
|
||||
|
||||
// default index
|
||||
Config.DefaultIndexType = scorch.Name
|
||||
|
||||
bootDuration := time.Since(bootStart)
|
||||
bleveExpVar.Add("bootDuration", int64(bootDuration))
|
||||
indexStats = NewIndexStats()
|
||||
bleveExpVar.Set("indexes", indexStats)
|
||||
|
||||
initDisk()
|
||||
}
|
||||
|
||||
var logger = log.New(io.Discard, "bleve", log.LstdFlags)
|
||||
|
||||
// SetLog sets the logger used for logging
|
||||
// by default log messages are sent to io.Discard
|
||||
func SetLog(l *log.Logger) {
|
||||
logger = l
|
||||
}
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build appengine || appenginevm
|
||||
// +build appengine appenginevm
|
||||
|
||||
package bleve
|
||||
|
||||
// in the appengine environment we cannot support disk based indexes
|
||||
// so we do no extra configuration in this method
|
||||
func initDisk() {
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
//go:build !appengine && !appenginevm
|
||||
// +build !appengine,!appenginevm
|
||||
|
||||
package bleve
|
||||
|
||||
import "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
|
||||
|
||||
// in normal environments we configure boltdb as the default storage
|
||||
func initDisk() {
|
||||
// default kv store
|
||||
Config.DefaultKVStore = boltdb.Name
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
/*
|
||||
Package bleve is a library for indexing and searching text.
|
||||
|
||||
Example Opening New Index, Indexing Data
|
||||
|
||||
message := struct{
|
||||
Id: "example"
|
||||
From: "xyz@couchbase.com",
|
||||
Body: "bleve indexing is easy",
|
||||
}
|
||||
|
||||
mapping := bleve.NewIndexMapping()
|
||||
index, _ := bleve.New("example.bleve", mapping)
|
||||
index.Index(message.Id, message)
|
||||
|
||||
Example Opening Existing Index, Searching Data
|
||||
|
||||
index, _ := bleve.Open("example.bleve")
|
||||
query := bleve.NewQueryStringQuery("bleve")
|
||||
searchRequest := bleve.NewSearchRequest(query)
|
||||
searchResult, _ := index.Search(searchRequest)
|
||||
*/
|
||||
package bleve
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/blevesearch/bleve/v2/size"
|
||||
index "github.com/blevesearch/bleve_index_api"
|
||||
)
|
||||
|
||||
var reflectStaticSizeDocument int
|
||||
|
||||
func init() {
|
||||
var d Document
|
||||
reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
|
||||
}
|
||||
|
||||
type Document struct {
|
||||
id string `json:"id"`
|
||||
Fields []Field `json:"fields"`
|
||||
CompositeFields []*CompositeField
|
||||
StoredFieldsSize uint64
|
||||
indexed bool
|
||||
}
|
||||
|
||||
func (d *Document) StoredFieldsBytes() uint64 {
|
||||
return d.StoredFieldsSize
|
||||
}
|
||||
|
||||
func NewDocument(id string) *Document {
|
||||
return &Document{
|
||||
id: id,
|
||||
Fields: make([]Field, 0),
|
||||
CompositeFields: make([]*CompositeField, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func NewSynonymDocument(id string) *Document {
|
||||
return &Document{
|
||||
id: id,
|
||||
Fields: make([]Field, 0),
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) Size() int {
|
||||
sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr +
|
||||
len(d.id)
|
||||
|
||||
for _, entry := range d.Fields {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
for _, entry := range d.CompositeFields {
|
||||
sizeInBytes += entry.Size()
|
||||
}
|
||||
|
||||
return sizeInBytes
|
||||
}
|
||||
|
||||
func (d *Document) AddField(f Field) *Document {
|
||||
switch f := f.(type) {
|
||||
case *CompositeField:
|
||||
d.CompositeFields = append(d.CompositeFields, f)
|
||||
default:
|
||||
d.Fields = append(d.Fields, f)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func (d *Document) GoString() string {
|
||||
fields := ""
|
||||
for i, field := range d.Fields {
|
||||
if i != 0 {
|
||||
fields += ", "
|
||||
}
|
||||
fields += fmt.Sprintf("%#v", field)
|
||||
}
|
||||
compositeFields := ""
|
||||
for i, field := range d.CompositeFields {
|
||||
if i != 0 {
|
||||
compositeFields += ", "
|
||||
}
|
||||
compositeFields += fmt.Sprintf("%#v", field)
|
||||
}
|
||||
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID(), fields, compositeFields)
|
||||
}
|
||||
|
||||
func (d *Document) NumPlainTextBytes() uint64 {
|
||||
rv := uint64(0)
|
||||
for _, field := range d.Fields {
|
||||
rv += field.NumPlainTextBytes()
|
||||
}
|
||||
for _, compositeField := range d.CompositeFields {
|
||||
for _, field := range d.Fields {
|
||||
if compositeField.includesField(field.Name()) {
|
||||
rv += field.NumPlainTextBytes()
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (d *Document) ID() string {
|
||||
return d.id
|
||||
}
|
||||
|
||||
func (d *Document) SetID(id string) {
|
||||
d.id = id
|
||||
}
|
||||
|
||||
func (d *Document) AddIDField() {
|
||||
d.AddField(NewTextFieldCustom("_id", nil, []byte(d.ID()), index.IndexField|index.StoreField, nil))
|
||||
}
|
||||
|
||||
func (d *Document) VisitFields(visitor index.FieldVisitor) {
|
||||
for _, f := range d.Fields {
|
||||
visitor(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) VisitComposite(visitor index.CompositeFieldVisitor) {
|
||||
for _, f := range d.CompositeFields {
|
||||
visitor(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) HasComposite() bool {
|
||||
return len(d.CompositeFields) > 0
|
||||
}
|
||||
|
||||
func (d *Document) VisitSynonymFields(visitor index.SynonymFieldVisitor) {
|
||||
for _, f := range d.Fields {
|
||||
if sf, ok := f.(index.SynonymField); ok {
|
||||
visitor(sf)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Document) SetIndexed() {
|
||||
d.indexed = true
|
||||
}
|
||||
|
||||
func (d *Document) Indexed() bool {
|
||||
return d.indexed
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue