index.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. // Copyright 2020 gorse Project Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package base
  15. import (
  16. "encoding/binary"
  17. "github.com/juju/errors"
  18. "github.com/zhenghaoz/gorse/base/encoding"
  19. "io"
  20. "reflect"
  21. "strconv"
  22. )
  23. // Index keeps the mapping between names (string) and indices (integer).
  24. type Index interface {
  25. Len() int32
  26. Add(name string)
  27. ToNumber(name string) int32
  28. ToName(index int32) string
  29. GetNames() []string
  30. Marshal(w io.Writer) error
  31. Unmarshal(r io.Reader) error
  32. Bytes() int
  33. }
  34. const (
  35. mapIndex uint8 = iota
  36. directIndex
  37. )
  38. // MarshalIndex marshal index into byte stream.
  39. func MarshalIndex(w io.Writer, index Index) error {
  40. // write index type
  41. var indexType uint8
  42. switch index.(type) {
  43. case *MapIndex:
  44. indexType = mapIndex
  45. case *DirectIndex:
  46. indexType = directIndex
  47. default:
  48. return errors.New("unknown index type")
  49. }
  50. err := binary.Write(w, binary.LittleEndian, indexType)
  51. if err != nil {
  52. return errors.Trace(err)
  53. }
  54. // write index
  55. return index.Marshal(w)
  56. }
  57. // UnmarshalIndex unmarshal index from byte stream.
  58. func UnmarshalIndex(r io.Reader) (Index, error) {
  59. // read type index
  60. var indexType uint8
  61. err := binary.Read(r, binary.LittleEndian, &indexType)
  62. if err != nil {
  63. return nil, errors.Trace(err)
  64. }
  65. var index Index
  66. switch indexType {
  67. case mapIndex:
  68. index = &MapIndex{}
  69. case directIndex:
  70. index = &DirectIndex{}
  71. default:
  72. return nil, errors.New("unknown index type")
  73. }
  74. // read index
  75. err = index.Unmarshal(r)
  76. if err != nil {
  77. return nil, errors.Trace(err)
  78. }
  79. return index, nil
  80. }
  81. // MapIndex manages the map between sparse Names and dense indices. A sparse ID is
  82. // a user ID or item ID. The dense index is the internal user index or item index
  83. // optimized for faster parameter access and less memory usage.
  84. type MapIndex struct {
  85. Numbers map[string]int32 // sparse ID -> dense index
  86. Names []string // dense index -> sparse ID
  87. Characters int
  88. }
  89. // NotId represents an ID doesn't exist.
  90. const NotId = int32(-1)
  91. // NewMapIndex creates a MapIndex.
  92. func NewMapIndex() *MapIndex {
  93. set := new(MapIndex)
  94. set.Numbers = make(map[string]int32)
  95. set.Names = make([]string, 0)
  96. return set
  97. }
  98. // Len returns the number of indexed Names.
  99. func (idx *MapIndex) Len() int32 {
  100. if idx == nil {
  101. return 0
  102. }
  103. return int32(len(idx.Names))
  104. }
  105. // Add adds a new ID to the indexer.
  106. func (idx *MapIndex) Add(name string) {
  107. if _, exist := idx.Numbers[name]; !exist {
  108. idx.Numbers[name] = int32(len(idx.Names))
  109. idx.Names = append(idx.Names, name)
  110. idx.Characters += len(name)
  111. }
  112. }
  113. // ToNumber converts a sparse ID to a dense index.
  114. func (idx *MapIndex) ToNumber(name string) int32 {
  115. if denseId, exist := idx.Numbers[name]; exist {
  116. return denseId
  117. }
  118. return NotId
  119. }
  120. // ToName converts a dense index to a sparse ID.
  121. func (idx *MapIndex) ToName(index int32) string {
  122. return idx.Names[index]
  123. }
  124. // GetNames returns all names in current index.
  125. func (idx *MapIndex) GetNames() []string {
  126. return idx.Names
  127. }
  128. // Marshal map index into byte stream.
  129. func (idx *MapIndex) Marshal(w io.Writer) error {
  130. // write length
  131. err := binary.Write(w, binary.LittleEndian, int32(len(idx.Names)))
  132. if err != nil {
  133. return errors.Trace(err)
  134. }
  135. // write names
  136. for _, s := range idx.Names {
  137. err = encoding.WriteString(w, s)
  138. if err != nil {
  139. return errors.Trace(err)
  140. }
  141. }
  142. return nil
  143. }
  144. // Unmarshal map index from byte stream.
  145. func (idx *MapIndex) Unmarshal(r io.Reader) error {
  146. // read length
  147. var n int32
  148. err := binary.Read(r, binary.LittleEndian, &n)
  149. if err != nil {
  150. return errors.Trace(err)
  151. }
  152. // write names
  153. idx.Names = make([]string, 0, n)
  154. idx.Numbers = make(map[string]int32, n)
  155. for i := 0; i < int(n); i++ {
  156. name, err := encoding.ReadString(r)
  157. if err != nil {
  158. return errors.Trace(err)
  159. }
  160. idx.Add(name)
  161. }
  162. return nil
  163. }
  164. func (idx *MapIndex) Bytes() int {
  165. // The memory usage of MapIndex consists of:
  166. // 1. struct
  167. // 2. string in idx.Names
  168. // 3. int32 (value) in idx.Numbers
  169. // 4. string (key) in idx.Numbers
  170. // 5. rune in string
  171. // The cost of map is omitted.
  172. bytes := reflect.TypeOf(idx).Elem().Size()
  173. if idx.Len() > 0 {
  174. bytes += reflect.TypeOf(idx.Names).Elem().Size() * uintptr(cap(idx.Names))
  175. bytes += reflect.TypeOf(idx.Numbers).Elem().Size() * uintptr(len(idx.Numbers))
  176. bytes += reflect.TypeOf(idx.Numbers).Key().Size() * uintptr(len(idx.Numbers))
  177. bytes += reflect.TypeOf(rune(0)).Size() * uintptr(idx.Characters)
  178. }
  179. return int(bytes)
  180. }
  181. // DirectIndex means that the name and its index is the same. For example,
  182. // the index of "1" is 1, vice versa.
  183. type DirectIndex struct {
  184. Limit int32
  185. }
  186. // NewDirectIndex create a direct mapping index.
  187. func NewDirectIndex() *DirectIndex {
  188. return &DirectIndex{Limit: 0}
  189. }
  190. // Len returns the number of names in current index.
  191. func (idx *DirectIndex) Len() int32 {
  192. return idx.Limit
  193. }
  194. // Add a name to current index.
  195. func (idx *DirectIndex) Add(s string) {
  196. i, err := strconv.Atoi(s)
  197. if err != nil {
  198. panic(err)
  199. }
  200. if int32(i) >= idx.Limit {
  201. idx.Limit = int32(i + 1)
  202. }
  203. }
  204. // ToNumber converts a name to corresponding index.
  205. func (idx *DirectIndex) ToNumber(name string) int32 {
  206. i, err := strconv.Atoi(name)
  207. if err != nil {
  208. panic(err)
  209. }
  210. if int32(i) >= idx.Limit {
  211. return NotId
  212. }
  213. return int32(i)
  214. }
  215. // ToName converts a index to corresponding name.
  216. func (idx *DirectIndex) ToName(index int32) string {
  217. if index >= idx.Limit {
  218. panic("index out of range")
  219. }
  220. return strconv.Itoa(int(index))
  221. }
  222. // GetNames returns all names in current index.
  223. func (idx *DirectIndex) GetNames() []string {
  224. names := make([]string, idx.Limit)
  225. for i := range names {
  226. names[i] = strconv.Itoa(i)
  227. }
  228. return names
  229. }
  230. // Marshal direct index into byte stream.
  231. func (idx *DirectIndex) Marshal(w io.Writer) error {
  232. return binary.Write(w, binary.LittleEndian, idx.Limit)
  233. }
  234. // Unmarshal direct index from byte stream.
  235. func (idx *DirectIndex) Unmarshal(r io.Reader) error {
  236. return binary.Read(r, binary.LittleEndian, &idx.Limit)
  237. }
  238. func (idx *DirectIndex) Bytes() int {
  239. return int(reflect.TypeOf(idx).Elem().Size())
  240. }