masterclient.go

package wdclient

import (
	"context"
	"fmt"
	"math/rand"
	"strconv"
	"strings"
	"sync"
	"time"

	"google.golang.org/grpc"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/stats"
	"github.com/seaweedfs/seaweedfs/weed/util"
)
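
// MasterClient keeps a long-lived connection to the leader of the master
// cluster, mirrors volume id locations into a local vidMap, and surfaces
// cluster topology changes through the OnPeerUpdate callback.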
type MasterClient struct {
	FilerGroup string
	clientType string
	clientHost pb.ServerAddress
	rack       string

	currentMaster     pb.ServerAddress
	currentMasterLock sync.RWMutex

	masters        pb.ServerDiscovery
	grpcDialOption grpc.DialOption

	*vidMap // current volume id -> location map; older generations are chained via its cache field
	vidMapCacheSize int

	OnPeerUpdate     func(update *master_pb.ClusterNodeUpdate, startFrom time.Time)
	OnPeerUpdateLock sync.RWMutex
}
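
// NewMasterClient constructs a MasterClient that keeps up to five
// generations of vidMap entries. A typical usage sketch, with placeholder
// values for the discovery list, dial option, and file id:
//
//	mc := NewMasterClient(grpcDialOption, "", "filer", clientHost, "", "", masters)
//	go mc.KeepConnectedToMaster(ctx)
//	urls, err := mc.LookupFileIdWithFallback("3,01637037d6")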
func NewMasterClient(grpcDialOption grpc.DialOption, filerGroup string, clientType string, clientHost pb.ServerAddress, clientDataCenter string, rack string, masters pb.ServerDiscovery) *MasterClient {
	return &MasterClient{
		FilerGroup:      filerGroup,
		clientType:      clientType,
		clientHost:      clientHost,
		rack:            rack,
		masters:         masters,
		grpcDialOption:  grpcDialOption,
		vidMap:          newVidMap(clientDataCenter),
		vidMapCacheSize: 5,
	}
}
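
// SetOnPeerUpdateFn registers the callback that is invoked for every
// ClusterNodeUpdate received while connected to a master.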
func (mc *MasterClient) SetOnPeerUpdateFn(onPeerUpdate func(update *master_pb.ClusterNodeUpdate, startFrom time.Time)) {
	mc.OnPeerUpdateLock.Lock()
	mc.OnPeerUpdate = onPeerUpdate
	mc.OnPeerUpdateLock.Unlock()
}
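
// GetLookupFileIdFunction exposes LookupFileIdWithFallback as a
// LookupFileIdFunctionType for callers that need a lookup function value.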
func (mc *MasterClient) GetLookupFileIdFunction() LookupFileIdFunctionType {
	return mc.LookupFileIdWithFallback
}
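
// LookupFileIdWithFallback resolves a file id to volume server URLs, first
// from the local vidMap cache and, on a miss, by querying the master
// directly. URLs in the client's own data center are listed first.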
func (mc *MasterClient) LookupFileIdWithFallback(fileId string) (fullUrls []string, err error) {
	fullUrls, err = mc.vidMap.LookupFileId(fileId)
	if err == nil && len(fullUrls) > 0 {
		return
	}
	err = pb.WithMasterClient(false, mc.GetMaster(context.Background()), mc.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
		resp, err := client.LookupVolume(context.Background(), &master_pb.LookupVolumeRequest{
			VolumeOrFileIds: []string{fileId},
		})
		if err != nil {
			return fmt.Errorf("LookupVolume %s failed: %v", fileId, err)
		}
		for _, vidLocation := range resp.VolumeIdLocations {
			// VolumeOrFileId echoes the looked-up id; the volume id is the
			// portion before the comma in a "volumeId,fileKey" file id
			vidStr := vidLocation.VolumeOrFileId
			if commaIndex := strings.Index(vidStr, ","); commaIndex > 0 {
				vidStr = vidStr[:commaIndex]
			}
			vid, parseErr := strconv.ParseUint(vidStr, 10, 32)
			if parseErr != nil {
				return fmt.Errorf("parse volume id from %s: %v", vidLocation.VolumeOrFileId, parseErr)
			}
			for _, vidLoc := range vidLocation.Locations {
				loc := Location{
					Url:        vidLoc.Url,
					PublicUrl:  vidLoc.PublicUrl,
					GrpcPort:   int(vidLoc.GrpcPort),
					DataCenter: vidLoc.DataCenter,
				}
				mc.vidMap.addLocation(uint32(vid), loc)
				httpUrl := "http://" + loc.Url + "/" + fileId
				// prefer locations in the same data center
				if mc.DataCenter != "" && mc.DataCenter == loc.DataCenter {
					fullUrls = append([]string{httpUrl}, fullUrls...)
				} else {
					fullUrls = append(fullUrls, httpUrl)
				}
			}
		}
		return nil
	})
	return
}

// getCurrentMaster returns the current master address under a read lock.
func (mc *MasterClient) getCurrentMaster() pb.ServerAddress {
	mc.currentMasterLock.RLock()
	defer mc.currentMasterLock.RUnlock()
	return mc.currentMaster
}

// setCurrentMaster records the connected master under a write lock; an
// empty address means "not connected".
func (mc *MasterClient) setCurrentMaster(master pb.ServerAddress) {
	mc.currentMasterLock.Lock()
	mc.currentMaster = master
	mc.currentMasterLock.Unlock()
}
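
// GetMaster blocks until a master connection is established (or ctx is
// canceled) and returns the current master address.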
func (mc *MasterClient) GetMaster(ctx context.Context) pb.ServerAddress {
	mc.WaitUntilConnected(ctx)
	return mc.getCurrentMaster()
}
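
// GetMasters blocks until a master connection is established (or ctx is
// canceled) and returns all known master addresses.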
func (mc *MasterClient) GetMasters(ctx context.Context) []pb.ServerAddress {
	mc.WaitUntilConnected(ctx)
	return mc.masters.GetInstances()
}
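
// WaitUntilConnected polls until a current master is known, sleeping a
// random interval of up to 200ms between checks; it returns early if ctx
// is canceled.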
func (mc *MasterClient) WaitUntilConnected(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			glog.V(0).Infof("Connection wait stopped: %v", ctx.Err())
			return
		default:
			if mc.getCurrentMaster() != "" {
				return
			}
			time.Sleep(time.Duration(rand.Int31n(200)) * time.Millisecond)
			print(".")
		}
	}
}
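
// KeepConnectedToMaster runs until ctx is canceled, repeatedly cycling
// through all known masters and pausing one second between cycles.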
func (mc *MasterClient) KeepConnectedToMaster(ctx context.Context) {
	glog.V(1).Infof("%s.%s masterClient bootstraps with masters %v", mc.FilerGroup, mc.clientType, mc.masters)
	for {
		select {
		case <-ctx.Done():
			glog.V(0).Infof("Connection to masters stopped: %v", ctx.Err())
			return
		default:
			mc.tryAllMasters(ctx)
			time.Sleep(time.Second)
		}
	}
}
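
// FindLeaderFromOtherPeers asks each of the other masters for its view of
// the current leader, returning the first non-empty answer or "" if no
// peer reports a leader.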
func (mc *MasterClient) FindLeaderFromOtherPeers(myMasterAddress pb.ServerAddress) (leader string) {
	for _, master := range mc.masters.GetInstances() {
		if master == myMasterAddress {
			continue
		}
		if grpcErr := pb.WithMasterClient(false, master, mc.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
			ctx, cancel := context.WithTimeout(context.Background(), 120*time.Millisecond)
			defer cancel()
			resp, err := client.GetMasterConfiguration(ctx, &master_pb.GetMasterConfigurationRequest{})
			if err != nil {
				return err
			}
			leader = resp.Leader
			return nil
		}); grpcErr != nil {
			glog.V(0).Infof("connect to %s: %v", master, grpcErr)
		}
		if leader != "" {
			glog.V(0).Infof("existing leader is %s", leader)
			return
		}
	}
	glog.V(0).Infof("No existing leader found!")
	return
}
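
// tryAllMasters connects to each known master in turn, following any
// leader redirection hints until no further hint is returned.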
func (mc *MasterClient) tryAllMasters(ctx context.Context) {
	var nextHintedLeader pb.ServerAddress
	mc.masters.RefreshBySrvIfAvailable()
	for _, master := range mc.masters.GetInstances() {
		nextHintedLeader = mc.tryConnectToMaster(ctx, master)
		for nextHintedLeader != "" {
			select {
			case <-ctx.Done():
				glog.V(0).Infof("Connection attempt to all masters stopped: %v", ctx.Err())
				return
			default:
				nextHintedLeader = mc.tryConnectToMaster(ctx, nextHintedLeader)
			}
		}
		mc.setCurrentMaster("")
	}
}
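
// tryConnectToMaster opens a KeepConnected stream to the given master and
// consumes volume location and cluster node updates until the stream
// fails or ctx is canceled. If the master reports a different leader,
// that address is returned as the next connection hint.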
func (mc *MasterClient) tryConnectToMaster(ctx context.Context, master pb.ServerAddress) (nextHintedLeader pb.ServerAddress) {
	glog.V(1).Infof("%s.%s masterClient Connecting to master %v", mc.FilerGroup, mc.clientType, master)
	stats.MasterClientConnectCounter.WithLabelValues("total").Inc()
	grpcErr := pb.WithMasterClient(true, master, mc.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
		ctx, cancel := context.WithCancel(ctx)
		defer cancel()
		stream, err := client.KeepConnected(ctx)
		if err != nil {
			glog.V(1).Infof("%s.%s masterClient failed to keep connected to %s: %v", mc.FilerGroup, mc.clientType, master, err)
			stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToKeepConnected).Inc()
			return err
		}
		if err = stream.Send(&master_pb.KeepConnectedRequest{
			FilerGroup:    mc.FilerGroup,
			DataCenter:    mc.DataCenter,
			Rack:          mc.rack,
			ClientType:    mc.clientType,
			ClientAddress: string(mc.clientHost),
			Version:       util.Version(),
		}); err != nil {
			glog.V(0).Infof("%s.%s masterClient failed to send to %s: %v", mc.FilerGroup, mc.clientType, master, err)
			stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToSend).Inc()
			return err
		}
		glog.V(1).Infof("%s.%s masterClient Connected to %v", mc.FilerGroup, mc.clientType, master)

		resp, err := stream.Recv()
		if err != nil {
			glog.V(0).Infof("%s.%s masterClient failed to receive from %s: %v", mc.FilerGroup, mc.clientType, master, err)
			stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToReceive).Inc()
			return err
		}

		// check if it is the leader to determine whether to reset the vidMap
		if resp.VolumeLocation != nil {
			if resp.VolumeLocation.Leader != "" && string(master) != resp.VolumeLocation.Leader {
				glog.V(0).Infof("master %v redirected to leader %v", master, resp.VolumeLocation.Leader)
				nextHintedLeader = pb.ServerAddress(resp.VolumeLocation.Leader)
				stats.MasterClientConnectCounter.WithLabelValues(stats.RedirectedToLeader).Inc()
				return nil
			}
			mc.resetVidMap()
			mc.updateVidMap(resp)
		} else {
			mc.resetVidMap()
		}
		mc.setCurrentMaster(master)

		for {
			resp, err := stream.Recv()
			if err != nil {
				glog.V(0).Infof("%s.%s masterClient failed to receive from %s: %v", mc.FilerGroup, mc.clientType, master, err)
				stats.MasterClientConnectCounter.WithLabelValues(stats.FailedToReceive).Inc()
				return err
			}

			if resp.VolumeLocation != nil {
				// maybe the leader changed
				if resp.VolumeLocation.Leader != "" && string(mc.GetMaster(ctx)) != resp.VolumeLocation.Leader {
					glog.V(0).Infof("currentMaster %v redirected to leader %v", mc.GetMaster(ctx), resp.VolumeLocation.Leader)
					nextHintedLeader = pb.ServerAddress(resp.VolumeLocation.Leader)
					stats.MasterClientConnectCounter.WithLabelValues(stats.RedirectedToLeader).Inc()
					return nil
				}
				mc.updateVidMap(resp)
			}

			if resp.ClusterNodeUpdate != nil {
				update := resp.ClusterNodeUpdate
				mc.OnPeerUpdateLock.RLock()
				if mc.OnPeerUpdate != nil {
					if update.FilerGroup == mc.FilerGroup {
						if update.IsAdd {
							glog.V(0).Infof("+ %s@%s noticed %s.%s %s\n", mc.clientType, mc.clientHost, update.FilerGroup, update.NodeType, update.Address)
						} else {
							glog.V(0).Infof("- %s@%s noticed %s.%s %s\n", mc.clientType, mc.clientHost, update.FilerGroup, update.NodeType, update.Address)
						}
						stats.MasterClientConnectCounter.WithLabelValues(stats.OnPeerUpdate).Inc()
						mc.OnPeerUpdate(update, time.Now())
					}
				}
				mc.OnPeerUpdateLock.RUnlock()
			}

			if err := ctx.Err(); err != nil {
				glog.V(0).Infof("Connection attempt to master stopped: %v", err)
				return err
			}
		}
	})
	if grpcErr != nil {
		stats.MasterClientConnectCounter.WithLabelValues(stats.Failed).Inc()
		glog.V(1).Infof("%s.%s masterClient failed to connect with master %v: %v", mc.FilerGroup, mc.clientType, master, grpcErr)
	}
	return
}
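
// updateVidMap applies the volume location changes carried by a
// KeepConnectedResponse to the local vidMap, ignoring short heartbeats
// that carry no volume server URL.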
func (mc *MasterClient) updateVidMap(resp *master_pb.KeepConnectedResponse) {
	if resp.VolumeLocation.IsEmptyUrl() {
		glog.V(0).Infof("updateVidMap ignore short heartbeat: %+v", resp)
		return
	}
	// process new volume location
	loc := Location{
		Url:        resp.VolumeLocation.Url,
		PublicUrl:  resp.VolumeLocation.PublicUrl,
		DataCenter: resp.VolumeLocation.DataCenter,
		GrpcPort:   int(resp.VolumeLocation.GrpcPort),
	}
	for _, newVid := range resp.VolumeLocation.NewVids {
		glog.V(2).Infof("%s.%s: %s masterClient adds volume %d", mc.FilerGroup, mc.clientType, loc.Url, newVid)
		mc.addLocation(newVid, loc)
	}
	for _, deletedVid := range resp.VolumeLocation.DeletedVids {
		glog.V(2).Infof("%s.%s: %s masterClient removes volume %d", mc.FilerGroup, mc.clientType, loc.Url, deletedVid)
		mc.deleteLocation(deletedVid, loc)
	}
	for _, newEcVid := range resp.VolumeLocation.NewEcVids {
		glog.V(2).Infof("%s.%s: %s masterClient adds ec volume %d", mc.FilerGroup, mc.clientType, loc.Url, newEcVid)
		mc.addEcLocation(newEcVid, loc)
	}
	for _, deletedEcVid := range resp.VolumeLocation.DeletedEcVids {
		glog.V(2).Infof("%s.%s: %s masterClient removes ec volume %d", mc.FilerGroup, mc.clientType, loc.Url, deletedEcVid)
		mc.deleteEcLocation(deletedEcVid, loc)
	}
	glog.V(1).Infof("updateVidMap(%s) %s.%s: %s volume add: %d, del: %d, add ec: %d del ec: %d",
		resp.VolumeLocation.DataCenter, mc.FilerGroup, mc.clientType, loc.Url,
		len(resp.VolumeLocation.NewVids), len(resp.VolumeLocation.DeletedVids),
		len(resp.VolumeLocation.NewEcVids), len(resp.VolumeLocation.DeletedEcVids))
}
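
// WithClient runs fn against the current master inside util.Retry, so
// transient gRPC failures are retried.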
func (mc *MasterClient) WithClient(streamingMode bool, fn func(client master_pb.SeaweedClient) error) error {
	getMasterF := func() pb.ServerAddress { return mc.GetMaster(context.Background()) }
	return mc.WithClientCustomGetMaster(getMasterF, streamingMode, fn)
}
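
// WithClientCustomGetMaster is like WithClient but lets the caller supply
// the function that picks the master address for each attempt.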
func (mc *MasterClient) WithClientCustomGetMaster(getMasterF func() pb.ServerAddress, streamingMode bool, fn func(client master_pb.SeaweedClient) error) error {
	return util.Retry("master grpc", func() error {
		return pb.WithMasterClient(streamingMode, getMasterF(), mc.grpcDialOption, false, func(client master_pb.SeaweedClient) error {
			return fn(client)
		})
	})
}
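
// resetVidMap installs a fresh vidMap and chains the previous map behind
// it as a lookup cache, trimming the chain to at most vidMapCacheSize
// generations.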
func (mc *MasterClient) resetVidMap() {
	tail := &vidMap{
		vid2Locations:   mc.vid2Locations,
		ecVid2Locations: mc.ecVid2Locations,
		DataCenter:      mc.DataCenter,
		cache:           mc.cache,
	}

	nvm := newVidMap(mc.DataCenter)
	nvm.cache = tail
	mc.vidMap = nvm

	// trim the cache chain to at most vidMapCacheSize generations
	for i := 0; i < mc.vidMapCacheSize && tail.cache != nil; i++ {
		if i == mc.vidMapCacheSize-1 {
			tail.cache = nil
		} else {
			tail = tail.cache
		}
	}
}