6 years ago · 60d3681371
--- a/note/memory_usage.txt
+++ b/note/memory_usage.txt
--- a/note/replication.txt
+++ b/note/replication.txt
@@ -1,109 +0,0 @@
 
				-1. each file can choose the replication factor
			
 
				-2. replication granularity is in volume level
			
 
				-3. if there is not enough space, we can automatically decrease some volumes' replication factor, especially for cold data
			
 
				-4. plan to support migrating data to cheaper storage
			
 
				-5. plan to support manual volume placement, access-based volume placement, auction-based volume placement
			
 
				-
			
 
				-When a new volume server is started, it reports 
			
 
				-  1. how many volumes it can hold
			
 
				-  2. current list of existing volumes and each volume's replication type
			
 
				-Each volume server remembers:
			
 
				-  1. current volume ids
			
 
				-  2. replica locations are read from the master
			
 
				-
			
 
				-The master assigns volume ids based on
			
 
				-  1. replication factor
			
 
				-     data center, rack
			
 
				-  2. concurrent write support
			
 
				-On master, stores the replication configuration
			
 
				-{
			
 
				-  replication:{
			
 
				-    {type:"00", min_volume_count:3, weight:10},
			
 
				-    {type:"01", min_volume_count:2, weight:20},
			
 
				-    {type:"10", min_volume_count:2, weight:20},
			
 
				-    {type:"11", min_volume_count:3, weight:30},
			
 
				-    {type:"20", min_volume_count:2, weight:20}
			
 
				-  },
			
 
				-  port:9333,
			
 
				-}
			
 
				-Or manually via command line
			
 
				-  1. add volume with specified replication factor
			
 
				-  2. add volume with specified volume id
			
 
				-
			
 
				-
			
 
				-If duplicated volume ids are reported from different volume servers, 
			
 
				-the master determines the replication factor of the volume,
			
 
				-if less than the replication factor, the volume is in readonly mode
			
 
				-if more than the replication factor, the volume will purge the smallest/oldest volume
			
 
				-if equal, the volume will function as usual
			
 
				-
			
 
				-
			
 
				-Use cases:
			
 
				-  on volume server
			
 
				-    1. weed volume -mserver="xx.xx.xx.xx:9333" -publicUrl="good.com:8080" -dir="/tmp" -volumes=50
			
 
				-  on weed master
			
 
				-    1. weed master -port=9333
			
 
				-      generate a default json configuration file if it doesn't exist
			
 
				-      
			
 
				-Bootstrap
			
 
				-  1. at the very beginning, the system has no volumes at all.
			
 
				-When data node starts:
			
 
				-  1. each data node sends to master its existing volumes and max volume blocks
			
 
				-  2. master remembers the topology/data_center/rack/data_node/volumes
			
 
				-     for each replication level, stores
			
 
				-       volume id ~ data node
			
 
				-       writable volume ids
			
 
				-If any "assign" request comes in
			
 
				-  1. find a writable volume with the right replicationLevel
			
 
				-  2. if not found, grow the volumes with the right replication level
			
 
				-  3. return a writable volume to the user
			
 
				-
			
 
				-  
			
 
				-  for data node:
			
 
				-    0. detect existing volumes DONE
			
 
				-    1. onStartUp, and periodically, send existing volumes and maxVolumeCount  store.Join(), DONE
			
 
				-    2. accept command to grow a volume( id + replication level)  DONE
			
 
				-       /admin/assign_volume?volume=some_id&replicationType=01
			
 
				-    3. accept setting volumeLocationList  DONE
			
 
				-       /admin/set_volume_locations_list?volumeLocationsList=[{Vid:xxx,Locations:[loc1,loc2,loc3]}]
			
 
				-    4. for each write, pass the write to the next location, (Step 2)
			
 
				-       POST method should accept an index, like ttl, get decremented every hop
			
 
				-  for master:
			
 
				-    1. accept data node's report of existing volumes and maxVolumeCount ALREADY EXISTS /dir/join
			
 
				-    2. periodically refresh for active data nodes, and adjust writable volumes
			
 
				-    3. send command to grow a volume(id + replication level)  DONE
			
 
				-    5. accept lookup for volume locations    ALREADY EXISTS /dir/lookup
			
 
				-    6. read topology/datacenter/rack layout
			
 
				-
			
 
				-An algorithm to allocate volumes evenly, but may be inefficient if free volumes are plenty:
			
 
				-input: replication=xyz
			
 
				-algorithm:
			
 
				-ret_dcs = []
			
 
				-foreach dc that has y+z+1 volumes{
			
 
				-  ret_racks = []
			
 
				-  foreach rack with z+1 volumes{
			
 
				-    ret = select z+1 servers with 1 volume
			
 
				-  if ret.size()==z+1 {
			
 
				-    ret_racks.append(ret)
			
 
				-  }
			
 
				-  }
			
 
				-  randomly pick one rack from ret_racks
			
 
				-  ret += select y racks with 1 volume each
			
 
				-  if ret.size()==y+z+1{
			
 
				-    ret_dcs.append(ret)
			
 
				-  }
			
 
				-}
			
 
				-randomly pick one dc from ret_dcs
			
 
				-ret += select x data centers with 1 volume each
			
 
				-
			
 
				-A simple replica placement algorithm, but may fail when free volume slots are not plenty:
			
 
				-ret := []volumes
			
 
				-dc = randomly pick 1 data center with y+z+1 volumes
			
 
				-  rack = randomly pick 1 rack with z+1 volumes
			
 
				-    ret = ret.append(randomly pick z+1 volumes)
			
 
				-  ret = ret.append(randomly pick y racks with 1 volume)
			
 
				-ret = ret.append(randomly pick x data centers with 1 volume)
			
 
				-
			
 
				-
			
 
				-TODO:
			
 
				-  1. replicate content to the other server if the replication type needs replicas
			
--- a/note/security.txt
+++ b/note/security.txt
@@ -1,51 +0,0 @@
 
				-Design for SeaweedFS security
			
 
				-
			
 
				-Design Objectives
			
 
				-	Security can mean many different things. The original vision is that: if you have one machine lying around
			
 
				-	somewhere with some disk space, it should be able to join your file system to contribute some disk space and
			
 
				-	network bandwidth, securely!
			
 
				-
			
 
				-	To achieve this purpose, the security should be able to:
			
 
				-	1. Secure the inter-server communication. Only real cluster servers can join and communicate.
			
 
				-	2. allow clients to securely write to volume servers
			
 
				-
			
 
				-Non Objective
			
 
				-	Multi-tenant support. Avoid filers or clients cross-updating files.
			
 
				-	User specific access control.
			
 
				-
			
 
				-Design Architect
			
 
				-	master, and volume servers all talk securely via 2-way SSL for admin operations.
			
 
				-	upon joining, master gives its secret key to volume servers.
			
 
				-	filer or clients talk to master to get secret key, and use the key to generate JWT to write on volume server.
			
 
				-	A side benefit:
			
 
				-		a time limited read feature?
			
 
				-	4. volume server needs to expose https ports
			
 
				-
			
 
				-HTTP Connections
			
 
				-	clear http
			
 
				-		filer~>master, need to get a JWT from master
			
 
				-		filer~>volume
			
 
				-	2-way https
			
 
				-		master~ssl~>volume
			
 
				-		volume~ssl~>master
			
 
				-
			
 
				-file uploading:
			
 
				-	when volume server starts, it asks master for the secret key to decode JWT
			
 
				-	when a filer/client wants to upload, master generates a JWT
			
 
				-		filer~>volume(public port)
			
 
				-		master~>volume(public port)
			
 
				-
			
 
				-Currently, volume server has 2 ip addresses: ip and publicUrl.
			
 
				-	The ip is for admin purposes, and the master talks to the volume server this way.
			
 
				-	The publicUrl is for clients to access the server, via http GET/POST/DELETE etc.
			
 
				-		The write operations are secured by JWT.
			
 
				-	clients talk to master also via https? possible. Decide on this later.
			
 
				-
			
 
				-Dev plan:
			
 
				-	1. volume server separate admin from public GET/POST/DELETE handlers
			
 
				-		The step 1 may be good enough for most use cases.
			
 
				-
			
 
				-	If 2-way ssl is still needed
			
 
				-	2. volume server add ssl support
			
 
				-	3. https connections to operate on volume servers
			
 
				-
			
--- a/note/weedfs.txt
+++ b/note/weedfs.txt
@@ -1,46 +0,0 @@
 
				-How to submit a content
			
 
				-1. Find physical volumes
			
 
				-1.c Create a hash value
			
 
				-1.d find a write logic volume id, and return [logic volume id, {physical volume ids}]
			
 
				-2. submit to physical volumes
			
 
				-2.c
			
 
				-  generate the cookie
			
 
				-  generate a unique id as key
			
 
				-  choose the right altKey
			
 
				-  send bytes to physical volumes
			
 
				-2.s each
			
 
				-  save bytes
			
 
				-  store map[key uint64, altKey uint32]<offset, size>
			
 
				-    for updated entry, set old entry's offset to zero
			
 
				-3.c
			
 
				-  wait for all physical volumes to finish
			
 
				-  store the /<logic volume id>/<key>_<cookie>_<altKey>.<ext>
			
 
				-
			
 
				-How to retrieve a content
			
 
				-1.c
			
 
				-  send logic volume id
			
 
				-1.d
			
 
				-  find least busy volume's id
			
 
				-2.c
			
 
				-  send URI /<physical volume id>/<key>_<cookie>_<altKey>.<ext>
			
 
				-
			
 
				-
			
 
				-How to submit a content
			
 
				-1. send bytes to SeaweedFS, got <volume id, key uint64, cookie code>
			
 
				-   store <key uint64, volume id uint32, cookie code uint32, ext>, and other information
			
 
				-
			
 
				-To read a content
			
 
				-2. use logic volume id to lookup a <machine id>
			
 
				-   render url as /<machine id>/<volume id>/<key>/<cookie>.ext
			
 
				-
			
 
				-The directory server
			
 
				-0.init
			
 
				-  load and collect <logic volume id, machine ids> mapping
			
 
				-1.on submit content
			
 
				-  find a free logic volume id, start sending content to 3 machines
			
 
				-  if all of them finish, return <logic volume id, key, cookie code>
			
 
				-2.on read content
			
 
				-  based on logic volume id, pick a machine with less load,
			
 
				-    return <machine id>
			
 
				-
			
 
				-
			
--- a/weed/command/server.go
+++ b/weed/command/server.go
@@ -89,7 +89,7 @@ func init() {
 
				 	serverOptions.v.port = cmdServer.Flag.Int("volume.port", 8080, "volume server http listen port")
			
 
				 	serverOptions.v.publicPort = cmdServer.Flag.Int("volume.port.public", 0, "volume server public port")
			
 
				 	serverOptions.v.indexType = cmdServer.Flag.String("volume.index", "memory", "Choose [memory|leveldb|boltdb|btree] mode for memory~performance balance.")
			
 
				-	serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", true, "Adjust jpg orientation when uploading.")
			
 
				+	serverOptions.v.fixJpgOrientation = cmdServer.Flag.Bool("volume.images.fix.orientation", false, "Adjust jpg orientation when uploading.")
			
 
				 	serverOptions.v.readRedirect = cmdServer.Flag.Bool("volume.read.redirect", true, "Redirect moved or non-local volumes.")
			
 
				 	serverOptions.v.publicUrl = cmdServer.Flag.String("volume.publicUrl", "", "publicly accessible address")
			
 
				 
			
--- a/weed/command/volume.go
+++ b/weed/command/volume.go
@@ -57,7 +57,7 @@ func init() {
 
				 	v.dataCenter = cmdVolume.Flag.String("dataCenter", "", "current volume server's data center name")
			
 
				 	v.rack = cmdVolume.Flag.String("rack", "", "current volume server's rack name")
			
 
				 	v.indexType = cmdVolume.Flag.String("index", "memory", "Choose [memory|leveldb|boltdb|btree] mode for memory~performance balance.")
			
 
				-	v.fixJpgOrientation = cmdVolume.Flag.Bool("images.fix.orientation", true, "Adjust jpg orientation when uploading.")
			
 
				+	v.fixJpgOrientation = cmdVolume.Flag.Bool("images.fix.orientation", false, "Adjust jpg orientation when uploading.")
			
 
				 	v.readRedirect = cmdVolume.Flag.Bool("read.redirect", true, "Redirect moved or non-local volumes.")
			
 
				 	v.cpuProfile = cmdVolume.Flag.String("cpuprofile", "", "cpu profile output file")
			
 
				 	v.memProfile = cmdVolume.Flag.String("memprofile", "", "memory profile output file")