# HELP python_gc_objects_collected_total Objects collected during gc # TYPE python_gc_objects_collected_total counter python_gc_objects_collected_total{generation="0"} 26187.0 python_gc_objects_collected_total{generation="1"} 2568.0 python_gc_objects_collected_total{generation="2"} 27.0 # HELP python_gc_objects_uncollectable_total Uncollectable objects found during GC # TYPE python_gc_objects_uncollectable_total counter python_gc_objects_uncollectable_total{generation="0"} 0.0 python_gc_objects_uncollectable_total{generation="1"} 0.0 python_gc_objects_uncollectable_total{generation="2"} 0.0 # HELP python_gc_collections_total Number of times this generation was collected # TYPE python_gc_collections_total counter python_gc_collections_total{generation="0"} 366.0 python_gc_collections_total{generation="1"} 33.0 python_gc_collections_total{generation="2"} 2.0 # HELP python_info Python platform information # TYPE python_info gauge python_info{implementation="CPython",major="3",minor="9",patchlevel="19",version="3.9.19"} 1.0 # HELP process_virtual_memory_bytes Virtual memory size in bytes. # TYPE process_virtual_memory_bytes gauge process_virtual_memory_bytes 4.52128768e+09 # HELP process_resident_memory_bytes Resident memory size in bytes. # TYPE process_resident_memory_bytes gauge process_resident_memory_bytes 1.27315968e+08 # HELP process_start_time_seconds Start time of the process since unix epoch in seconds. # TYPE process_start_time_seconds gauge process_start_time_seconds 1.76572711402e+09 # HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. # TYPE process_cpu_seconds_total counter process_cpu_seconds_total 192.78 # HELP process_open_fds Number of open file descriptors. # TYPE process_open_fds gauge process_open_fds 42.0 # HELP process_max_fds Maximum number of open file descriptors. # TYPE process_max_fds gauge process_max_fds 65536.0 # HELP ray_event_aggregator_agent_events_received_total Total number of events received. # TYPE ray_event_aggregator_agent_events_received_total counter ray_event_aggregator_agent_events_received_total{Component="event_aggregator_agent",SessionName="session_2025-12-11_19-15-17_971054_1",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 0.0 # HELP ray_event_aggregator_agent_events_received_created Total number of events received. # TYPE ray_event_aggregator_agent_events_received_created gauge ray_event_aggregator_agent_events_received_created{Component="event_aggregator_agent",SessionName="session_2025-12-11_19-15-17_971054_1",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 1.7657271153029194e+09 # HELP ray_event_aggregator_agent_events_dropped_at_core_worker_total Total number of events dropped at core worker. # TYPE ray_event_aggregator_agent_events_dropped_at_core_worker_total counter ray_event_aggregator_agent_events_dropped_at_core_worker_total{Component="event_aggregator_agent",SessionName="session_2025-12-11_19-15-17_971054_1",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 0.0 # HELP ray_event_aggregator_agent_events_dropped_at_core_worker_created Total number of events dropped at core worker. # TYPE ray_event_aggregator_agent_events_dropped_at_core_worker_created gauge ray_event_aggregator_agent_events_dropped_at_core_worker_created{Component="event_aggregator_agent",SessionName="session_2025-12-11_19-15-17_971054_1",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 1.765727115302943e+09 # HELP ray_event_aggregator_agent_events_dropped_at_event_aggregator_total Total number of events dropped at the event aggregator. # TYPE ray_event_aggregator_agent_events_dropped_at_event_aggregator_total counter ray_event_aggregator_agent_events_dropped_at_event_aggregator_total{Component="event_aggregator_agent",SessionName="session_2025-12-11_19-15-17_971054_1",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 0.0 # HELP ray_event_aggregator_agent_events_dropped_at_event_aggregator_created Total number of events dropped at the event aggregator. # TYPE ray_event_aggregator_agent_events_dropped_at_event_aggregator_created gauge ray_event_aggregator_agent_events_dropped_at_event_aggregator_created{Component="event_aggregator_agent",SessionName="session_2025-12-11_19-15-17_971054_1",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 1.7657271153029563e+09 # HELP ray_event_aggregator_agent_events_published_total Total number of events successfully published to the external server. # TYPE ray_event_aggregator_agent_events_published_total counter ray_event_aggregator_agent_events_published_total{Component="event_aggregator_agent",SessionName="session_2025-12-11_19-15-17_971054_1",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 0.0 # HELP ray_event_aggregator_agent_events_published_created Total number of events successfully published to the external server. # TYPE ray_event_aggregator_agent_events_published_created gauge ray_event_aggregator_agent_events_published_created{Component="event_aggregator_agent",SessionName="session_2025-12-11_19-15-17_971054_1",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 1.7657271153029668e+09 # HELP ray_node_mem_shared_bytes Total shared memory usage on a ray node # TYPE ray_node_mem_shared_bytes gauge ray_node_mem_shared_bytes{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 7.4149888e+07 # HELP ray_component_cpu_percentage Total CPU usage of the components on a node. # TYPE ray_component_cpu_percentage gauge ray_component_cpu_percentage{Component="raylet",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="36"} 0.4 ray_component_cpu_percentage{Component="agent",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 1.6 # HELP ray_component_mem_shared_bytes SHM usage of all components of the node. It is equivalent to the top command's SHR column. # TYPE ray_component_mem_shared_bytes gauge ray_component_mem_shared_bytes{Component="raylet",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="36"} 2.0529152e+07 ray_component_mem_shared_bytes{Component="agent",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 5.5533568e+07 # HELP ray_component_rss_mb RSS usage of all components on the node. # TYPE ray_component_rss_mb gauge ray_component_rss_mb{Component="raylet",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="36"} 49.037312 ray_component_rss_mb{Component="agent",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 127.315968 # HELP ray_component_uss_mb USS usage of all components on the node. # TYPE ray_component_uss_mb gauge ray_component_uss_mb{Component="raylet",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="36"} 34.197504 ray_component_uss_mb{Component="agent",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 72.413184 # HELP ray_component_num_fds Number of open fds of all components on the node (Not available on Windows). # TYPE ray_component_num_fds gauge ray_component_num_fds{Component="raylet",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="36"} 50.0 ray_component_num_fds{Component="agent",SessionName="",Version="2.48.0.1",ip="10.212.195.180",pid="98"} 41.0 # HELP ray_node_cpu_utilization Total CPU usage on a ray node # TYPE ray_node_cpu_utilization gauge ray_node_cpu_utilization{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 0.0 # HELP ray_node_cpu_count Total CPUs available on a ray node # TYPE ray_node_cpu_count gauge ray_node_cpu_count{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 64.0 # HELP ray_node_mem_used Memory usage on a ray node # TYPE ray_node_mem_used gauge ray_node_mem_used{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 114688.0 # HELP ray_node_mem_available Memory available on a ray node # TYPE ray_node_mem_available gauge ray_node_mem_available{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 1.32680896512e+011 # HELP ray_node_mem_total Total memory on a ray node # TYPE ray_node_mem_total gauge ray_node_mem_total{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 1.326810112e+011 # HELP ray_node_disk_io_read Total read from disk # TYPE ray_node_disk_io_read gauge ray_node_disk_io_read{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 7.891911567872e+013 # HELP ray_node_disk_io_write Total written to disk # TYPE ray_node_disk_io_write gauge ray_node_disk_io_write{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 1.057096960512e+014 # HELP ray_node_disk_io_read_count Total read ops from disk # TYPE ray_node_disk_io_read_count gauge ray_node_disk_io_read_count{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 6.54526414e+08 # HELP ray_node_disk_io_write_count Total write ops to disk # TYPE ray_node_disk_io_write_count gauge ray_node_disk_io_write_count{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 1.416531293e+09 # HELP ray_node_disk_io_read_speed Disk read speed # TYPE ray_node_disk_io_read_speed gauge ray_node_disk_io_read_speed{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 2.428299848322141e+06 # HELP ray_node_disk_io_write_speed Disk write speed # TYPE ray_node_disk_io_write_speed gauge ray_node_disk_io_write_speed{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 840214.8677084395 # HELP ray_node_disk_read_iops Disk read iops # TYPE ray_node_disk_read_iops gauge ray_node_disk_read_iops{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 20.45329218014965 # HELP ray_node_disk_write_iops Disk write iops # TYPE ray_node_disk_write_iops gauge ray_node_disk_write_iops{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 54.951621035954005 # HELP ray_node_disk_usage Total disk usage (bytes) on a ray node # TYPE ray_node_disk_usage gauge ray_node_disk_usage{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 1.46616799232e+011 # HELP ray_node_disk_free Total disk free (bytes) on a ray node # TYPE ray_node_disk_free gauge ray_node_disk_free{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 1.531866112e+011 # HELP ray_node_disk_utilization_percentage Total disk utilization (percentage) on a ray node # TYPE ray_node_disk_utilization_percentage gauge ray_node_disk_utilization_percentage{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 48.90431333677404 # HELP ray_node_network_sent Total network sent # TYPE ray_node_network_sent gauge ray_node_network_sent{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 2.39788559e+08 # HELP ray_node_network_received Total network received # TYPE ray_node_network_received gauge ray_node_network_received{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 3.98558e+07 # HELP ray_node_network_send_speed Network send speed # TYPE ray_node_network_send_speed gauge ray_node_network_send_speed{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 17874.948839784127 # HELP ray_node_network_receive_speed Network receive speed # TYPE ray_node_network_receive_speed gauge ray_node_network_receive_speed{IsHeadNode="false",SessionName="",Version="2.48.0.1",ip="10.212.195.180"} 2594.5798012033342 # HELP ray_object_directory_removed_locations Number of object locations removed per second. If this is high, a lot of objects have been removed from this node. # TYPE ray_object_directory_removed_locations gauge ray_object_directory_removed_locations{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_directory_added_locations Number of object locations added per second., If this is high, a lot of objects have been added on this node. # TYPE ray_object_directory_added_locations gauge ray_object_directory_added_locations{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_directory_lookups Number of object location lookups per second. If this is high, the raylet is waiting on a lot of objects. # TYPE ray_object_directory_lookups gauge ray_object_directory_lookups{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_directory_subscriptions Number of object location subscriptions. If this is high, the raylet is attempting to pull a lot of objects. # TYPE ray_object_directory_subscriptions gauge ray_object_directory_subscriptions{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_manager_num_pull_requests Number of active pull requests for objects. # TYPE ray_object_manager_num_pull_requests gauge ray_object_manager_num_pull_requests{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_store_num_local_objects Number of objects currently in the object store. # TYPE ray_object_store_num_local_objects gauge ray_object_store_num_local_objects{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_store_fallback_memory Amount of memory in fallback allocations in the filesystem. # TYPE ray_object_store_fallback_memory gauge ray_object_store_fallback_memory{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_store_used_memory Amount of memory currently occupied in the object store. # TYPE ray_object_store_used_memory gauge ray_object_store_used_memory{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_pull_manager_usage_bytes The total number of bytes usage broken per type {Available, BeingPulled, Pinned} # TYPE ray_pull_manager_usage_bytes gauge ray_pull_manager_usage_bytes{Component="raylet",NodeAddress="10.212.195.180",Type="Available",Version="2.48.0.1"} 1.5461882265e+010 ray_pull_manager_usage_bytes{Component="raylet",NodeAddress="10.212.195.180",Type="BeingPulled",Version="2.48.0.1"} 0.0 ray_pull_manager_usage_bytes{Component="raylet",NodeAddress="10.212.195.180",Type="Pinned",Version="2.48.0.1"} 0.0 # HELP ray_scheduler_failed_worker_startup_total Number of tasks that fail to be scheduled because workers were not available. Labels are broken per reason {JobConfigMissing, RegistrationTimedOut, RateLimited} # TYPE ray_scheduler_failed_worker_startup_total gauge ray_scheduler_failed_worker_startup_total{Component="raylet",NodeAddress="10.212.195.180",Reason="JobConfigMissing",Version="2.48.0.1"} 0.0 ray_scheduler_failed_worker_startup_total{Component="raylet",NodeAddress="10.212.195.180",Reason="RegistrationTimedOut",Version="2.48.0.1"} 0.0 ray_scheduler_failed_worker_startup_total{Component="raylet",NodeAddress="10.212.195.180",Reason="RateLimited",Version="2.48.0.1"} 0.0 # HELP ray_grpc_server_req_finished_total Finished request number in grpc server # TYPE ray_grpc_server_req_finished_total counter ray_grpc_server_req_finished_total{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1"} 843.0 ray_grpc_server_req_finished_total{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1"} 12970.0 # HELP ray_grpc_server_req_process_time_ms Request latency in grpc server # TYPE ray_grpc_server_req_process_time_ms histogram ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1",le="0.1"} 43.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1",le="1.0"} 843.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1",le="10.0"} 843.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1",le="100.0"} 843.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1",le="1000.0"} 843.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1",le="10000.0"} 843.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1",le="+Inf"} 843.0 ray_grpc_server_req_process_time_ms_count{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1"} 843.0 ray_grpc_server_req_process_time_ms_sum{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1"} 127.75113799999988 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1",le="0.1"} 20.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1",le="1.0"} 12970.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1",le="10.0"} 12970.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1",le="100.0"} 12970.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1",le="1000.0"} 12970.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1",le="10000.0"} 12970.0 ray_grpc_server_req_process_time_ms_bucket{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1",le="+Inf"} 12970.0 ray_grpc_server_req_process_time_ms_count{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1"} 12970.0 ray_grpc_server_req_process_time_ms_sum{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1"} 2102.7583339999983 # HELP ray_object_manager_received_chunks Number object chunks received broken per type {Total, FailedTotal, FailedCancelled, FailedPlasmaFull}. # TYPE ray_object_manager_received_chunks gauge ray_object_manager_received_chunks{Component="raylet",NodeAddress="10.212.195.180",Type="FailedCancelled",Version="2.48.0.1"} 0.0 ray_object_manager_received_chunks{Component="raylet",NodeAddress="10.212.195.180",Type="Total",Version="2.48.0.1"} 0.0 ray_object_manager_received_chunks{Component="raylet",NodeAddress="10.212.195.180",Type="FailedTotal",Version="2.48.0.1"} 0.0 ray_object_manager_received_chunks{Component="raylet",NodeAddress="10.212.195.180",Type="FailedPlasmaFull",Version="2.48.0.1"} 0.0 # HELP ray_object_directory_updates Number of object location updates per second., If this is high, the raylet is attempting to pull a lot of objects and/or the locations for objects are frequently changing (e.g. due to many object copies or evictions). # TYPE ray_object_directory_updates gauge ray_object_directory_updates{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_pull_manager_requests Number of pull requests broken per type {Queued, Active, Pinned}. # TYPE ray_pull_manager_requests gauge ray_pull_manager_requests{Component="raylet",NodeAddress="10.212.195.180",Type="Pinned",Version="2.48.0.1"} 0.0 ray_pull_manager_requests{Component="raylet",NodeAddress="10.212.195.180",Type="Queued",Version="2.48.0.1"} 0.0 ray_pull_manager_requests{Component="raylet",NodeAddress="10.212.195.180",Type="Active",Version="2.48.0.1"} 0.0 # HELP ray_grpc_server_req_new_total New request number in grpc server # TYPE ray_grpc_server_req_new_total counter ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.ReleaseUnusedBundles",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.GlobalGC",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.IsLocalWorkerDead",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.FormatGlobalMemoryInfo",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.CancelResourceReserve",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.CancelWorkerLease",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.ReleaseUnusedActorWorkers",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.ReturnWorker",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.ReportWorkerBacklog",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.PushMutableObject",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="ObjectManagerService.grpc_server.Push",NodeAddress="10.212.195.180",Version="2.48.0.1"} 64.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1"} 875.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.GetTaskFailureCause",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.RequestWorkerLease",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.ShutdownRaylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.PrestartWorkers",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.DrainRaylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.NotifyGCSRestart",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="ObjectManagerService.grpc_server.Pull",NodeAddress="10.212.195.180",Version="2.48.0.1"} 64.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.GetObjectsInfo",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.CancelTasksWithResourceShapes",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.RegisterMutableObject",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="ObjectManagerService.grpc_server.FreeObjects",NodeAddress="10.212.195.180",Version="2.48.0.1"} 64.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.GetSystemConfig",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.CommitBundleResources",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.PinObjectIDs",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.PrepareBundleResources",NodeAddress="10.212.195.180",Version="2.48.0.1"} 32.0 ray_grpc_server_req_new_total{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1"} 13002.0 # HELP ray_spill_manager_objects Number of local objects broken per state {Pinned, PendingRestore, PendingSpill}. # TYPE ray_spill_manager_objects gauge ray_spill_manager_objects{Component="raylet",NodeAddress="10.212.195.180",State="PendingSpill",Version="2.48.0.1"} 0.0 ray_spill_manager_objects{Component="raylet",NodeAddress="10.212.195.180",State="PendingRestore",Version="2.48.0.1"} 0.0 ray_spill_manager_objects{Component="raylet",NodeAddress="10.212.195.180",State="Pinned",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_infeasible_scheduling_classes The number of unique scheduling classes that are infeasible. # TYPE ray_internal_num_infeasible_scheduling_classes gauge ray_internal_num_infeasible_scheduling_classes{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_processes_started_from_cache_total The total number of workers started from a cached worker process. # TYPE ray_internal_num_processes_started_from_cache_total counter ray_internal_num_processes_started_from_cache_total{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_processes_started_from_cache (DEPRECATED, use ray_internal_num_processes_started_from_cache_total metric instead) The total number of workers started from a cached worker process. # TYPE ray_internal_num_processes_started_from_cache gauge ray_internal_num_processes_started_from_cache{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_pull_manager_requested_bundles Number of requested bundles broken per type {Get, Wait, TaskArgs}. # TYPE ray_pull_manager_requested_bundles gauge ray_pull_manager_requested_bundles{Component="raylet",NodeAddress="10.212.195.180",Type="Wait",Version="2.48.0.1"} 0.0 ray_pull_manager_requested_bundles{Component="raylet",NodeAddress="10.212.195.180",Type="Get",Version="2.48.0.1"} 0.0 ray_pull_manager_requested_bundles{Component="raylet",NodeAddress="10.212.195.180",Type="TaskArgs",Version="2.48.0.1"} 0.0 ray_pull_manager_requested_bundles{Component="raylet",NodeAddress="10.212.195.180",Type="CumulativeTotal",Version="2.48.0.1"} 1.0 # HELP ray_pull_manager_active_bundles Number of active bundle requests # TYPE ray_pull_manager_active_bundles gauge ray_pull_manager_active_bundles{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_store_memory Object store memory by various sub-kinds on this node # TYPE ray_object_store_memory gauge ray_object_store_memory{Component="raylet",Location="SPILLED",NodeAddress="10.212.195.180",ObjectState="",Version="2.48.0.1"} 0.0 ray_object_store_memory{Component="raylet",Location="MMAP_SHM",NodeAddress="10.212.195.180",ObjectState="SEALED",Version="2.48.0.1"} 0.0 ray_object_store_memory{Component="raylet",Location="MMAP_SHM",NodeAddress="10.212.195.180",ObjectState="UNSEALED",Version="2.48.0.1"} 0.0 ray_object_store_memory{Component="raylet",Location="MMAP_DISK",NodeAddress="10.212.195.180",ObjectState="SEALED",Version="2.48.0.1"} 0.0 ray_object_store_memory{Component="raylet",Location="MMAP_DISK",NodeAddress="10.212.195.180",ObjectState="UNSEALED",Version="2.48.0.1"} 0.0 # HELP ray_pull_manager_num_object_pins Number of object pin attempts by the pull manager, can be {Success, Failure}. # TYPE ray_pull_manager_num_object_pins gauge ray_pull_manager_num_object_pins{Component="raylet",NodeAddress="10.212.195.180",Type="Success",Version="2.48.0.1"} 0.0 ray_pull_manager_num_object_pins{Component="raylet",NodeAddress="10.212.195.180",Type="Failure",Version="2.48.0.1"} 0.0 # HELP ray_object_manager_bytes Number of bytes pushed or received by type {PushedFromLocalPlasma, PushedFromLocalDisk, Received}. # TYPE ray_object_manager_bytes gauge ray_object_manager_bytes{Component="raylet",NodeAddress="10.212.195.180",Type="Received",Version="2.48.0.1"} 0.0 ray_object_manager_bytes{Component="raylet",NodeAddress="10.212.195.180",Type="PushedFromLocalDisk",Version="2.48.0.1"} 0.0 ray_object_manager_bytes{Component="raylet",NodeAddress="10.212.195.180",Type="PushedFromLocalPlasma",Version="2.48.0.1"} 0.0 # HELP ray_pull_manager_retries_total Number of cumulative pull retries. # TYPE ray_pull_manager_retries_total gauge ray_pull_manager_retries_total{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_push_manager_num_pushes_remaining Number of pushes not completed. # TYPE ray_push_manager_num_pushes_remaining gauge ray_push_manager_num_pushes_remaining{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_spill_manager_objects_bytes Byte size of local objects broken per state {Pinned, PendingSpill}. # TYPE ray_spill_manager_objects_bytes gauge ray_spill_manager_objects_bytes{Component="raylet",NodeAddress="10.212.195.180",State="Spilled",Version="2.48.0.1"} 0.0 ray_spill_manager_objects_bytes{Component="raylet",NodeAddress="10.212.195.180",State="PendingSpill",Version="2.48.0.1"} 0.0 ray_spill_manager_objects_bytes{Component="raylet",NodeAddress="10.212.195.180",State="Restored",Version="2.48.0.1"} 0.0 ray_spill_manager_objects_bytes{Component="raylet",NodeAddress="10.212.195.180",State="PendingRestore",Version="2.48.0.1"} 0.0 ray_spill_manager_objects_bytes{Component="raylet",NodeAddress="10.212.195.180",State="Pinned",Version="2.48.0.1"} 0.0 # HELP ray_push_manager_chunks Number of object chunks transfer broken per type {InFlight, Remaining}. # TYPE ray_push_manager_chunks gauge ray_push_manager_chunks{Component="raylet",NodeAddress="10.212.195.180",Type="InFlight",Version="2.48.0.1"} 0.0 ray_push_manager_chunks{Component="raylet",NodeAddress="10.212.195.180",Type="Remaining",Version="2.48.0.1"} 0.0 # HELP ray_scheduler_tasks Number of tasks waiting for scheduling broken per state {Cancelled, Executing, Waiting, Dispatched, Received}. # TYPE ray_scheduler_tasks gauge ray_scheduler_tasks{Component="raylet",NodeAddress="10.212.195.180",State="Cancelled",Version="2.48.0.1"} 0.0 ray_scheduler_tasks{Component="raylet",NodeAddress="10.212.195.180",State="Received",Version="2.48.0.1"} 0.0 ray_scheduler_tasks{Component="raylet",NodeAddress="10.212.195.180",State="Dispatched",Version="2.48.0.1"} 0.0 ray_scheduler_tasks{Component="raylet",NodeAddress="10.212.195.180",State="SpilledWaiting",Version="2.48.0.1"} 0.0 ray_scheduler_tasks{Component="raylet",NodeAddress="10.212.195.180",State="SpilledUnschedulable",Version="2.48.0.1"} 0.0 ray_scheduler_tasks{Component="raylet",NodeAddress="10.212.195.180",State="Executing",Version="2.48.0.1"} 0.0 ray_scheduler_tasks{Component="raylet",NodeAddress="10.212.195.180",State="Waiting",Version="2.48.0.1"} 0.0 # HELP ray_scheduler_unscheduleable_tasks Number of pending tasks (not scheduleable tasks) broken per reason {Infeasible, WaitingForResources, WaitingForPlasmaMemory, WaitingForRemoteResources, WaitingForWorkers}. # TYPE ray_scheduler_unscheduleable_tasks gauge ray_scheduler_unscheduleable_tasks{Component="raylet",NodeAddress="10.212.195.180",Reason="Infeasible",Version="2.48.0.1"} 0.0 ray_scheduler_unscheduleable_tasks{Component="raylet",NodeAddress="10.212.195.180",Reason="WaitingForPlasmaMemory",Version="2.48.0.1"} 0.0 ray_scheduler_unscheduleable_tasks{Component="raylet",NodeAddress="10.212.195.180",Reason="WaitingForResources",Version="2.48.0.1"} 0.0 ray_scheduler_unscheduleable_tasks{Component="raylet",NodeAddress="10.212.195.180",Reason="WaitingForRemoteResources",Version="2.48.0.1"} 0.0 ray_scheduler_unscheduleable_tasks{Component="raylet",NodeAddress="10.212.195.180",Reason="WaitingForWorkers",Version="2.48.0.1"} 0.0 # HELP ray_grpc_server_req_succeeded_total Succeeded request count in grpc server # TYPE ray_grpc_server_req_succeeded_total counter ray_grpc_server_req_succeeded_total{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1"} 843.0 ray_grpc_server_req_succeeded_total{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1"} 12970.0 # HELP ray_grpc_server_req_handling_total Request number are handling in grpc server # TYPE ray_grpc_server_req_handling_total counter ray_grpc_server_req_handling_total{Component="raylet",Method="NodeManagerService.grpc_server.GetNodeStats",NodeAddress="10.212.195.180",Version="2.48.0.1"} 843.0 ray_grpc_server_req_handling_total{Component="raylet",Method="NodeManagerService.grpc_server.GetResourceLoad",NodeAddress="10.212.195.180",Version="2.48.0.1"} 12970.0 # HELP ray_resources Logical Ray resources broken per state {AVAILABLE, USED} # TYPE ray_resources gauge ray_resources{Component="raylet",Name="object_store_memory",NodeAddress="10.212.195.180",State="USED",Version="2.48.0.1"} 0.0 ray_resources{Component="raylet",Name="memory",NodeAddress="10.212.195.180",State="AVAILABLE",Version="2.48.0.1"} 1.7179869184e+010 ray_resources{Component="raylet",Name="CPU",NodeAddress="10.212.195.180",State="AVAILABLE",Version="2.48.0.1"} 4.0 ray_resources{Component="raylet",Name="memory",NodeAddress="10.212.195.180",State="USED",Version="2.48.0.1"} 0.0 ray_resources{Component="raylet",Name="CPU",NodeAddress="10.212.195.180",State="USED",Version="2.48.0.1"} 0.0 ray_resources{Component="raylet",Name="object_store_memory",NodeAddress="10.212.195.180",State="AVAILABLE",Version="2.48.0.1"} 1.6320875724e+010 # HELP ray_spill_manager_request_total Number of {spill, restore} requests. # TYPE ray_spill_manager_request_total gauge ray_spill_manager_request_total{Component="raylet",NodeAddress="10.212.195.180",Type="Spilled",Version="2.48.0.1"} 0.0 ray_spill_manager_request_total{Component="raylet",NodeAddress="10.212.195.180",Type="FailedDeletion",Version="2.48.0.1"} 0.0 ray_spill_manager_request_total{Component="raylet",NodeAddress="10.212.195.180",Type="Restored",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_processes_started_total The total number of worker processes the worker pool has created. # TYPE ray_internal_num_processes_started_total counter ray_internal_num_processes_started_total{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_processes_started (DEPRECATED, use ray_internal_num_processes_started_total metric instead) The total number of worker processes the worker pool has created. # TYPE ray_internal_num_processes_started gauge ray_internal_num_processes_started{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_processes_skipped_job_mismatch_total The total number of cached workers skipped due to job mismatch. # TYPE ray_internal_num_processes_skipped_job_mismatch_total counter ray_internal_num_processes_skipped_job_mismatch_total{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_processes_skipped_job_mismatch (DEPRECATED, use ray_internal_num_processes_skipped_job_mismatch_total metric instead) The total number of cached workers skipped due to job mismatch. # TYPE ray_internal_num_processes_skipped_job_mismatch gauge ray_internal_num_processes_skipped_job_mismatch{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_processes_skipped_runtime_environment_mismatch_total The total number of cached workers skipped due to runtime environment mismatch. # TYPE ray_internal_num_processes_skipped_runtime_environment_mismatch_total counter ray_internal_num_processes_skipped_runtime_environment_mismatch_total{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_processes_skipped_runtime_environment_mismatch (DEPRECATED, use ray_internal_num_processes_skipped_runtime_environment_mismatch_total metric instead) The total number of cached workers skipped due to runtime environment mismatch. # TYPE ray_internal_num_processes_skipped_runtime_environment_mismatch gauge ray_internal_num_processes_skipped_runtime_environment_mismatch{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_internal_num_spilled_tasks The cumulative number of lease requeusts that this raylet has spilled to other raylets. # TYPE ray_internal_num_spilled_tasks gauge ray_internal_num_spilled_tasks{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 0.0 # HELP ray_object_store_available_memory Amount of memory currently available in the object store. # TYPE ray_object_store_available_memory gauge ray_object_store_available_memory{Component="raylet",NodeAddress="10.212.195.180",Version="2.48.0.1"} 1.6320875724e+010