Improve NVIDIA device lookups

This commit is contained in:
henryruhs 2024-11-13 17:34:46 +01:00
parent c7e7751b81
commit 50837a6ba5
3 changed files with 36 additions and 22 deletions

View File

@ -1,7 +1,8 @@
import shutil
import subprocess import subprocess
import xml.etree.ElementTree as ElementTree import xml.etree.ElementTree as ElementTree
from functools import lru_cache from functools import lru_cache
from typing import Any, List from typing import Any, List, Optional
from onnxruntime import get_available_providers, set_default_logger_severity from onnxruntime import get_available_providers, set_default_logger_severity
@ -65,7 +66,7 @@ def create_execution_providers(execution_device_id : str, execution_provider_key
def run_nvidia_smi() -> subprocess.Popen[bytes]:
    """
    Start nvidia-smi in XML query mode and return the running process.

    The executable is resolved through shutil.which. When it is not found on
    PATH, shutil.which returns None; the bare command name is used instead so
    that Popen raises FileNotFoundError rather than a TypeError caused by a
    None entry in the argument list.

    Returns:
        The Popen handle with stdout connected to a pipe; the caller is
        responsible for reading the output and waiting for the process.

    Raises:
        FileNotFoundError: when nvidia-smi is not installed or not on PATH.
    """
    nvidia_smi_path = shutil.which('nvidia-smi') or 'nvidia-smi'
    commands = [ nvidia_smi_path, '--query', '--xml-format' ]
    return subprocess.Popen(commands, stdout = subprocess.PIPE)
@ -86,37 +87,44 @@ def detect_execution_devices() -> List[ExecutionDevice]:
for gpu_element in root_element.findall('gpu'): for gpu_element in root_element.findall('gpu'):
execution_devices.append( execution_devices.append(
{ {
'driver_version': root_element.find('driver_version').text, 'driver_version': root_element.findtext('driver_version'),
'framework': 'framework':
{ {
'name': 'CUDA', 'name': 'CUDA',
'version': root_element.find('cuda_version').text 'version': root_element.findtext('cuda_version')
}, },
'product': 'product':
{ {
'vendor': 'NVIDIA', 'vendor': 'NVIDIA',
'name': gpu_element.find('product_name').text.replace('NVIDIA ', '') 'name': gpu_element.findtext('product_name').replace('NVIDIA', '').strip()
}, },
'video_memory': 'video_memory':
{ {
'total': create_value_and_unit(gpu_element.find('fb_memory_usage/total').text), 'total': create_value_and_unit(gpu_element.findtext('fb_memory_usage/total')),
'free': create_value_and_unit(gpu_element.find('fb_memory_usage/free').text) 'free': create_value_and_unit(gpu_element.findtext('fb_memory_usage/free'))
},
'temperature':
{
'gpu': create_value_and_unit(gpu_element.findtext('temperature/gpu_temp')),
'memory': create_value_and_unit(gpu_element.findtext('temperature/memory_temp'))
}, },
'utilization': 'utilization':
{ {
'gpu': create_value_and_unit(gpu_element.find('utilization/gpu_util').text), 'gpu': create_value_and_unit(gpu_element.findtext('utilization/gpu_util')),
'memory': create_value_and_unit(gpu_element.find('utilization/memory_util').text) 'memory': create_value_and_unit(gpu_element.findtext('utilization/memory_util'))
} }
}) })
return execution_devices return execution_devices
def create_value_and_unit(text : str) -> Optional[ValueAndUnit]:
    """
    Parse a reading such as '4096 MiB' into its integer value and unit.

    Args:
        text: the raw reading; may be None or empty when the queried XML
            element is absent.

    Returns:
        A dict with the keys 'value' (int) and 'unit' (str), or None when the
        text is missing or is not exactly an integer followed by a unit
        (for example 'N/A').
    """
    # findtext() yields None for a missing element, so guard before splitting
    if not text:
        return None
    parts = text.split()
    if len(parts) != 2:
        return None
    value, unit = parts
    try:
        number = int(value)
    except ValueError:
        # two-word placeholders without a numeric part are treated as unavailable
        return None
    return\
    {
        'value': number,
        'unit': unit
    }

View File

@ -146,13 +146,18 @@ ExecutionDeviceProduct = TypedDict('ExecutionDeviceProduct',
}) })
# Video memory readings of a single device; each entry is a parsed
# value/unit pair, or None when the reading could not be parsed.
ExecutionDeviceVideoMemory = TypedDict('ExecutionDeviceVideoMemory',
{
    'total' : Optional[ValueAndUnit],
    'free' : Optional[ValueAndUnit]
})
# Temperature readings of a single device (GPU core and memory); each entry
# is a parsed value/unit pair, or None when the reading could not be parsed.
ExecutionDeviceTemperature = TypedDict('ExecutionDeviceTemperature',
{
    'gpu' : Optional[ValueAndUnit],
    'memory' : Optional[ValueAndUnit]
})
# Utilization readings of a single device (GPU core and memory); each entry
# is a parsed value/unit pair, or None when the reading could not be parsed.
ExecutionDeviceUtilization = TypedDict('ExecutionDeviceUtilization',
{
    'gpu' : Optional[ValueAndUnit],
    'memory' : Optional[ValueAndUnit]
})
ExecutionDevice = TypedDict('ExecutionDevice', ExecutionDevice = TypedDict('ExecutionDevice',
{ {
@ -160,6 +165,7 @@ ExecutionDevice = TypedDict('ExecutionDevice',
'framework' : ExecutionDeviceFramework, 'framework' : ExecutionDeviceFramework,
'product' : ExecutionDeviceProduct, 'product' : ExecutionDeviceProduct,
'video_memory' : ExecutionDeviceVideoMemory, 'video_memory' : ExecutionDeviceVideoMemory,
'temperature': ExecutionDeviceTemperature,
'utilization' : ExecutionDeviceUtilization 'utilization' : ExecutionDeviceUtilization
}) })

View File

@ -1,5 +1,5 @@
filetype==1.2.0 filetype==1.2.0
gradio==5.4.0 gradio==5.5.0
gradio-rangeslider==0.0.8 gradio-rangeslider==0.0.8
numpy==2.1.2 numpy==2.1.2
onnx==1.17.0 onnx==1.17.0