Improve NVIDIA device lookups

This commit is contained in:
henryruhs 2024-11-13 17:34:46 +01:00
parent c7e7751b81
commit 50837a6ba5
3 changed files with 36 additions and 22 deletions

View File

@ -1,7 +1,8 @@
import shutil
import subprocess
import xml.etree.ElementTree as ElementTree
from functools import lru_cache
from typing import Any, List
from typing import Any, List, Optional
from onnxruntime import get_available_providers, set_default_logger_severity
@ -65,7 +66,7 @@ def create_execution_providers(execution_device_id : str, execution_provider_key
def run_nvidia_smi() -> subprocess.Popen[bytes]:
	"""
	Start nvidia-smi in XML query mode and return the running process.

	The process is created with stdout piped, so the caller is expected to
	read the XML report from it (for example via communicate()).

	Raises OSError (typically FileNotFoundError) when the executable cannot
	be launched.
	"""
	# shutil.which returns None when the executable is not on PATH; fall back
	# to the bare name so Popen fails with a regular "not found" OSError
	# instead of a TypeError caused by a None entry in the argument list
	nvidia_smi_path = shutil.which('nvidia-smi') or 'nvidia-smi'
	commands = [ nvidia_smi_path, '--query', '--xml-format' ]
	return subprocess.Popen(commands, stdout = subprocess.PIPE)
@ -86,37 +87,44 @@ def detect_execution_devices() -> List[ExecutionDevice]:
for gpu_element in root_element.findall('gpu'):
execution_devices.append(
{
'driver_version': root_element.find('driver_version').text,
'driver_version': root_element.findtext('driver_version'),
'framework':
{
'name': 'CUDA',
'version': root_element.find('cuda_version').text
'version': root_element.findtext('cuda_version')
},
'product':
{
'vendor': 'NVIDIA',
'name': gpu_element.find('product_name').text.replace('NVIDIA ', '')
'name': gpu_element.findtext('product_name').replace('NVIDIA', '').strip()
},
'video_memory':
{
'total': create_value_and_unit(gpu_element.find('fb_memory_usage/total').text),
'free': create_value_and_unit(gpu_element.find('fb_memory_usage/free').text)
'total': create_value_and_unit(gpu_element.findtext('fb_memory_usage/total')),
'free': create_value_and_unit(gpu_element.findtext('fb_memory_usage/free'))
},
'temperature':
{
'gpu': create_value_and_unit(gpu_element.findtext('temperature/gpu_temp')),
'memory': create_value_and_unit(gpu_element.findtext('temperature/memory_temp'))
},
'utilization':
{
'gpu': create_value_and_unit(gpu_element.find('utilization/gpu_util').text),
'memory': create_value_and_unit(gpu_element.find('utilization/memory_util').text)
'gpu': create_value_and_unit(gpu_element.findtext('utilization/gpu_util')),
'memory': create_value_and_unit(gpu_element.findtext('utilization/memory_util'))
}
})
return execution_devices
def create_value_and_unit(text : Optional[str]) -> Optional[ValueAndUnit]:
	"""
	Parse a '<integer> <unit>' string such as '1024 MiB' into a ValueAndUnit.

	:param text: raw text of an XML element, or None when the element is missing
	:return: dictionary with the integer 'value' and the string 'unit', or None
		when the text is missing, is not exactly two whitespace separated
		tokens (for example 'N/A'), or its first token is not an integer
	"""
	# findtext() yields None for a missing element, so treat it like empty text
	if not text:
		return None
	# split() without an argument tolerates repeated or surrounding whitespace
	parts = text.split()
	if len(parts) != 2:
		return None
	value, unit = parts
	try:
		number = int(value)
	except ValueError:
		# readings like 'N/A %' carry a unit but no usable number
		return None
	return\
	{
		'value': number,
		'unit': str(unit)
	}

View File

@ -146,13 +146,18 @@ ExecutionDeviceProduct = TypedDict('ExecutionDeviceProduct',
})
# Video memory figures of an execution device.
# Each entry is declared once; a value is None when no ValueAndUnit is available.
ExecutionDeviceVideoMemory = TypedDict('ExecutionDeviceVideoMemory',
{
	'total' : Optional[ValueAndUnit],
	'free' : Optional[ValueAndUnit]
})
# Temperature readings of an execution device, keyed by 'gpu' and 'memory'.
# Each value is either a ValueAndUnit or None.
ExecutionDeviceTemperature = TypedDict('ExecutionDeviceTemperature',
{
'gpu' : Optional[ValueAndUnit],
'memory' : Optional[ValueAndUnit]
})
# Utilization figures of an execution device, keyed by 'gpu' and 'memory'.
# Each entry is declared once; a value is None when no ValueAndUnit is available.
ExecutionDeviceUtilization = TypedDict('ExecutionDeviceUtilization',
{
	'gpu' : Optional[ValueAndUnit],
	'memory' : Optional[ValueAndUnit]
})
ExecutionDevice = TypedDict('ExecutionDevice',
{
@ -160,6 +165,7 @@ ExecutionDevice = TypedDict('ExecutionDevice',
'framework' : ExecutionDeviceFramework,
'product' : ExecutionDeviceProduct,
'video_memory' : ExecutionDeviceVideoMemory,
'temperature': ExecutionDeviceTemperature,
'utilization' : ExecutionDeviceUtilization
})

View File

@ -1,5 +1,5 @@
filetype==1.2.0
gradio==5.4.0
gradio==5.5.0
gradio-rangeslider==0.0.8
numpy==2.1.2
onnx==1.17.0