# my global config
global:
  scrape_interval: 60s # Scrape targets every 60 seconds (same as the 1-minute default).
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  scrape_timeout: 60s # Per-scrape timeout of 60 seconds (the default is 10s); it must not exceed scrape_interval.

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'k8s-prometheus-monitor'
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first.rules"
  # - "second.rules"
  - /home/server/prometheus/rule.yml

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any time series scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ['localhost:9090']
if response: return response.json() else: return None
def json_node_exporter_cadvisor(): kube_node_status_condition = simple_query( query='kube_node_status_condition{status="true"}==1') kube_node_labels = simple_query(query='kube_node_labels') kube_node_info = simple_query(query='kube_node_info') try: cluster_count_old = {} with open("/home/server/prometheus/groups/node-exporter/config_node_exporter.json") as f: config = json.loads(f.read()) for node in config: cluster = node["labels"]["cluster"] if cluster not in cluster_count_old: cluster_count_old[cluster] = 1 else: cluster_count_old[cluster] += 1 except: cluster_count_old = {}
try: nodes_ready = [i["metric"]["node"] for i in kube_node_status_condition["data"]["result"]] node_label_dict = { i["metric"]["node"]: { "cluster": i["metric"]["cluster"], "group": i["metric"].get("label_group") } for i in kube_node_labels["data"]["result"] } node_version_dict = { i["metric"]["node"]: str( i["metric"]["kubelet_version"]).strip("v").split(".") for i in kube_node_info["data"]["result"] } config_node_exporter = [] config_cadvisor = [] config_cadvisor_standalone = [] cluster_count_new = {} for n in nodes_ready: targets_9100 = [str(n) + ":9100"] targets_4194 = [str(n) + ":4194"] version = node_version_dict[n] cluster = node_label_dict[n]["cluster"] if cluster not in cluster_count_new: cluster_count_new[cluster] = 1 else: cluster_count_new[cluster] += 1 item_node = {"labels": node_label_dict[n], "targets": targets_9100} item_cadvisor = { "labels": node_label_dict[n], "targets": targets_4194} config_node_exporter.append(item_node) if int(version[1]) == 1 and int(version[1]) >= 14: config_cadvisor_standalone.append(item_cadvisor) else: config_cadvisor.append(item_cadvisor)
cluster_config_change = { cluster: cluster_count_new.get(cluster, 0) - cluster_count_old.get(cluster) for cluster in cluster_count_old } change_min = min(list(cluster_config_change.values())) if change_min >= -3: #node节点减少如果大于3个则不自动更新配置 with open("/home/server/prometheus/groups/node-exporter/config_node_exporter.json", "w") as f: f.write(json.dumps(config_node_exporter, indent=4)) with open("/home/server/prometheus/groups/cadvisor/config_cadvisor.json", "w") as f: f.write(json.dumps(config_cadvisor, indent=4)) with open("/home/server/prometheus/groups/cadvisor-standalone/config_cadvisor_standalone.json", "w") as f: f.write(json.dumps(config_cadvisor_standalone, indent=4)) else: with open("/home/server/prometheus/groups/node-exporter/config_node_exporter.json.new", "w") as f: f.write(json.dumps(config_node_exporter, indent=4)) with open("/home/server/prometheus/groups/cadvisor/config_cadvisor.json.new", "w") as f: f.write(json.dumps(config_cadvisor, indent=4)) with open("/home/server/prometheus/groups/cadvisor-standalone/config_cadvisor_standalone.json.new", "w") as f: f.write(json.dumps(config_cadvisor_standalone, indent=4))