Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
67 views
in Technique[技术] by (71.8m points)

java map string data to dictionary in python

I am getting the Java map string below from a data source.

{0={_shards={total=1, failed=0, successful=1, skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc, _source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c, type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0}, log={file={path=/var/log/auth.log}, offset=46607}, service={type=system}, host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu, family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu}, containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598, 192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00, module=system, dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}

I don't have the privileges to convert it in Java. I have a Python application in which I want to access that data as a Python dictionary, so I want to convert the string into a Python dictionary.

question from:https://stackoverflow.com/questions/66058796/java-map-string-data-to-dictionary-in-python

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Answer

0 votes
by (71.8m points)

The .toString() for Java Collections (Map, List, etc.) is lossy because it does not disambiguate delimiters. As such, there is no way to 100% reliably reconstruct the data-structure from the output of Map.toString(). However, if there are some constraints applied to the problem:

  1. the keys and values do not contain certain characters (approximately {}=[],")
  2. arrays do not contain a mixture of primitive values and objects/arrays

then we can somewhat reliably transform the output of toString() to JSON, and then parse the JSON into a Python data-structure. I wouldn't use this code in production, but as long as you know it can break, it could be useful in certain cases:

# Sample input: the Java Map.toString() dump from the question, kept verbatim as a single string.
TEST_VALUE = "{0={_shards={total=1, failed=0, successful=1, skipped=0}, hits={hits=[{_index=filebeat-7.10.0-2021.02.02-000001, _type=_doc, _source={input={type=log}, agent={hostname=ubuntu_fresh, name=ubuntu_fresh, id=879f36f2-4ade-47b6-a7b9-7972634c7b8c, type=filebeat, ephemeral_id=5676523f-bc61-4c12-b319-8b463348ba63, version=7.10.0}, @timestamp=2021-02-04T12:36:33.475Z, ecs={version=1.6.0}, log={file={path=/var/log/auth.log}, offset=46607}, service={type=system}, host={hostname=ubuntu_fresh, os={kernel=4.15.0-135-generic, codename=bionic, name=Ubuntu, family=debian, version=18.04.1 LTS (Bionic Beaver), platform=ubuntu}, containerized=false, ip=[10.0.2.15, fe80::a00:27ff:fe82:f598, 192.168.56.22, fe80::a00:27ff:fe32:fab0], name=ubuntu_fresh, id=cdfcdf6a39d44b98b2aa51700134f415, mac=[08:00:27:82:f5:98, 08:00:27:32:fa:b0], architecture=x86_64}, fileset={name=auth}, message=Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2, error={message=Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]}, event={ingested=2021-02-04T12:36:39.482598548Z, timezone=+00:00, module=system, dataset=system.auth}}, _id=nNALbXcBbfKg8Fh6Zci7, _score=25.188179}], total={value=1, relation=eq}, max_score=25.188179}, took=1, timed_out=false}}"

def quote_value_array_values(match):
    """Quote every element of a matched array body.

    Used as the replacement callback for ``re.sub`` in
    ``javastr_to_jsonstr``.

    Args:
        match: Match object whose full text is the inside of a Java list of
            plain values, e.g. ``10.0.2.15, 192.168.56.22``.

    Returns:
        The same elements, each wrapped in double quotes and re-joined with
        a comma and a space, e.g. ``"10.0.2.15", "192.168.56.22"``.
    """
    return ", ".join(f'"{value}"' for value in match.group().split(", "))


def javastr_to_jsonstr(s):
    """Convert the text produced by a Java ``Map.toString()`` into JSON text.

    This is a best-effort, regex-based rewrite. It is only reliable when
    keys and values do not contain the delimiter characters (braces,
    square brackets at the start of a value, ``=``, commas, double quotes)
    and when arrays do not mix plain values with nested objects/arrays.
    Every scalar value is emitted as a JSON string; numbers and booleans
    are not typed.

    Args:
        s: The ``toString()`` dump, e.g. ``{a=1, b={c=x, d=[p, q]}}``.

    Returns:
        A string that can be passed to ``json.loads`` when the constraints
        above hold, e.g. ``{"a":"1", "b":{"c":"x", "d":["p", "q"]}}``.
    """
    import re

    # 1. Arrays of plain values: quote each element between "=[" and "]".
    #    Arrays that start with "{" (arrays of objects) are skipped here.
    s = re.sub(r"(?<==\[)[^{\[\]]+(?=\])", quote_value_array_values, s)

    # 2./3. Scalar entries "key=value": quote both sides. The negative
    #    lookahead leaves entries whose value is an object or array for the
    #    later steps. One pass handles the first entry of a map (preceded
    #    by "{"), the other handles subsequent entries (preceded by ", ").
    s = re.sub(r'(?<=\{)([^"=]+)[=:](?!\{|\[)([^,}]+)', r'"\1":"\2"', s)
    s = re.sub(r'(?<=, )([^"=]+)[=:](?!\{|\[)([^,}]+)', r'"\1":"\2"', s)

    # 4./5. Remaining entries have an object or array as value: quote only
    #    the key and turn "=" into ":". Already-converted entries start
    #    with a double quote and therefore cannot match the key group.
    s = re.sub(r'(?<=\{)([^"=]+)=(?!")', r'"\1":', s)
    s = re.sub(r'(?<=, )([^"=]+)=(?!")', r'"\1":', s)
    return s

# Demo: convert the sample dump and show the resulting Python structure.
import json
# Rewrite the Java-style dump held in TEST_VALUE into JSON text.
json_str = javastr_to_jsonstr(TEST_VALUE)
# Parse the JSON text into regular Python dicts and lists.
json_obj = json.loads(json_str)
# Pretty-print the parsed structure; the "Output" section below shows the result.
print(json.dumps(json_obj, indent=1))

Output:

{
 "0": {
  "_shards": {
   "total": "1",
   "failed": "0",
   "successful": "1",
   "skipped": "0"
  },
  "hits": {
   "hits": [
    {
     "_index": "filebeat-7.10.0-2021.02.02-000001",
     "_type": "_doc",
     "_source": {
      "input": {
       "type": "log"
      },
      "agent": {
       "hostname": "ubuntu_fresh",
       "name": "ubuntu_fresh",
       "id": "879f36f2-4ade-47b6-a7b9-7972634c7b8c",
       "type": "filebeat",
       "ephemeral_id": "5676523f-bc61-4c12-b319-8b463348ba63",
       "version": "7.10.0"
      },
      "@timestamp": "2021-02-04T12:36:33.475Z",
      "ecs": {
       "version": "1.6.0"
      },
      "log": {
       "file": {
        "path": "/var/log/auth.log"
       },
       "offset": "46607"
      },
      "service": {
       "type": "system"
      },
      "host": {
       "hostname": "ubuntu_fresh",
       "os": {
        "kernel": "4.15.0-135-generic",
        "codename": "bionic",
        "name": "Ubuntu",
        "family": "debian",
        "version": "18.04.1 LTS (Bionic Beaver)",
        "platform": "ubuntu"
       },
       "containerized": "false",
       "ip": [
        "10.0.2.15",
        "fe80::a00:27ff:fe82:f598",
        "192.168.56.22",
        "fe80::a00:27ff:fe32:fab0"
       ],
       "name": "ubuntu_fresh",
       "id": "cdfcdf6a39d44b98b2aa51700134f415",
       "mac": [
        "08:00:27:82:f5:98",
        "08:00:27:32:fa:b0"
       ],
       "architecture": "x86_64"
      },
      "fileset": {
       "name": "auth"
      },
      "message": "Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2",
      "error": {
       "message": "Provided Grok expressions do not match field value: [Feb 4 12:36:28 ubuntu_fresh sshd[2662]: Failed password for root from 192.168.56.1 port 35830 ssh2]"
      },
      "event": {
       "ingested": "2021-02-04T12:36:39.482598548Z",
       "timezone": "+00:00",
       "module": "system",
       "dataset": "system.auth"
      }
     },
     "_id": "nNALbXcBbfKg8Fh6Zci7",
     "_score": "25.188179"
    }
   ],
   "total": {
    "value": "1",
    "relation": "eq"
   },
   "max_score": "25.188179"
  },
  "took": "1",
  "timed_out": "false"
 }
}

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome to OStack Knowledge Sharing Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...